# File src/library/tools/R/Rdtools.R # Part of the R package, http://www.R-project.org # # Copyright (C) 1995-2013 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # http://www.r-project.org/Licenses/ RdTextFilter <- function(ifile, encoding = "unknown", keepSpacing = TRUE, drop = character(), keep = character()) { if(inherits(ifile, "srcfile")) ifile <- ifile$filename if (inherits(ifile, "Rd")) { # Undo sorting done in prepare2_Rd srcrefs <- sapply(ifile, function(s) attr(s, "srcref")) p <- ifile[ order(srcrefs[1,], srcrefs[2,]) ] class(p) <- class(ifile) } else p <- parse_Rd(ifile, encoding = encoding) tags <- RdTags(p) if ("\\encoding" %in% tags) { encoding <- p[[which.max(tags == "\\encoding")]][[1L]] if (encoding %in% c("UTF-8", "utf-8", "utf8")) encoding <- "UTF-8" if (!inherits(ifile, "Rd")) p <- parse_Rd(ifile, encoding=encoding) } else encoding <- "" ## Directly using a text connection to accumulate the filtered ## output seems to be faster than using .eval_with_capture(): to use ## the latter, change mycat to cat, or use mycat <- cat, and create ## out via ## out <- .eval_with_capture({ ## show(p) ## mycat("\n") ## })$output myval <- character() mycon <- textConnection("myval", open = "w", local = TRUE, encoding = "UTF-8") on.exit(close(mycon)) mycat <- function(...) cat(..., file = mycon) prevline <- 1L prevcol <- 0L doPartialMarkup <- function(x, tags, i) { # handle things like \bold{pre}fix result <- FALSE if (i < length(tags) && tags[i+1L] == "TEXT" && length(x[[i]]) == 1L && tags[i] %in% c("\\bold", "\\emph", "\\strong", "\\link") && !(tags[i] %in% drop) && RdTags(x[[i]]) == "TEXT") { text1 <- x[[i]][[1L]] if (length(grep("[^[:space:]]$", text1))) { # Ends in non-blank text2 <- x[[i+1L]] if (length(grep("^[^[:space:]]", text2))) { # Starts non-blank show(text1) prevcol <<- prevcol+1L # Shift text2 left by one column saveline <- prevline show(text2) if (prevline == saveline) prevcol <<- prevcol-1L result <- TRUE } } } result } show <- function(x) { srcref <- attr(x, "srcref") firstline <- srcref[1L] firstbyte <- srcref[2L] lastline <- srcref[3L] lastbyte <- srcref[4L] firstcol <- srcref[5L] lastcol <- srcref[6L] tag <- attr(x, "Rd_tag") if (is.null(tag)) tag <- "NULL" if (tag %in% drop) tag <- "DROP" else if (tag %in% keep) tag <- "KEEPLIST" # Include both text and lists switch(tag, KEEP =, TEXT = { if (prevline < firstline) { prevcol <<- 0L mycat(rep.int("\n", if(keepSpacing) firstline - prevline else 1L)) } if (keepSpacing) mycat(rep.int(" ", firstcol - prevcol - 1L), sep = "") x <- as.character(srcref) # go back to original form mycat(x, sep = "") prevcol <<- lastcol prevline <<- lastline }, "\\S3method"=, "\\S4method"=, "\\command"=, "\\docType"=, "\\email"=, "\\encoding"=, "\\file"=, "\\keyword"=, "\\link"=, "\\linkS4class"=, "\\method"=, "\\pkg"=, "\\var"=, DROP = {}, # do nothing "\\tabular"=, "#ifdef"=, "#ifndef"={ # Ignore the first arg, process the second show(x[[2L]]) }, "\\item"={ # Ignore the first arg of a two-arg item if (length(x) == 2L) show(x[[2L]]) }, { # default if (is.list(x)) { tags <- RdTags(x) i <- 0L while (i < length(x)) { i <- i + 1L if (doPartialMarkup(x, tags, i)) i <- i + 1L else show(x[[i]]) } } else if (tag == "KEEPLIST") { attr(x, "Rd_tag") <- "KEEP" show(x) } }) } show(p) mycat("\n") out <- textConnectionValue(mycon) ## Ideally, we would always canonicalize to UTF-8. ## However, when using RdTextFilter() for aspell(), it is not clear ## whether this is a good idea: the aspell program does not need to ## have full UTF-8 support (and what precisely holds is not clear: ## the manuals says that aspell ## can easily check documents in UTF-8 without having to use a ## special dictionary. ## but also ## If Aspell is compiled with a version of the curses library that ## support wide characters then Aspell can also check UTF-8 text. ## So at least until this can be resolved, turn filter results for ## Rd files originally in latin1 back to latin1. if(encoding == "latin1") out <- iconv(out, "UTF-8", "latin1") out }