#  File src/library/tools/R/read.00Index.R
#  Part of the R package, http://www.R-project.org
#
#  Copyright (C) 1995-2012 The R Core Team
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  http://www.r-project.org/Licenses/

## Read a 00Index-style file into a two-column character matrix.
##
## Arguments:
##   file  a connection, or a single character string: "" means stdin(),
##         anything else is opened as a file for reading and closed again
##         on exit.  Connections passed in by the caller are not closed.
##
## Value:
##   A character matrix with columns "Item" and "Description", one row per
##   entry found; zero rows if no paragraph looks like a description list.
##
## Errors:
##   Signals an error if 'file' is neither a connection nor a single
##   non-NA character string.
read.00Index <-
function(file)
{
    ## Validate up front so that NA or length != 1 character input gets
    ## the intended message instead of failing inside 'if'.
    valid <- inherits(file, "connection") ||
        (is.character(file) && length(file) == 1L && !is.na(file))
    if(!valid)
        ## The placeholder is the *name* of the argument, not its value.
        stop(gettextf("argument '%s' must be a character string or connection",
                      "file"),
             domain = NA)

    if(is.character(file)) {
        if(!nzchar(file))
            file <- stdin()
        else {
            file <- file(file, "r")
            on.exit(close(file), add = TRUE)
        }
    }

    y <- matrix("", nrow = 0L, ncol = 2L)
    x <- paste(readLines(file), collapse = "\n")

    ## We cannot necessarily assume that the 00Index-style file to be
    ## read in was generated by Rdindex() or by R using
    ## formatDL(style = "table").  In particular, some packages have
    ## 00Index files with (section) headers and footers in addition to
    ## the data base chunks which are description lists rendered in
    ## tabular form.  Hence, we need some heuristic for identifying the
    ## db chunks.  Easy to the human eye (is there a column for aligning
    ## entries?) but far from trivial ... as a first approximation we
    ## consider chunks containing at least one tab or three consecutive
    ## spaces a db chunk.  (A better heuristic would be the following:
    ## entries rendered in one line have item and description separated
    ## by at least 3 spaces or tabs; entries with a line break have
    ## continuation lines starting with whitespace (no test for
    ## alignment).  If a chunk is made of such entries only it is
    ## considered a db chunk.  But not all current packages follow this
    ## scheme.  Argh.)
    ## Clearly we need to move to something better in future versions.

    ## First split into paragraph chunks separated by whitespace-only
    ## lines.
    for(chunk in unlist(strsplit(x, "\n[ \t\n]*\n"))) {
        ## Parsing is best effort: a chunk that cannot be parsed is
        ## skipped rather than aborting the whole read.
        entries <- tryCatch({
            ## Db chunk heuristic: a tab or three consecutive spaces.
            if(!grepl("(   |\t)", chunk))
                NULL
            else {
                ## Combine entries with continuation lines.
                chunk <- gsub("\n[ \t]+", "\t", chunk)
                ## Split into lines and then according to whitespace.
                fields <- strsplit(unlist(strsplit(chunk, "\n")), "[ \t]")
                ## First field is the item (errors, and hence skips the
                ## chunk, if a line has no fields at all).
                items <- vapply(fields, `[[`, "", 1L, USE.NAMES = FALSE)
                ## Remaining non-empty fields form the description.
                descriptions <-
                    vapply(fields,
                           function(t)
                               paste(t[-c(1L, which(!nzchar(t)))],
                                     collapse = " "),
                           "", USE.NAMES = FALSE)
                cbind(items, descriptions, deparse.level = 0)
            }
        }, error = identity)
        if(!inherits(entries, "error") && NCOL(entries) == 2L)
            y <- rbind(y, entries)
    }
    colnames(y) <- c("Item", "Description")
    y
}