# File src/library/tools/R/index.R # Part of the R package, http://www.R-project.org # # Copyright (C) 1995-2012 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # http://www.r-project.org/Licenses/ ### Miscellaneous indexing functions. ## ## Currently indices are represented as 2-column character matrices. ## To 'merge' indices in the sense of using the values from index B for ## all keys in index A also present in index B, we currently use ## idx <- match(indA[ , 1L], indB[ , 1L], 0L) ## indA[which(idx != 0L), 2L] <- indB[idx, 2L] ## which could be abstracted into a function .mergeIndexEntries(). ## ### * .build_data_index .build_data_index <- function(dataDir, contents) { ## Build an index with information about all available data sets. ## See .build_demo_index() for an explanation of what we do here. ## ## We could also have an interface like ## .build_data_index(dir, contents = NULL) ## where @code{dir} is the path to a package's root source dir and ## contents is ## Rd_contents(list_files_with_type(file.path(dir, "man"), ## "docs")). ## if(!file_test("-d", dataDir)) stop(gettextf("directory '%s' does not exist", dataDir), domain = NA) ## dataFiles <- list_files_with_type(dataDir, "data") dataTopics <- list_data_in_pkg(dataDir=dataDir) if(!length(dataTopics)) return(matrix("", 0L, 2L)) names(dataTopics) <- paste0(names(dataTopics), "/") datasets <- unlist(dataTopics) ## it is possible to have topics that create no object: ## BioC's makecdfenv did. if(!length(datasets)) return(matrix("", 0L, 2L)) names(datasets) <- sub("/[^/]*$", "", names(datasets)) datasets <- sort(datasets) dataIndex <- cbind(datasets, "") ## Note that NROW(contents) might be 0. if(length(datasets) && NROW(contents)) { aliasIndices <- rep(1 : NROW(contents), sapply(contents$Aliases, length)) idx <- match(datasets, unlist(contents$Aliases), 0L) dataIndex[which(idx != 0L), 2L] <- contents[aliasIndices[idx], "Title"] } if(length(datasets)) dataIndex[, 1L] <- as.vector(ifelse(datasets == names(datasets), datasets, paste0(datasets, " (", names(datasets), ")"))) dimnames(dataIndex) <- NULL dataIndex } ### * .build_demo_index .build_demo_index <- function(demoDir) { ## Build an index with information about all available demos. ## ## We use both the contents of @file{00Index} (if possible) and the ## information which demos are actually available to build the real ## demo index. ## This ensures that demo() really lists all *available* demos, even ## if some might be 'undocumented', i.e., without index information. ## Use .check_demo_index() to check whether available demo code and ## docs are in sync. ## if(!file_test("-d", demoDir)) stop(gettextf("directory '%s' does not exist", demoDir), domain = NA) demoFiles <- list_files_with_type(demoDir, "demo") demoTopics <- unique(basename(file_path_sans_ext(demoFiles))) if(!length(demoTopics)) return(matrix("", 0L, 2L)) demoIndex <- cbind(demoTopics, "") if(file_test("-f", INDEX <- file.path(demoDir, "00Index"))) { demoEntries <- tryCatch(read.00Index(INDEX), error = identity) if(inherits(demoEntries, "error")) warning(gettextf("cannot read index information in file '%s'", INDEX), domain = NA) else { idx <- match(demoTopics, demoEntries[ , 1L], 0L) demoIndex[which(idx != 0L), 2L] <- demoEntries[idx, 2L] } } dimnames(demoIndex) <- NULL demoIndex } ### * .check_demo_index .check_demo_index <- function(demoDir) { if(!file_test("-d", demoDir)) stop(gettextf("directory '%s' does not exist", demoDir), domain = NA) info_from_build <- .build_demo_index(demoDir) info_from_index <- tryCatch(read.00Index(file.path(demoDir, "00Index")), error = function(e) stop(gettextf("cannot read index information in file '%s'", file.path(demoDir, "00Index")), domain = NA)) bad_entries <- list(missing_from_index = info_from_build[grep("^[[:space:]]*$", info_from_build[ , 2L]), 1L], missing_from_demos = info_from_index[!info_from_index[ , 1L] %in% info_from_build[ , 1L], 1L]) class(bad_entries) <- "check_demo_index" bad_entries } print.check_demo_index <- function(x, ...) { if(length(x$missing_from_index)) { writeLines("Demos with missing or empty index information:") print(x$missing_from_index, ...) } if(length(x$missing_from_demos)) { writeLines("Demo index entries without corresponding demo:") print(x$missing_from_demos, ...) } invisible(x) } ### * .build_hsearch_index .build_hsearch_index <- function(contents, packageName, defaultEncoding = NULL) { ## Build an index of the Rd contents in 'contents', of a package ## named 'packageName' in a form useful for help.search(). ## As from 2.3.0 the installation directory is no longer recorded, ## but the format is kept for back-compatibility. dbAliases <- dbConcepts <- dbKeywords <- matrix(character(), ncol = 3L) if((nr <- NROW(contents)) > 0L) { ## IDs are used for indexing the Rd objects in the help.search ## db. IDs <- seq_len(nr) if(!is.data.frame(contents)) { colnames(contents) <- c("Name", "Aliases", "Title", "Keywords") base <- contents[, c("Name", "Title"), drop = FALSE] ## If the contents db is not a data frame, then it has the ## aliases collapsed. Split again as we need the first ## alias as the help topic to indicate for matching Rd ## objects. aliases <- strsplit(contents[, "Aliases"], " +") ## Don't do this for keywords though, as these might be ## non-standard (and hence contain white space ...). encoding <- NULL } else { base <- as.matrix(contents[, c("Name", "Title")]) aliases <- contents[, "Aliases"] encoding <- contents$Encoding # may not be there ... } if(is.null(encoding)) encoding <- character(length = nr) if(!is.null(defaultEncoding)) encoding[!nzchar(encoding)] <- defaultEncoding keywords <- contents[, "Keywords"] ## We create 4 character matrices (cannot use data frames for ## efficiency reasons): 'dbBase' holds all character string ## data; 'dbAliases', 'dbConcepts' and 'dbKeywords' hold ## character vector data in a 3-column character matrix format ## with entry, ID of the Rd object the entry comes from, and the ## package the object comes from. The latter is useful when ## subscripting the help.search db according to package. dbBase <- cbind(packageName, "", IDs, base, topic = sapply(aliases, "[", 1L), encoding) ## If there are no aliases at all, cbind() below would give ## matrix(packageName, ncol = 1L). (Of course, Rd objects ## without aliases are useless ...) if(length(tmp <- unlist(aliases))) dbAliases <- cbind(tmp, rep.int(IDs, sapply(aliases, length)), packageName) ## And similarly if there are no keywords at all. if(length(tmp <- unlist(keywords))) dbKeywords <- cbind(tmp, rep.int(IDs, sapply(keywords, length)), packageName) ## Finally, concepts are a feature added in R 1.8 ... if("Concepts" %in% colnames(contents)) { concepts <- contents[, "Concepts"] if(length(tmp <- unlist(concepts))) dbConcepts <- cbind(tmp, rep.int(IDs, sapply(concepts, length)), packageName) } } else dbBase <- matrix(character(), ncol = 7L) colnames(dbBase) <- c("Package", "LibPath", "ID", "name", "title", "topic", "Encoding") colnames(dbAliases) <- c("Aliases", "ID", "Package") colnames(dbKeywords) <- c("Keywords", "ID", "Package") colnames(dbConcepts) <- c("Concepts", "ID", "Package") list(dbBase, dbAliases, dbKeywords, dbConcepts) } ### * .build_links_index .build_links_index <- function(contents, package) { if(length(contents)) { aliases <- contents$Aliases lens <- sapply(aliases, length) files <- sub("\\.[Rr]d$", "\\.html", contents$File) structure(file.path("../..", package, "html", rep.int(files, lens)), names = unlist(aliases)) } else character() } ### mode: outline-minor *** ### outline-regexp: "### [*]+" *** ### End: ***