# File src/library/tools/R/license.R # Part of the R package, http://www.R-project.org # # Copyright (C) 1995-2013 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # http://www.r-project.org/Licenses/ ## ## We want *standardized* license specs so that we can compute on them. ## In particular, we want to know whether licenses are recognizable as ## FOSS (http://en.wikipedia.org/wiki/Free_and_open-source_software) ## licenses. ## ## A license spec is standardized ("canonical") if it is an alternative ## of component specs which are one of the following: ## ## A. "Unlimited" ## B. "file LICENSE" or "file LICENCE" ## C. A specification based on the R license db ## * A standard short specification (SSS field) ## * The name or abbreviation of an unversioned license ## * The name of abbreviation of a versioned license, optionally ## followed by a version spec ## * The name of a versioned license followed by the version ## * The abbrevation of a versioned license combined with '-', ## optionally followed by an extension spec as in B (in principle, ## only if the base license is extensible). ## ## A license spec is standardizable if we know to transform it to ## standardized form. ## ## Note that the R license db also contains non-FOSS licenses, and hence ## information (FOSS field) on the FOSS status of the licenses. ## Ideally, a license taken as FOSS would be approved as free by the FSF ## and as open by the OSI: we also take licenses as FOSS when approved ## by the FSF (and not rejected by the OSI). ## ## See ## http://www.gnu.org/licenses/license-list.html ## http://opensource.org/licenses/alphabetical ## fot the FSF and OSI license lists, and also ## http://www.fsf.org/licensing/licenses ## http://en.wikipedia.org/wiki/List_of_FSF_approved_software_licences ## http://en.wikipedia.org/wiki/List_of_OSI_approved_software_licences ## for more information. ## re_anchor <- function(s) if(length(s)) paste0("^", s, "$") else character() re_group <- function(s) if(length(s)) paste0("(", s, ")") else character() re_or <- function(s, group = TRUE) { if(!length(s)) character() else if(group) re_group(paste(s, collapse = "|")) else paste(s, collapse = "|") } .make_R_license_db <- function(paths = NULL) { if(is.null(paths)) paths <- unlist(strsplit(Sys.getenv("R_LICENSE_DB_PATHS"), .Platform$path.sep, fixed = TRUE)) paths <- c(paths, file.path(R.home("share"), "licenses", "license.db")) ldb <- Reduce(function(u, v) merge(u, v, all = TRUE), lapply(unique(normalizePath(paths)), read.dcf)) ## Merging matrices gives a data frame. ldb <- as.matrix(ldb) ldb[is.na(ldb)] <- "" ## (Could also keep NAs and filter on is.finite() in subsequent ## computations.) ## FOSS == "yes" implues Restricts_use = "no": ldb[ldb[, "FOSS"] == "yes", "Restricts_use"] <- "no" ldb <- data.frame(ldb, stringsAsFactors = FALSE) ldb$Labels <- R_license_db_labels(ldb) ldb[!duplicated(ldb$Labels), ] } R_license_db_labels <- function(ldb) { if(is.null(ldb)) return(NULL) lab <- ldb$SSS pos <- which(lab == "") abbrevs <- ldb$Abbrev[pos] versions <- ldb$Version[pos] lab[pos] <- ifelse(abbrevs != "", abbrevs, ldb$Name[pos]) ind <- nzchar(versions) pos <- pos[ind] lab[pos] <- sprintf("%s version %s", lab[pos], versions[ind]) lab } R_license_db <- local({ val <- NULL function(new) { if(!missing(new)) val <<- new else val } }) R_license_db(.make_R_license_db()) .make_R_license_db_vars <- function() { ## Build license regexps and tables according to the specs. ldb <- R_license_db() ## Standard short specification (SSS field) from the R license db. pos <- which(nzchar(ldb$SSS)) names(pos) <- ldb$SSS[pos] tab_sss <- pos has_version <- nzchar(ldb$Version) has_abbrev <- nzchar(ldb$Abbrev) ## Name or abbreviation of an unversioned license from the R license ## db. pos <- which(!has_version) names(pos) <- ldb$Name[pos] tab_unversioned <- pos pos <- which(has_abbrev & !has_version) tab_unversioned[ldb$Abbrev[pos]] <- pos ## Versioned licenses from the R license db. ## Style A: Name of abbreviation of a versioned license, optionally ## followed by a version spec ## Style B: Name of a versioned license followed by the version. ## Style C: Abbrevation of a versioned license combined with '-'. pos <- which(has_version) names(pos) <- ldb$Name[pos] tab_versioned_style_A <- split(pos, names(pos)) tab_versioned_style_B <- pos names(tab_versioned_style_B) <- paste(names(pos), ldb$Version[pos]) pos <- which(has_version & has_abbrev) tab_versioned_style_A <- c(tab_versioned_style_A, split(pos, ldb$Abbrev[pos])) tab_versioned_style_C <- pos names(tab_versioned_style_C) <- sprintf("%s-%s", ldb$Abbrev[pos], ldb$Version[pos]) operators <- c("<", "<=", ">", ">=", "==", "!=") re_numeric_version <- .standard_regexps()$valid_numeric_version re_single_version_spec <- paste0("[[:space:]]*", re_or(operators), "[[:space:]]*", re_group(re_numeric_version), "[[:space:]]*") re_version_spec <- paste0("\\(", paste0("(", re_single_version_spec, ",)*"), re_single_version_spec, "\\)") re_sss <- re_or(names(tab_sss)) re_unversioned <- re_or(names(tab_unversioned)) re_versioned_style_A <- paste0(re_or(names(tab_versioned_style_A)), "[[:space:]]*", paste0("(", re_version_spec, ")*")) ## Let's be nice ... re_versioned_style_B <- re_or(paste0(ldb$Name[has_version], "[[:space:]]+([Vv]ersion[[:space:]]+)?", ldb$Version[has_version])) re_versioned_style_C <- re_or(names(tab_versioned_style_C)) re_license_in_db <- re_or(c(re_sss, re_unversioned, re_versioned_style_A, re_versioned_style_B, re_versioned_style_C)) re_license_file <- "file LICEN[CS]E" re_license_extension <- sprintf("[[:space:]]*\\+[[:space:]]*%s", re_license_file) ## ## Many standard licenses actually do not allow extensions. ## Ideally, we would only allow the extension markup for extensible ## standard licenses, as identified via an Extensible: TRUE field in ## the license db. But version ranges make this tricky: e.g., ## GPL (>= 2) + file LICENSE ## is not right as GPL-2 does not allow extensions ... ## Hence, for now allow the extension markup with all standard ## licenses. ## re_component <- re_anchor(re_or(c(sprintf("%s(%s)?", re_license_in_db, re_license_extension), re_license_file, "Unlimited"))) list(re_component = re_component, re_license_file = re_license_file, re_license_extension = re_license_extension, re_single_version_spec = re_single_version_spec, re_sss = re_sss, re_unversioned = re_unversioned, re_versioned_style_A = re_versioned_style_A, re_versioned_style_B = re_versioned_style_B, re_versioned_style_C = re_versioned_style_C, tab_sss = tab_sss, tab_unversioned = tab_unversioned, tab_versioned_style_A = tab_versioned_style_A, tab_versioned_style_B = tab_versioned_style_B, tab_versioned_style_C = tab_versioned_style_C) } R_license_db_vars <- local({ val <- NULL function(new) { if(!missing(new)) val <<- new else val } }) R_license_db_vars(.make_R_license_db_vars()) R_license_db_refresh_cache <- function(paths = NULL) { R_license_db(.make_R_license_db(paths)) R_license_db_vars(.make_R_license_db_vars()) } ## Standardizable license specs: ## License specifications found on CRAN/BioC/Omegahat and manually ## classified as standardizable software licenses (even though not ## standardized/canonical), provided as a list of license specs named by ## the respective standardizations. ## With ongoing standardization this should gradually be eliminated. ## Last updated: 2009-02-19. ## Nasty issues. ## * There really is no GPL version 2.0. ## Unfortunately, the FSF uses 2.0 in URLs or links ## (http://www.gnu.org/licenses/old-licenses/gpl-2.0.html) ## The text clearly says "Version 2, June 1991". ## * There really is no LGPL version 2.0. ## Unfortunately, the FSF uses 2.0 in URLs or links ## (http://www.gnu.org/licenses/old-licenses/). ## The text clearly says "Version 2, June 1991". ## * CeCILL is a bit of a mess: the current version is referred to as ## "version 2" (http://www.cecill.info/licences.en.html) but ## internally uses "Version 2.0 dated 2006-09-05" ## (http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt). .standardizable_license_specs <- list("Artistic-2.0" = c("The Artistic License, Version 2.0", "Artistic 2.0", "Artistic-2.0, see http://www.opensource.org/licenses/artistic-license-2.0.php" ), "BSL" = c("Boost Software License", "Boost Software License 1.0", "BSL 1.0" ), "CeCILL-2" = c("CeCILL-2.0" ), "GPL" = c("GNU Public License", "Gnu GPL", "GNU GPL", "GPL (http://www.gnu.org/copyleft/gpl.html)" ), "GPL-2" = c(## ## There is no GPL 2.0, see above. "GNU General Public License 2.0.", "GPL 2.0", "GPL version 2.0", "GPL2.0", ## "GPL Version 2", "GNU GPL Version 2", "GNU GPL version 2", "GNU GPL version 2.", "GPL (version 2)", "GPL 2", "GPL 2.", "GPL version 2", "GPL version 2 (June, 1991)", "GPL version 2.", "GPL2", ## BioC: "GPL V2", "GPL, version 2" ), "GPL-3" = c("GPL Version 3", "GPL version 3", "GNU General Public Licence (GPLv3)", "GPL 3", "GPL v3" ), "GPL (>= 2)" = c(## ## There is no GPL 2.0, see above. "GNU GPL v2.0 or greater", "GPL 2.0 or higher", "GPL 2.0 or newer", "GPL version 2.0 or later", "GPL version 2.0 or newer", ## "GNU GPL (version 2 or later)", "GNU GPL (version 2 or later); see the file COPYING for details", "GNU GPL version 2 or newer", "GNU General Public License version 2 or newer", "GPL version 2 or later", "GPL ( version 2 or later)", "GPL (Version 2 or above)", "GPL (Version 2 or later)", "GPL (version 2 or higher)", "GPL (version 2 or later)", "GPL (version 2 or later, see the included file GPL)", "GPL (version 2 or newer)", "GPL 2 or later", "GPL 2 or newer", "GPL version 2 or any later version", "GPL Version 2 or later", "GPL Version 2 or later.", "GPL Version 2 or newer", "GPL Version 2 or newer.", "GPL version 2 (June, 1991) or later", "GPL version 2 (or newer)", "GPL version 2 or later.", "GPL version 2 or newer", "GPL version 2 or newer (http://www.gnu.org/copyleft/gpl.html)", "GPL version 2 or newer (see README).", "GPL version 2 or newer.", "GPL version 2 or newer. http://www.gnu.org/copyleft/gpl.html", "GPL version 2, or, at your option, any newer version.", "GPL Version 2 (or later)", "GPL version 2 (or later)", "GPL version 2 or higher", "GPL2 or later", "GPL>=2", "GNU General Public License (version 2 or later)" ), "GPL (>= 3)" = c("GPL (version 3 or later)", "GPL >=3" ), "GPL | LGPL" = c("GPL or LGPL by your choice" ), "GPL | BSD" = c("GPL, BSD" ), "GPL-2 | file LICENSE" = c("use under GPL2, or see file LICENCE" ), "LGPL" = c("LGPL (see ).", "GNU LGPL (same as wxWidgets)." ), "LGPL-2" = c("LGPL2", "LGPL2.0" ), "LGPL-2.1" = c("LGPL version 2.1" ), "LGPL-3" = c("LGPL-v3" ), "LGPL (>= 2.0)" = c(## ## There is no LGPL-2.0, see above. "LGPL >= 2.0", ## "LGPL Version 2 or later.", "LGPL version 2 or newer", "LGPL (version 2 or later)", "LGPL version 2 or later" ), "LGPL (>= 2.1)" = c("LGPL version 2.1 or later" ), "LGPL (>= 3.0)" = c("LGPL >=3" ), "X11" = c("X11 (http://www.x.org/Downloads_terms.html)" ), "Unlimited" = c("Unlimited use and distribution." ) ) .standardizable_license_specs_db <- data.frame(ispecs = unlist(.standardizable_license_specs), ospecs = rep.int(names(.standardizable_license_specs), sapply(.standardizable_license_specs, length)), stringsAsFactors = FALSE) analyze_license <- function(x) { .make_results <- function(is_empty = FALSE, is_canonical = FALSE, bad_components = character(), is_standardizable = FALSE, is_verified = FALSE, standardization = NA_character_, components = NULL, expansions = NULL, extensions = NULL, pointers = NULL, is_FOSS = NA, restricts_use = NA) list(is_empty = is_empty, is_canonical = is_canonical, bad_components = bad_components, is_standardizable = is_standardizable, is_verified = is_verified, standardization = standardization, components = components, expansions = expansions, extensions = extensions, pointers = pointers, is_FOSS = is_FOSS, restricts_use = restricts_use) x <- .strip_whitespace(x) if(is.na(x) || (x == "")) { ## Not really a lot to check ... ## (Note that non-standardizable license specs are dropped by ## writePACKAGES() and friends.) return(.make_results(is_empty = TRUE)) } pointers <- NULL extensions <- NULL expansions <- NULL is_verified <- FALSE is_FOSS <- NA restricts_use <- NA ## Try splitting into the individual components. components <- .strip_whitespace(unlist(strsplit(x, "|", fixed = TRUE))) ## Now analyze the individual components. ok <- grepl(R_license_db_vars()$re_component, components) bad_components <- components[!ok] is_canonical <- all(ok) ## Is the license specification standardizable? standardizable <- components %in% .standardizable_license_specs_db$ispecs is_standardizable <- (is_canonical || all(standardizable)) standardization <- if(is_standardizable) { ## Standardize the ones which are standardizable but not yet ## standardized. ind <- !ok & standardizable if(any(ind)) components[ind] <- .standardize_license_components(components[ind]) ## Canonicalize the standardized ones a bit more (as we are ## rather generous about using whitespace). ind <- ok & grepl("\\(", components) if(any(ind)) { s <- sub("[[:space:]]*\\([[:space:]]*", " \\(", components[ind]) s <- sub("[[:space:]]*\\)", "\\)", s) s <- gsub("[[:space:]]*,[[:space:]]*", ", ", s) ## Really re_or(operators) ... s <- gsub("[[:space:]]+(<=?|>=?|==|!=)", " \\1", s) components[ind] <- gsub(sprintf("[[:space:]]*(%s)", .standard_regexps()$valid_numeric_version), " \\1", s) } paste(components, collapse = " | ") } else NA_character_ ## Analyze components provided that we know we can standardize. if(is_standardizable) { verifiable <- function(x, v = "yes") !is.null(x) && all(!is.na(x) & (x == v)) ## (More generally we could test for positive length of x: but ## a length test is needed because all(NULL) |=> TRUE.) expansions <- lapply(components, expand_license_spec_component_from_db) ## The license is FOSS if there is one component which is ## "Unlimited" or has a positive number of expansions all of ## which are FOSS. ## If all components have a positive number of expansions where ## at least one is not FOSS, the license is not FOSS. ## Otherwise we do not know. is_FOSS <- if(any(components == "Unlimited")) { TRUE } else if(any(sapply(expansions, function(e) verifiable(e$FOSS)))) { TRUE } else if(all(sapply(expansions, function(e) any(e$FOSS == "no")))) { FALSE } else NA ## The license is verified (as FOSS) if it was verified as FOSS. is_verified <- !is.na(is_FOSS) && is_FOSS ## The license does not restrict use if it is verified as FOSS, ## or if there is one component with a positive number of ## expansions all of which do not restrict use. ## If all components have a positive number of expansions where ## at least one of which restricts use, the license restricts ## use. ## Otherwise, we do not know. restricts_use <- if(is_verified) { FALSE } else if(any(sapply(expansions, function(e) (length(e) && all(e$Restricts_use == "no"))))) { FALSE } else if(all(sapply(expansions, function(e) any(e$Restricts_use == "yes")))) { TRUE } else NA re <- R_license_db_vars()$re_license_file pos <- grep(sprintf("%s$", re), components) if(length(pos)) { elements <- components[pos] ## Components with license file pointers. pointers <- sub(".*file ", "", elements) ## Components with license extensions. ind <- grepl("+", elements, fixed = TRUE) if(any(ind)) extensions <- data.frame(components = elements[ind], extensible = sapply(expansions[pos[ind]], function(e) verifiable(e$Extensible)), stringsAsFactors = FALSE) } ## Replace expansions by their labels from the license db. ## (As these are unique, we can always easily get the full ## expansions back.) expansions <- lapply(expansions, `[[`, "Labels") ## Components which are "Unlimited" or "file LICEN[CS]E" have ## empty expansions: ind <- grepl(sprintf("^(Unlimited|%s)$", re), components) if(any(ind)) expansions[ind] <- as.list(components[ind]) ## Components with license extensions have this dropped in the ## expansion. m <- regexpr(sprintf("\\+ *%s$", re), components) ind <- (m > -1L) expansions[ind] <- Map(paste, expansions[ind], regmatches(components, m)) } .make_results(is_canonical = is_canonical, bad_components = bad_components, is_standardizable = is_standardizable, standardization = standardization, is_verified = is_verified, components = components, expansions = expansions, extensions = extensions, pointers = pointers, is_FOSS = is_FOSS, restricts_use = restricts_use) } .standardize_license_components <- function(x) { with(.standardizable_license_specs_db, ospecs[match(x, ispecs)]) } analyze_licenses <- function(x, db = NULL) { x <- as.character(x) if(!length(x)) return(NULL) ## As analyzing licenses is costly, only analyze the unique specs. v <- unique(x) out <- as.data.frame(do.call(rbind, lapply(v, analyze_license)), stringsAsFactors = FALSE) pos <- match(c("is_empty", "is_canonical", "is_standardizable", "is_verified", "standardization", "is_FOSS", "restricts_use"), names(out)) out[pos] <- lapply(out[pos], unlist) ## And re-match specs to the unique specs. out <- out[match(x, v), ] rownames(out) <- NULL if(!is.null(db)) { ## db should be a package db (data frame or character matrix) ## with rows corresponding to the elements of x. cnms <- colnames(db) if(!is.na(pos <- match("License_is_FOSS", cnms))) { lif <- db[, pos] pos <- which(!is.na(lif)) out$is_FOSS[pos] <- out$is_verified[pos] <- (lif[pos] == "yes") ## is_FOSS implies !restricts_use: pos <- pos[lif[pos] == "yes"] out$restricts_use[pos] <- FALSE } if(!is.na(pos <- match("License_restricts_use", cnms))) { lru <- db[, pos] pos <- which(!is.na(lru)) out$restricts_use[pos] <- (lru[pos] == "yes") ## restricts_use implies !is_FOSS: pos <- pos[lru[pos] == "yes"] out$is_FOSS[pos] <- out$is_verified[pos] <- FALSE } } out } build_license_db <- function(dir, unpacked = FALSE) { CRAN <- getOption("repos")["CRAN"] if(missing(dir) && substring(CRAN, 1L, 7L) == "file://") dir <- file.path(substring(CRAN, 8L), "src", "contrib") fields <- c("License", "License_is_FOSS", "License_restricts_use", "Maintainer") db <- .build_repository_package_db(dir, fields, unpacked = unpacked) ## Actually, for Omegehat this is not a good idea as this retains ## old versions in the "main" src/contrib directory. But let's not ## worry about this for now ... db <- do.call("rbind", db) ## Retain what is needed ... data.frame(db[ , c("Package", "Version", fields)], stringsAsFactors = FALSE) } analyze_licenses_in_license_db <- function(db) { results <- cbind(db, analyze_licenses(db$License, db)) ## Keep License_is_FOSS and License_restricts_use columns for now, ## so that we can identify the is_FOSS and restricts_use values ## obtained from these. results } analyze_licenses_in_repository <- function(dir, unpacked = FALSE, full = TRUE) { db <- build_license_db(dir, unpacked) if(!full) { ## Only keep the highest available versions. ## Such an option might be useful for build_license_db() ## itself. db <- .remove_stale_dups(db) } analyze_licenses_in_license_db(db) } summarize_license_db <- function(db) { packages <- db$Package if(any(duplicated(packages))) packages <- sprintf("%s_%s", packages, db$Version) packages <- split(packages, db$License) licenses <- names(packages) out <- data.frame(Licenses = licenses, stringsAsFactors = FALSE) ## To get the 'packages' list into a data frame without I() ... out$Packages <- packages cat(formatDL(out$Licenses, sapply(out$Packages, function(p) paste(unique(p), collapse = " ")), style = "list"), sep = "\n\n") invisible(out) } expand_license_spec_component_from_db <- function(x) { ## Determine the license from the db matching a license spec ## component. ldb <- R_license_db() ldb_vars <- R_license_db_vars() .numeric_version_meets_constraints_p <- function(version, constraints) { version <- as.numeric_version(version) for(term in constraints) { re <- ldb_vars$re_single_version_spec op <- sub(re, "\\1", term) target <- sub(re, "\\2", term) if(!eval(parse(text = paste("version", op, "target")))) return(FALSE) } TRUE } if(x == "Unlimited" || grepl(x, ldb_vars$re_license_file)) return(NULL) ## Drop possible license extension. x <- sub(ldb_vars$re_license_extension, "", x) if(grepl(re_anchor(ldb_vars$re_sss), x)) { pos <- ldb_vars$tab_sss[x] ldb[pos, ] } else if(grepl(re_anchor(ldb_vars$re_unversioned), x)) { pos <- ldb_vars$tab_unversioned[x] ldb[pos, ] } else if(grepl(re <- re_anchor(ldb_vars$re_versioned_style_A), x)) { ## Extract name/abbrev and version spec. v <- sub(re, "\\2", x) x <- sub(re, "\\1", x) ## First, find the matching entries matching the name/abbrev. pos <- ldb_vars$tab_versioned_style_A[[x]] entries <- ldb[pos, ] ## Now determine the entries satisfying the version spec. v <- sub("[[:space:]]*\\((.*)\\)[[:space:]]*", "\\1", v) if(v != "") { constraints <- unlist(strsplit(v, "[[:space:]]*,[[:space:]]*")) entries <- entries[sapply(entries$Version, .numeric_version_meets_constraints_p, constraints), ] } entries } else if(grepl(re_anchor(ldb_vars$re_versioned_style_B), x)) { re <- sprintf("[[:space:]]+([Vv]ersion[[:space:]]+)?(%s)", .standard_regexps()$valid_numeric_version) x <- sub(re, " \\2", x) pos <- ldb_vars$tab_versioned_style_B[x] ldb[pos, ] } else if(grepl(re_anchor(ldb_vars$re_versioned_style_C), x)) { pos <- ldb_vars$tab_versioned_style_C[x] ldb[pos, ] } }