#  File src/library/stats/R/smspline.R
#  Part of the R package, http://www.R-project.org
#
#  Copyright (C) 1995-2014 The R Core Team
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  http://www.r-project.org/Licenses/

.nknots.smspl <- function(n) {
    ## Number of inner knots
    if(n < 50L) n
    else trunc({
        a1 <- log2( 50)
        a2 <- log2(100)
        a3 <- log2(140)
        a4 <- log2(200)
        if	(n < 200L) 2^(a1+(a2-a1)*(n-50)/150)
        else if (n < 800L) 2^(a2+(a3-a2)*(n-200)/600)
        else if (n < 3200L)2^(a3+(a4-a3)*(n-800)/2400)
        else  200 + (n-3200)^0.2
    })
}
n.knots <- function(n) {
    message(".nknots.smspl() is now exported; use it instead of n.knots()")
    .nknots.smspl(n)
}

smooth.spline <-
    function(x, y = NULL, w = NULL, df, spar = NULL, cv = FALSE,
             all.knots = FALSE, nknots = .nknots.smspl, keep.data = TRUE,
             df.offset = 0, penalty = 1, control.spar = list(),
             tol = 1e-6 * IQR(x))
{
    contr.sp <- list(low = -1.5, # low = 0.      was default till R 1.3.x
                     high = 1.5,
                     tol = 1e-4, # tol = 0.001   was default till R 1.3.x
                     eps = 2e-8, # eps = 0.00244 was default till R 1.3.x
                     maxit = 500, trace = getOption("verbose"))
    contr.sp[names(control.spar)] <- control.spar
    if(!all(sapply(contr.sp[1:4], is.numeric)) ||
       contr.sp$tol < 0 || contr.sp$eps <= 0 || contr.sp$maxit <= 0)
        stop("invalid 'control.spar'")

    xy <- xy.coords(x, y)
    y <- xy$y
    x <- xy$x
    if(!all(is.finite(c(x, y))))
        stop("missing or infinite values in inputs are not allowed")
    n <- length(x)
    if(is.na(n)) stop("invalid number of points")
    w <-
	if(is.null(w)) rep_len(1, n)
	else {
	    if(n != length(w)) stop("lengths of 'x' and 'w' must match")
	    if(any(w < 0)) stop("all weights should be non-negative")
	    if(all(w == 0)) stop("some weights should be positive")
	    (w * sum(w > 0))/sum(w)
	} # now sum(w) == #{obs. with weight > 0} == sum(w > 0)

    ## Replace y[], w[] for same x[] (to a precision of 'tol') by their mean :
    if(!is.finite(tol) || tol <= 0)
        stop("'tol' must be strictly positive and finite")
    xx <- round((x - mean(x))/tol)  # de-mean to avoid possible overflow
    nd <- !duplicated(xx); ux <- sort(x[nd]); uxx <- sort(xx[nd])
    nx <- length(ux)
    if(nx <= 3L) stop("need at least four unique 'x' values")
    if(nx == n) { # speedup
	ox <- TRUE
	tmp <- cbind(w, w*y, w*y^2)[order(x),]
    } else {
	ox <- match(xx, uxx)
	## Faster, much simplified version of tapply()
	tapply1 <- function (X, INDEX, FUN = NULL, ..., simplify = TRUE) {
	    sapply(X = unname(split(X, INDEX)), FUN = FUN, ...,
		   simplify = simplify, USE.NAMES = FALSE)
	}
	tmp <- matrix(unlist(tapply1(seq_len(n), ox,
				     function(i, y, w)
				     c(sum(w[i]), sum(w[i]*y[i]),sum(w[i]*y[i]^2)),
				     y = y, w = w), use.names = FALSE),
		      ncol = 3, byrow = TRUE)
    }
    wbar <- tmp[, 1L]
    ybar <- tmp[, 2L]/ifelse(wbar > 0, wbar, 1)
    yssw <- sum(tmp[, 3L] - wbar*ybar^2) # will be added to RSS for GCV
    ## Note: now  cv in {NA,FALSE,TRUE}
    if(is.na(cv) && !missing(df))
	stop("'cv' must not be NA when 'df' is specified")
    CV <- !is.na(cv) && cv
    if(CV && nx < n)
        warning("cross-validation with non-unique 'x' values seems doubtful")
    r.ux <- ux[nx] - ux[1L]
    xbar <- (ux - ux[1L])/r.ux           # scaled to [0,1]
    if(all.knots) {
        if(!missing(nknots) && !is.null(nknots))
            warning("'all.knots' is TRUE; 'nknots' specification is disregarded")
        nknots <- nx
    } else if(is.null(nknots))# <- for back compatibility
	nknots <- .nknots.smspl(nx)
    else {
	if(is.function(nknots))
	    nknots <- nknots(nx)
	else if(!is.numeric(nknots))
	    stop("'nknots' must be numeric (in {1,..,n})")
	if(nknots < 1)
	    stop("'nknots' must be at least 1")
	else if(nknots > nx)
	    stop("cannot use more inner knots than unique 'x' values")
    }
    knot <- c(rep(xbar[1 ], 3),
              if(all.knots) xbar else xbar[seq.int(1, nx, length.out = nknots)],
              rep(xbar[nx], 3))
    nk <- nknots + 2L ## == length(knot) - 4

    ## ispar != 1 : compute spar (later)
    ispar <-
        if(is.null(spar) || missing(spar)) { ## || spar == 0
            if(contr.sp$trace) -1L else 0L
        } else 1L
    spar <- if(ispar == 1L) as.double(spar) else double(1)
    ## was <- if(missing(spar)) 0 else if(spar < 1.01e-15) 0 else  1
    ## but package forecast passed a length-0 vector.
    if(length(spar) != 1) stop("'spar' must be of length 1")

    ## icrit {../src/sslvrg.f}:
    ##		(0 = no crit,  1 = GCV ,  2 = ord.CV , 3 = df-matching)
    icrit <- if(is.na(cv)) 0L else if(cv) 2L else 1L
    dofoff <- df.offset
    if(!missing(df)) { # not when cv was NA
	if(df > 1 && df <= nx) {
	    if(!missing(cv))
		warning("specified both 'df' and 'cv'; will disregard the latter")
	    icrit <- 3L
	    dofoff <- df
	} else warning("you must supply 1 < df <= n,  n = #{unique x} = ", nx)
    }
    iparms <- c(icrit=icrit, ispar=ispar, iter=as.integer(contr.sp$maxit))

    keep.stuff <- FALSE ## << to become an argument in the future
    ans.names <- c("coef","ty","lev","spar","parms","crit","iparms","ier",
                   if(keep.stuff) "scratch")
    fit <- .Fortran(C_rbart,		# code in ../src/qsbart.f
		    as.double(penalty),
		    as.double(dofoff),
		    x = as.double(xbar),
		    y = as.double(ybar),
		    w = as.double(wbar), # changed in the Fortran code
		    ssw = as.double(yssw),
		    as.integer(nx),
		    as.double(knot),
		    as.integer(nk),
		    coef = double(nk),
		    ty = double(nx),
		    lev = double(if(is.na(cv))1L else nx),
		    crit = double(1),
		    iparms = iparms,
		    spar = spar,
		    parms = unlist(contr.sp[1:4]),
		    scratch = double(17L * nk + 1L),
		    ld4  = 4L,
		    ldnk = 1L,
		    ier = integer(1L)
		    )[ans.names]
    ## now we have clobbered wbar, recompute it.
    wbar <- tmp[, 1]

    if(is.na(cv)) lev <- df <- NA
    else {
	lev <- fit$lev
	df <- sum(lev)
	if(is.na(df))
	    stop("NA lev[]; probably smoothing parameter 'spar' way too large!")
    }
    if(fit$ier > 0L ) {
        sml <- fit$spar < 0.5
	wtxt <- paste("smoothing parameter value too",
                      if(sml) "small" else "large")
        if(sml) {
            ## used to give warning too and mean() as below, but that's rubbish
            stop(wtxt)
        } else {
            fit$ty <- rep(mean(y), nx) ## would be df = 1
            df <- 1
            warning(wtxt,"\nsetting df = 1  __use with care!__")
        }
    }
    cv.crit <-
	if(is.na(cv)) NA
	else if(cv) {
	    ww <- wbar
	    ww[!(ww > 0)] <- 1
	    weighted.mean(((y - fit$ty[ox])/(1 - (lev[ox] * w)/ww[ox]))^2, w)
	} else weighted.mean((y - fit$ty[ox])^2, w)/
	    (1 - (df.offset + penalty * df)/n)^2
    pen.crit <- sum(wbar * (ybar - fit$ty)^2)
    fit.object <- list(knot = knot, nk = nk, min = ux[1L], range = r.ux,
		       coef = fit$coef)
    class(fit.object) <- "smooth.spline.fit"
    ## parms :  c(low = , high = , tol = , eps = )
    object <- list(x = ux, y = fit$ty, w = wbar, yin = ybar,
                   data = if(keep.data) list(x = x, y = y, w = w),
		   lev = lev, cv.crit = cv.crit, pen.crit = pen.crit,
                   crit = fit$crit,
                   df = df, spar = fit$spar,
                   lambda = unname(fit$parms["low"]),
                   iparms = fit$iparms, # c(icrit= , ispar= , iter= )
                   fit = fit.object, call = match.call())
    class(object) <- "smooth.spline"
    object
}

fitted.smooth.spline <- function(object, ...) {
    if(!is.list(dat <- object$data))
        stop("need result of smooth.spline(keep.data = TRUE)")
    ## note that object$x == unique(sort(object$data$x))
    object$y[match(dat$x, object$x)]
}

residuals.smooth.spline <-
    function (object, type = c("working", "response", "deviance",
                      "pearson", "partial"), ...)
{
    type <- match.arg(type)
    if(!is.list(dat <- object$data))
        stop("need result of smooth.spline(keep.data = TRUE)")
    r <- dat$y - object$y[match(dat$x, object$x)]
    ## this rest is `as' residuals.lm() :
    res <- switch(type,
                  working = ,
                  response = r,
                  deviance = ,
                  pearson = if (is.null(dat$w)) r else r * sqrt(dat$w),
                  partial = r)
    res <- naresid(object$na.action, res)
    if (type == "partial")
        stop('type = "partial" is not yet implemented')
        ## res <- res + predict(object, type = "terms")
    res
}


print.smooth.spline <- function(x, digits = getOption("digits"), ...)
{
    if(!is.null(cl <- x$call)) {
	cat("Call:\n")
	dput(cl, control=NULL)
    }
    ip <- x$iparms
    cv <- cl$cv
    if(is.null(cv)) cv <- FALSE else if(is.name(cv)) cv <- eval(cv)
    cat("\nSmoothing Parameter  spar=", format(x$spar, digits=digits),
        " lambda=", format(x$lambda, digits=digits),
        if(ip["ispar"] != 1L) paste0("(", ip["iter"], " iterations)"))
    cat("\n")
    cat("Equivalent Degrees of Freedom (Df):", format(x$df,digits=digits))
    cat("\n")
    cat("Penalized Criterion:", format(x$pen.crit, digits=digits))
    cat("\n")
    if(!is.na(cv))
        cat(if(cv) "PRESS: " else "GCV: ",
            format(x$cv.crit, digits = digits), "\n", sep = "")
    invisible(x)
}

predict.smooth.spline <- function(object, x, deriv = 0, ...)
{
    if(missing(x)) {
        if(deriv == 0)
            return(object[c("x", "y")])
        else x <- object$x
    }
    fit <- object$fit
    if(is.null(fit)) stop("not a valid \"smooth.spline\" object")
    else predict(fit, x, deriv, ...)
}

predict.smooth.spline.fit <- function(object, x, deriv = 0, ...)
{
    if(missing(x))
	x <- seq.int(from = object$min, to = object$min + object$range,
                     length.out = length(object$coef) - 4L)
    xs <- (x - object$min)/object$range # x scaled to [0,1]
    extrap.left <- xs < 0
    extrap.right <- xs > 1
    interp <- !(extrap <- extrap.left | extrap.right)
    n <- sum(interp) # number of xs in [0,1]
    y <- xs
    if(any(interp))
	y[interp] <- .Fortran(C_bvalus,
			      n	  = as.integer(n),
			      knot = as.double(object$knot),
			      coef = as.double(object$coef),
			      nk = as.integer(object$nk),
			      x	= as.double(xs[interp]),
			      s	= double(n),
			      order = as.integer(deriv))$s
    if(any(extrap)) {
	xrange <- c(object$min, object$min + object$range)
	if(deriv == 0) {
	    end.object <- Recall(object, xrange)$y
	    end.slopes <- Recall(object, xrange, 1)$y * object$range
	    if(any(extrap.left))
		y[extrap.left] <- end.object[1L] +
		    end.slopes[1L] * (xs[extrap.left] - 0)
	    if(any(extrap.right))
		y[extrap.right] <- end.object[2L] +
		    end.slopes[2L] * (xs[extrap.right] - 1)
	} else if(deriv == 1) {
	    end.slopes <- Recall(object, xrange, 1)$y * object$range
	    y[extrap.left] <- end.slopes[1L]
	    y[extrap.right] <- end.slopes[2L]
	}
	else y[extrap] <- 0
    }
    if(deriv > 0)
	y <- y/(object$range^deriv)
    list(x = x, y = y)
}

supsmu <-
  function(x, y, wt = rep(1, n), span = "cv", periodic = FALSE, bass = 0)
{
    if(span == "cv") span <- 0
    n <- length(y)
    if(!n || !is.numeric(y)) stop("'y' must be numeric vector")
    if(length(x) != n) stop("number of observations in 'x' and 'y' must match.")
    if(length(wt) != n)
	stop("number of weights must match number of observations.")
    if(span < 0 || span > 1) stop("'span' must be between 0 and 1.")
    if(periodic) {
	iper <- 2L
	xrange <- range(x)
	if(xrange[1L] < 0 || xrange[2L] > 1)
	    stop("'x' must be between 0 and 1 for periodic smooth")
    } else iper <- 1L
    okay <- is.finite(x + y + wt)
    ord <- order(x[okay], y[okay])
    ord <- cumsum(!okay)[okay][ord] + ord
    xo <- x[ord]
    leno <- length(ord)
    if(leno == 0L)
        stop("no finite observations")
    if(diff <- n - leno)
        warning(sprintf(ngettext(diff,
                                 "%d observation with NA, NaN or Inf deleted",
                                 "%d observations with NAs, NaNs and/or Infs deleted"),
                        diff), domain = NA)
    .Fortran(C_setsmu)
    smo <- .Fortran(C_supsmu,
		    as.integer(leno),
		    as.double(xo),
		    as.double(y[ord]),
		    as.double(wt[ord]),
		    as.integer(iper),
		    as.double(span),
		    as.double(bass),
		    smo=double(leno),
		    double(n*7L), double(1L))$smo
    ## eliminate duplicate xsort values and corresponding smoothed values
    dupx <- duplicated(xo)
    list(x = xo[!dupx], y = smo[!dupx])
}