#  File src/library/stats/R/loess.R
#  Part of the R package, http://www.R-project.org
#
#  Copyright (C) 1998-2013 The R Core Team
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  http://www.r-project.org/Licenses/

loess <-
function(formula, data, weights, subset, na.action, model = FALSE,
	 span = 0.75, enp.target, degree = 2L, parametric = FALSE,
	 drop.square = FALSE, normalize = TRUE,
	 family = c("gaussian", "symmetric"),
	 method = c("loess", "model.frame"),
	 control = loess.control(...), ...)
{
    family <- match.arg(family)
    method <- match.arg(method)
    mf <- match.call(expand.dots=FALSE)
    mf$model <- mf$span <- mf$enp.target <- mf$degree <-
	mf$parametric <- mf$drop.square <- mf$normalize <- mf$family <-
	    mf$method <- mf$control <- mf$... <- NULL
    mf[[1L]] <- quote(stats::model.frame)
    mf <- eval(mf, parent.frame())
    if (match.arg(method) == "model.frame") return(mf)
    mt <- attr(mf, "terms")
    y <- model.response(mf, "numeric")
    w <- model.weights(mf)
    if(is.null(w)) w <- rep_len(1, length(y))
    nmx <- as.character(attr(mt, "variables"))[-(1L:2)]
    x <- mf[, nmx, drop=FALSE]
    if(any(sapply(x, is.factor))) stop("predictors must all be numeric")
    x <- as.matrix(x)
    D <- ncol(x)
    nmx <- setNames(nm = colnames(x))
    drop.square <- match(nmx, nmx[drop.square], 0L) > 0L
    parametric <- match(nmx, nmx[parametric], 0L) > 0L
    if(!match(degree, 0L:2L, 0L)) stop("'degree' must be 0, 1 or 2")
    iterations <- if(family == "gaussian") 1 else control$iterations
    if(!missing(enp.target))
	if(!missing(span))
	    warning("both 'span' and 'enp.target' specified: 'span' will be used")
	else {				# White book p.321
	    tau <- switch(degree+1L, 1, D+1, (D+1)*(D+2)/2) - sum(drop.square)
	    span <- 1.2 * tau/enp.target
	}
    ## Let's add sanity checks on control
    if(!is.list(control) || !is.character(control$surface) ||
       !is.character(control$statistics) || !is.character(control$trace.hat) ||
       !is.numeric(control$cell) || !is.numeric(iterations))
        stop("invalid 'control' argument")
    fit <- simpleLoess(y, x, w, span, degree, parametric, drop.square,
		       normalize, control$statistics, control$surface,
		       control$cell, iterations, control$trace.hat)
    fit$call <- match.call()
    fit$terms <- mt
    fit$xnames <- nmx
    fit$x <- x
    fit$y <- y
    fit$weights <- w
    if(model) fit$model <- mf
    fit$na.action <- attr(mf, "na.action")
    fit
}

loess.control <-
  function(surface = c("interpolate", "direct"),
	   statistics = c("approximate", "exact"),
	   trace.hat = c("exact", "approximate"),
	   cell = 0.2, iterations = 4L, ...)
{
    list(surface=match.arg(surface),
	 statistics=match.arg(statistics),
	 trace.hat=match.arg(trace.hat),
	 cell=cell, iterations=iterations)
}


simpleLoess <-
  function(y, x, weights, span = 0.75, degree = 2L, parametric = FALSE,
	   drop.square = FALSE, normalize = TRUE,
	   statistics = "approximate", surface = "interpolate",
	   cell = 0.2, iterations = 1L, trace.hat = "exact")
{
    ## loess_ translated to R.

    D <- as.integer(NCOL(x))
    if (is.na(D)) stop("invalid NCOL(X)")
    if(D > 4) stop("only 1-4 predictors are allowed")
    N <- as.integer(NROW(x))
    if (is.na(N)) stop("invalid NCOL(X)")
    if(!N || !D)	stop("invalid 'x'")
    if(!length(y))	stop("invalid 'y'")
    x <- as.matrix(x)
    storage.mode(x) <- "double"
    storage.mode(y) <- "double"
    storage.mode(weights) <- "double"
    max.kd <-  max(N, 200)
    robust <- rep_len(1, N)
    divisor <- rep_len(1, D)
    if(normalize && D > 1L) {
	trim <- ceiling(0.1 * N)
	divisor <-
	    sqrt(apply(apply(x, 2L, sort)[seq(trim+1, N-trim), , drop = FALSE],
		       2L, var))
	x <- x/rep(divisor, rep_len(N, D))
    }
    sum.drop.sqr <- sum(drop.square)
    sum.parametric <- sum(parametric)
    nonparametric <- sum(!parametric)
    order.parametric <- order(parametric)
    x <- x[, order.parametric]
    order.drop.sqr <- (2L - drop.square)[order.parametric]
    if(degree == 1L && sum.drop.sqr)
	stop("specified the square of a factor predictor to be dropped when degree = 1")
    if(D == 1L && sum.drop.sqr)
	stop("specified the square of a predictor to be dropped with only one numeric predictor")
    if(sum.parametric == D) stop("specified parametric for all predictors")

    if(iterations)
    for(j in seq_len(iterations)) {
	robust <- weights * robust
	if(j > 1) statistics <- "none"
	else if(surface == "interpolate" && statistics == "approximate")
	    statistics <- if(trace.hat == "exact") "1.approx"
            else "2.approx" # trace.hat == "approximate"
	surf.stat <- paste(surface, statistics, sep = "/")
        if (length(span) != 1L) stop("invalid argument 'span'")
        if (length(cell) != 1L) stop("invalid argument 'cell'")
        if (length(degree) != 1L) stop("invalid argument 'degree'")
	z <- .C(C_loess_raw, # ../src/loessc.c
		y, x, weights, robust, D, N,
		as.double(span),
		as.integer(degree),
		as.integer(nonparametric),
		as.integer(order.drop.sqr),
		as.integer(sum.drop.sqr),
		as.double(span*cell),
		as.character(surf.stat),
		fitted.values = double(N),
		parameter = integer(7L),
		a = integer(max.kd),
		xi = double(max.kd),
		vert = double(2L*D),
		vval = double((D+1L)*max.kd),
		diagonal = double(N),
		trL = double(1L),
		delta1 = double(1L),
		delta2 = double(1L),
		as.integer(surf.stat == "interpolate/exact"))
	if(j==1) {
	    trace.hat.out <- z$trL
	    one.delta <- z$delta1
	    two.delta <- z$delta2
	}
	fitted.residuals <- y - z$fitted.values
	if(j < iterations)
	    robust <- .Fortran(C_lowesw, fitted.residuals, N,
			       robust = double(N), integer(N))$robust
    }
    if(surface == "interpolate")
    {
	pars <- setNames(z$parameter,
			 c("d", "n", "vc", "nc", "nv", "liv", "lv"))
	enough <- (D + 1L) * pars["nv"]
	fit.kd <- list(parameter=pars, a=z$a[1L:pars[4L]], xi=z$xi[1L:pars[4L]],
		       vert=z$vert, vval=z$vval[1L:enough])
    }
    if(iterations > 1L) {
	pseudovalues <- .Fortran(C_lowesp,
				 N,
				 as.double(y),
				 as.double(z$fitted.values),
				 as.double(weights),
				 as.double(robust),
				 integer(N),
				 pseudovalues = double(N))$pseudovalues
	zz <- .C(C_loess_raw,
		as.double(pseudovalues), # ? needed
		x, weights, weights, D, N,
		as.double(span),
		as.integer(degree),
		as.integer(nonparametric),
		as.integer(order.drop.sqr),
		as.integer(sum.drop.sqr),
		as.double(span*cell),
		as.character(surf.stat),
		temp = double(N),
		parameter = integer(7L),
		a = integer(max.kd),
		xi = double(max.kd),
		vert = double(2L*D),
		vval = double((D+1L)*max.kd),
		diagonal = double(N),
		trL = double(1L),
		delta1 = double(1L),
		delta2 = double(1L),
		0L)
	pseudo.resid <- pseudovalues - zz$temp
    }
    sum.squares <- if(iterations <= 1L) sum(weights * fitted.residuals^2)
    else sum(weights * pseudo.resid^2)
    enp <- one.delta + 2*trace.hat.out - N
    s <- sqrt(sum.squares/one.delta)
    pars <- list(robust = robust, span = span, degree = degree,
                 normalize = normalize,
		 parametric = parametric, drop.square = drop.square,
		 surface = surface, cell = cell,
                 family = if(iterations <= 1L) "gaussian" else "symmetric",
		 iterations = iterations)
    fit <- list(n = N, fitted = z$fitted.values, residuals = fitted.residuals,
		enp = enp, s = s, one.delta = one.delta, two.delta = two.delta,
		trace.hat = trace.hat.out, divisor = divisor)
    fit$pars <- pars
    if(surface == "interpolate") fit$kd <- fit.kd
    class(fit) <- "loess"
    fit
}

predict.loess <-
    function(object, newdata = NULL, se = FALSE, na.action = na.pass, ...)
{
    if(!inherits(object, "loess"))
	stop("first argument must be a \"loess\" object")
    if(is.null(newdata) && !se)
	return(fitted(object))

    newx <- if(is.null(newdata)) object$x
    else if(is.data.frame(newdata))
        as.matrix(model.frame(delete.response(terms(object)), newdata,
                              na.action = na.action))
    else as.matrix(newdata) # this case is undocumented
    res <-
        with(object, predLoess(y, x, newx, s, weights, pars$robust,
                               pars$span, pars$degree, pars$normalize,
                               pars$parametric, pars$drop.square, pars$surface,
                               pars$cell, pars$family, kd, divisor, se = se))
    if(!is.null(out.attrs <- attr(newdata, "out.attrs"))) { # expand.grid used
        if(se) {
            res$fit <- array(res$fit, out.attrs$dim, out.attrs$dimnames)
            res$se.fit <- array(res$se.fit, out.attrs$dim, out.attrs$dimnames)
        } else res <- array(res, out.attrs$dim, out.attrs$dimnames)
    }
    if(se)
	res$df <- object$one.delta^2/object$two.delta
    res
}

predLoess <-
  function(y, x, newx, s, weights, robust, span, degree,
	   normalize, parametric, drop.square, surface, cell, family,
	   kd, divisor, se = FALSE)
{
    ## translation of pred_
    D <- NCOL(x); N <- NROW(x); M <- NROW(newx)
    x <- as.matrix(x); newx <- as.matrix(newx)
    newx <- newx/rep(divisor, rep_len(M, D))
    x <- x/rep(divisor, rep_len(N, D))
    sum.drop.sqr <- sum(drop.square)
    nonparametric <- sum(!parametric)
    order.parametric <- order(parametric)
    x <- x[, order.parametric, drop=FALSE]
    x.evaluate <- newx[, order.parametric, drop=FALSE]
    order.drop.sqr <- (2L - drop.square)[order.parametric]
    storage.mode(x) <- "double"
    storage.mode(y) <- "double"
    if(surface == "direct") {
        nas <- rowSums(is.na(newx)) > 0L
        fit <- rep_len(NA_real_, length(nas))
        x.evaluate <- x.evaluate[!nas,, drop = FALSE]
        M <- nrow(x.evaluate)
	if(se) {
            se.fit <- fit
	    z <- .C(C_loess_dfitse,
		    y,
		    x,
		    as.double(x.evaluate),
		    as.double(weights*robust),
		    as.double(robust),
		    as.integer(family =="gaussian"),
		    as.double(span),
		    as.integer(degree),
		    as.integer(nonparametric),
		    as.integer(order.drop.sqr),
		    as.integer(sum.drop.sqr),
		    as.integer(D),
		    as.integer(N),
		    as.integer(M),
		    fit = double(M),
		    L = double(N*M))[c("fit", "L")]
	    fit[!nas] <- z$fit
	    ses <- (matrix(z$L^2, M, N)/rep(weights, rep_len(M,N))) %*% rep_len(1,N)
	    se.fit[!nas] <- drop(s * sqrt(ses))
	} else {
	    fit[!nas] <- .C(C_loess_dfit,
                            y,
                            x,
                            as.double(x.evaluate),
                            as.double(weights*robust),
                            as.double(span),
                            as.integer(degree),
                            as.integer(nonparametric),
                            as.integer(order.drop.sqr),
                            as.integer(sum.drop.sqr),
                            as.integer(D),
                            as.integer(N),
                            as.integer(M),
                            fit = double(M))$fit
	}
    }
    else { ## interpolate
	## need to eliminate points outside original range - not in pred_
	inside <- matrix(FALSE, M, ncol = D)
	ranges <- apply(x, 2L, range)
	inside <- (x.evaluate <= rep(ranges[2L,], rep_len(M, D))) &
	(x.evaluate >= rep(ranges[1L,], rep_len(M, D)))
	inside <- inside %*% rep_len(1, D) == D
        inside[is.na(inside)] <- FALSE
	M1 <- sum(inside)
	fit <- rep_len(NA_real_, M)
	if(any(inside))
	    fit[inside] <- .C(C_loess_ifit,
			      as.integer(kd$parameter),
			      as.integer(kd$a), as.double(kd$xi),
			      as.double(kd$vert), as.double(kd$vval),
			      as.integer(M1),
			      as.double(x.evaluate[inside, ]),
			      fit = double(M1))$fit
	if(se) {
	    se.fit <- rep_len(NA_real_, M)
	    if(any(inside)) {
		L <- .C(C_loess_ise,
			y,
			x,
			as.double(x.evaluate[inside, ]),
			as.double(weights),
			as.double(span),
			as.integer(degree),
			as.integer(nonparametric),
			as.integer(order.drop.sqr),
			as.integer(sum.drop.sqr),
			as.double(span*cell),
			as.integer(D),
			as.integer(N),
			as.integer(M1),
			double(M1),
			L = double(N*M1)
			)$L
		tmp <- (matrix(L^2, M1, N)/rep(weights, rep_len(M1,N))) %*% rep_len(1,N)
		se.fit[inside] <- drop(s * sqrt(tmp))
	    }
	}
    }
    rn <- rownames(newx)
    if(se) {
        if(!is.null(rn)) names(fit) <- names(se.fit) <- rn
        list(fit = fit, se.fit = drop(se.fit), residual.scale = s)
    } else {
        if(!is.null(rn)) names(fit) <- rn
        fit
    }
}

pointwise <- function(results, coverage)
{
    fit <- results$fit
    lim <- qt((1 - coverage)/2, results$df, lower.tail = FALSE) * results$se.fit
    list(fit = fit, lower = fit - lim, upper = fit + lim)
}

print.loess <- function(x, digits = max(3L, getOption("digits") - 3L), ...)
{
    if(!is.null(cl <- x$call)) {
	cat("Call:\n")
	dput(cl, control=NULL)
    }
    cat("\nNumber of Observations:", x$n, "\n")
    cat("Equivalent Number of Parameters:", format(round(x$enp, 2L)), "\n")
    cat("Residual",
	if(x$pars$family == "gaussian")"Standard Error:" else "Scale Estimate:",
	format(signif(x$s, digits)), "\n")
    invisible(x)
}

summary.loess <- function(object, ...)
{
    class(object) <- "summary.loess"
    object
}

print.summary.loess <-
    function(x, digits = max(3L, getOption("digits") - 3L), ...)
{
    if(!is.null(cl <- x$call)) {
	cat("Call:\n")
	dput(cl, control=NULL)
    }
    cat("\nNumber of Observations:", x$n, "\n")
    cat("Equivalent Number of Parameters:", format(round(x$enp, 2)), "\n")
    if(x$pars$family == "gaussian")
	cat("Residual Standard Error:", format(signif(x$s, digits)), "\n")
    else cat("Residual Scale Estimate:", format(signif(x$s, digits)), "\n")
    cat("Trace of smoother matrix:", format(round(x$trace.hat, 2L)), "\n")
    cat("\nControl settings:\n")
    cat("  normalize: ", x$pars$normalize, "\n")
    cat("  span	    : ", format(x$pars$span), "\n")
    cat("  degree   : ", x$pars$degree, "\n")
    cat("  family   : ", x$pars$family)
    if(x$pars$family != "gaussian")
	cat("	    iterations =", x$pars$iterations)
    cat("\n  surface  : ", x$pars$surface)
    if(x$pars$surface == "interpolate")
	cat("	  cell =", format(x$pars$cell))
    cat("\n")
    invisible(x)
}

scatter.smooth <-
    function(x, y = NULL, span = 2/3, degree = 1,
	     family = c("symmetric", "gaussian"),
	     xlab = NULL, ylab = NULL,
	     ylim = range(y, pred$y, na.rm = TRUE),
             evaluation = 50, ..., lpars = list())
{
    xlabel <- if (!missing(x)) deparse(substitute(x))
    ylabel <- if (!missing(y)) deparse(substitute(y))
    xy <- xy.coords(x, y, xlabel, ylabel)
    x <- xy$x
    y <- xy$y
    xlab <- if (is.null(xlab)) xy$xlab else xlab
    ylab <- if (is.null(ylab)) xy$ylab else ylab
    pred <- loess.smooth(x, y, span, degree, family, evaluation)
    plot(x, y, ylim = ylim, xlab = xlab, ylab = ylab, ...)
    do.call(lines, c(list(pred), lpars))
    invisible()
}

loess.smooth <-
  function(x, y, span = 2/3, degree = 1, family = c("symmetric", "gaussian"),
	   evaluation = 50, ...)
{
    notna <- !(is.na(x) | is.na(y))
    new.x <- seq.int(min(x[notna]), max(x[notna]), length.out = evaluation)

    control <- loess.control(...)
    x <- x[notna]; y <- y[notna]
    w <- rep_len(1, length(y))
    family <- match.arg(family)
    iterations <- if(family == "gaussian") 1L else control$iterations
    fit <- simpleLoess(y, x, w, span, degree, FALSE, FALSE,
		       normalize=FALSE, "none", "interpolate",
		       control$cell, iterations, control$trace.hat)
    kd <- fit$kd
    z <- .C(C_loess_ifit,
	    as.integer(kd$parameter),
	    as.integer(kd$a), as.double(kd$xi),
	    as.double(kd$vert), as.double(kd$vval),
	    as.integer(evaluation),
	    as.double(new.x),
	    fit = double(evaluation))$fit
    list(x = new.x, y = z)
}

anova.loess <- function(object, ...)
{
    objects <- list(object, ...)
    responses <- as.character(lapply(objects,
				     function(x) as.character(x$terms[[2L]])))
    sameresp <- responses == responses[1L]
    ## calculate the number of models
    if (!all(sameresp)) {
	objects <- objects[sameresp]
        warning(gettextf("models with response %s removed because response differs from model 1",
                         sQuote(deparse(responses[!sameresp]))),
                domain = NA)
    }
    nmodels <- length(objects)
    if(nmodels <= 1L) stop("no models to compare")
    models <- as.character(lapply(objects, function(x) x$call))
    descr <- paste("Model ", format(1L:nmodels), ": ", models,
		   sep = "", collapse = "\n")
    ## extract statistics
    delta1 <- sapply(objects, function(x) x$one.delta)
    delta2 <- sapply(objects, function(x) x$two.delta)
    s <- sapply(objects, function(x) x$s)
    enp <- sapply(objects, function(x) x$enp)
    rss <- s^2*delta1
    max.enp <- order(enp)[nmodels]
    d1diff <- abs(diff(delta1))
    dfnum <- c(d1diff^2/abs(diff(delta2)))
    dfden <- (delta1^2/delta2)[max.enp]
    Fvalue <- c(NA, (abs(diff(rss))/d1diff)/s[max.enp]^2)
    pr <- pf(Fvalue, dfnum, dfden, lower.tail = FALSE)
    ans <- data.frame(ENP = round(enp,2L), RSS = rss, "F-value" = Fvalue,
		      "Pr(>F)" = pr, check.names = FALSE)
    attr(ans, "heading") <-
	paste0(descr, "\n\n", "Analysis of Variance:   denominator df ",
               format(round(dfden, 2L)), "\n")
    class(ans) <- c("anova", "data.frame")
    ans
}