% File src/library/graphics/man/spineplot.Rd
% Part of the R package, http://www.R-project.org
% Copyright 1995-2007 R Core Team
% Distributed under GPL 2 or later

\name{spineplot}
\alias{spineplot}
\alias{spineplot.default}
\alias{spineplot.formula}
\title{Spine Plots and Spinograms}
\description{
  Spine plots are a special case of mosaic plots, and can be seen as
  a generalization of stacked (or highlighted) bar plots. Analogously,
  spinograms are an extension of histograms.
}
\usage{
spineplot(x, \dots)

\method{spineplot}{default}(x, y = NULL,
          breaks = NULL, tol.ylab = 0.05, off = NULL,
          ylevels = NULL, col = NULL,
          main = "", xlab = NULL, ylab = NULL,
          xaxlabels = NULL, yaxlabels = NULL,
          xlim = NULL, ylim = c(0, 1), axes = TRUE, \dots)

\method{spineplot}{formula}(formula, data = NULL,
          breaks = NULL, tol.ylab = 0.05, off = NULL,
          ylevels = NULL, col = NULL,
          main = "", xlab = NULL, ylab = NULL,
          xaxlabels = NULL, yaxlabels = NULL,
          xlim = NULL, ylim = c(0, 1), axes = TRUE, \dots,
          subset = NULL)
}
\arguments{
  \item{x}{an object; the default method expects either a single variable
    (interpreted to be the explanatory variable) or a 2-way table.
    See details.}
  \item{y}{a \code{"factor"} interpreted to be the dependent variable}
  \item{formula}{a \code{"formula"} of type \code{y ~ x} with a single
    dependent \code{"factor"} and a single explanatory variable.}
  \item{data}{an optional data frame.}
  \item{breaks}{if the explanatory variable is numeric, this controls how
    it is discretized. \code{breaks} is passed to \code{\link{hist}} and can
    be a list of arguments.}
  \item{tol.ylab}{convenience tolerance parameter for y-axis annotation.
    If the distance between two labels drops under this threshold, they are
    plotted equidistantly.}
  \item{off}{vertical offset between the bars (in per cent).
It is fixed to \code{0} for spinograms and defaults to \code{2} for spine plots.} \item{ylevels}{a character or numeric vector specifying in which order the levels of the dependent variable should be plotted.} \item{col}{a vector of fill colors of the same length as \code{levels(y)}. The default is to call \code{\link{gray.colors}}.} \item{main, xlab, ylab}{character strings for annotation} \item{xaxlabels, yaxlabels}{character vectors for annotation of the x and y axes. Default to \code{levels(x)} and \code{levels(y)}, respectively, for the spine plot. For \code{xaxlabels} in the spinogram, the breaks are used.} \item{xlim, ylim}{the range of x and y values with sensible defaults.} \item{axes}{logical. If \code{FALSE} all axes (including those giving level names) are suppressed.} \item{\dots}{additional arguments passed to \code{\link{rect}}.} \item{subset}{an optional vector specifying a subset of observations to be used for plotting. } } \details{ \code{spineplot} creates either a spinogram or a spine plot. It can be called via \code{spineplot(x, y)} or \code{spineplot(y ~ x)} where \code{y} is interpreted to be the dependent variable (and has to be categorical) and \code{x} the explanatory variable. \code{x} can be either categorical (then a spine plot is created) or numerical (then a spinogram is plotted). Additionally, \code{spineplot} can also be called with only a single argument which then has to be a 2-way table, interpreted to correspond to \code{table(x, y)}. Both spine plots and spinograms are essentially mosaic plots with special formatting of spacing and shading. Conceptually, they plot \eqn{P(y | x)} against \eqn{P(x)}. For the spine plot (where both \eqn{x} and \eqn{y} are categorical), both quantities are approximated by the corresponding empirical relative frequencies. 
For the spinogram (where \eqn{x} is numerical), \eqn{x} is first discretized (by calling \code{\link{hist}} with the \code{breaks} argument) and then empirical relative frequencies are taken. Thus, spine plots can also be seen as a generalization of stacked bar plots where not the heights but the widths of the bars correspond to the relative frequencies of \code{x}. The heights of the bars then correspond to the conditional relative frequencies of \code{y} in every \code{x} group. Analogously, spinograms extend stacked histograms. } \value{ The table visualized is returned invisibly. } \seealso{ \code{\link{mosaicplot}}, \code{\link{hist}}, \code{\link{cdplot}} } \references{ Friendly, M. (1994), Mosaic displays for multi-way contingency tables. \emph{Journal of the American Statistical Association}, \bold{89}, 190--200. Hartigan, J.A., and Kleiner, B. (1984), A mosaic of television ratings. \emph{The American Statistician}, \bold{38}, 32--35. Hofmann, H., and Theus, M. (2005), \emph{Interactive graphics for visualizing conditional distributions}, Unpublished Manuscript. Hummel, J. (1996), Linked bar charts: Analysing categorical data graphically. \emph{Computational Statistics}, \bold{11}, 23--33. 
}
\author{
  Achim Zeileis \email{Achim.Zeileis@R-project.org}
}
\examples{
## treatment and improvement of patients with rheumatoid arthritis
treatment <- factor(rep(c(1, 2), c(43, 41)), levels = c(1, 2),
                    labels = c("placebo", "treated"))
improved <- factor(rep(c(1, 2, 3, 1, 2, 3), c(29, 7, 7, 13, 7, 21)),
                   levels = c(1, 2, 3),
                   labels = c("none", "some", "marked"))

## (dependence on a categorical variable)
(spineplot(improved ~ treatment))

## applications and admissions by department at UC Berkeley
## (two-way tables)
(spineplot(margin.table(UCBAdmissions, c(3, 2)),
           main = "Applications at UCB"))
(spineplot(margin.table(UCBAdmissions, c(3, 1)),
           main = "Admissions at UCB"))

## NASA space shuttle o-ring failures
fail <- factor(c(2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1,
                 1, 1, 1, 2, 1, 1, 1, 1, 1),
               levels = c(1, 2), labels = c("no", "yes"))
temperature <- c(53, 57, 58, 63, 66, 67, 67, 67, 68, 69, 70, 70,
                 70, 70, 72, 73, 75, 75, 76, 76, 78, 79, 81)

## (dependence on a numerical variable)
(spineplot(fail ~ temperature))
(spineplot(fail ~ temperature, breaks = 3))
(spineplot(fail ~ temperature, breaks = quantile(temperature)))

## highlighting for failures
spineplot(fail ~ temperature, ylevels = 2:1)
}
\keyword{hplot}