library(happy.hbrem)
#library(multicore)
library(parallel)
library(survival)
library(hash)

# HAPPY.preCC.R
# (C) Richard Mott 2010; richard.mott@well.ox.ac.uk
# Distributed under the GPL
# This code contains functions to analyse QTLs in MAGIC recombinant inbred lines. 
# See http:/mus.well.ox.ac.uk/magic for instructions.

make.condensed.genome <- function( chrs, prefix, gdir= "./", dir="./CONDENSED/", thin=20, extension=".data", gen=7, haploid=FALSE, mc.cores=10) {

  lapply( chrs, make.condensed.chr.database, prefix=prefix, gdir=gdir, dir=dir, thin=thin, extension=extension, gen=gen, haploid=haploid)
#  mclapply( chrs, make.condensed.chr.database, prefix=prefix, gdir=gdir, dir=dir, thin=thin, extension=extension, gen=gen, haploid=haploid, mc.cores=mc.cores)
  if ( haploid ) 
    models = c( "additive" )
  else 
    models = c( "additive", "full" )

  db = list( chrs=chrs, prefix=prefix, dir=dir, thin=thin, extension=extension, gen=gen, haploid=haploid, models=models, chrs=chrs )
  save( db, file=paste( dir, "/db.RData", sep=""))
}


make.condensed.chr.database <- function( chr, prefix, gdir="./", dir="./CONDENSED/", thin=20, extension=".data", gen=7, haploid=FALSE) {

  dir.create(dir, recursive=TRUE, show=FALSE) 
  
  pedfile = paste(gdir, chr, prefix, extension, sep="")
  allelesfile = paste( gdir, chr, prefix, ".alleles", sep="")
  mapfile = paste( gdir, chr, prefix, ".map", sep="")
  cat( pedfile, file.exists(pedfile), allelesfile, file.exists(allelesfile), mapfile, file.exists(mapfile), "\n")
  h = happy( pedfile, allelesfile, gen=gen, file="ped", mapfile=mapfile, haploid=haploid)

  if ( haploid ) 
    models = c( "additive" )
  else 
    models = c( "additive", "full" )

  subjects=h$subjects
  strains=h$strains
  save(subjects, file=paste(dir, "/subjects.RData", sep=""))	
  save(strains, file=paste(dir, "/strains.RData", sep=""))	
  condensed.params = c(thin=thin, pedfile=pedfile, allelesfile=allelesfile, mapfile=mapfile, gen=gen, haploid=haploid, models=models)
  save(condensed.params, file=paste(dir, "/", chr, ".param.RData", sep=""))		
  subdir = paste(dir, "/chr/", sep="")
  dir.create(subdir, recursive=TRUE, show=FALSE) 

  for( model in models ) {
    scandir = paste(subdir, model, "/", sep="")	
    dir.create(scandir, recursive=TRUE, show=FALSE) 

    condensed = make.condensed.matrices( h, thin, model=model )
    Rfile = paste(scandir, chr, ".RData", sep="")  
    save(condensed, file=Rfile)
    x = sapply( condensed, function( x ) { return (c( x$from, x$to, x$from.marker, x$to.marker, x$len ))})
    condensed.summary = data.frame(t(x))
    names(condensed.summary) = c( "from", "to", "from.marker", "to.marker", "width")

    condensed.summary$from.bp=h$bp[match(condensed.summary$from.marker,h$marker)]
    condensed.summary$to.bp=h$bp[match(condensed.summary$to.marker,h$marker)]
    condensed.summary$chromosome=rep(chr,nrow(condensed.summary))
    save(condensed.summary, file=paste(scandir, chr, ".condensed.summary.RData", sep=""))
    genome = data.frame(marker=h$markers,chromosome=h$chromosome,bp=h$bp,cm=h$map)
    save(genome, file=paste(scandir,  chr, ".summary.RData", sep=""))
  }
  
}	

                                        # loads a pre-created database of condensed HAPPY design matrices into memory
                                        # returns an object of class "condensed.happy" with the following fields
                                        # subjects
                                        # strains
                                        # chr$additive$$chr$chr1 ....chr$additive$chr19 ....
                                        # additive$condensed.summary
                                        # chr$full$chr$chr1 ....chr$full$chr19 ....


load.condensed.database <- function( dir="./CONDENSED/" ) {

  load( paste(dir, "/db.RData",sep=""))

  load( paste(dir, "/strains.RData", sep=""))
  db$strains = strains;
  
  load( paste(dir, "/subjects.RData", sep=""))
  db$subjects = subjects;

  cat( "loading condensed db", dir, "\n")

  models = db$models 
  for( model in models ) {
    n = 0;
    db[[model]] = list()
    db[[model]]$chr = list()
    db[[model]]$matrices = list()
    h = hash()
    files = paste(dir, "/chr/", model, "/", db$chrs, ".RData", sep="")
    for ( i in 1:length(files)) {
      chr.data = files[i]
      load( chr.data )
      db[[model]]$chr[[db$chrs[i]]]=condensed
      db[[model]]$matrices = c( db[[model]]$matrices, condensed) 
      n = n + length(db[[model]]$chr[[db$chrs[i]]])
      lapply( condensed, function( item ) { h[[item$from.marker]] <<- item$mat } )
    }
    db[[model]]$hash = h
    cat( "model", model, "read ", n, " matrices\n");
    cat( "loading genome summary\n")
    db[[model]]$summary = NULL
    files = paste(dir, "/chr/", model, "/", db$chrs, ".summary.RData", sep="")
    for (chr.summary in files ) {
      load( chr.summary )
      db[[model]]$summary = rbind( db[[model]]$summary, genome )
    }

    cat ( "loading condensed summary\n")
    db[[model]]$condensed.summary = NULL
    files = paste(dir, "/chr/", model, "/", db$chrs, ".condensed.summary.RData", sep="")
    for (condensed.chr.summary in files ) {
      load( condensed.chr.summary )
      db[[model]]$condensed.summary = rbind( db[[model]]$condensed.summary, condensed.summary )
    }

# create some data objects so that the resulting object looks like a genome.cache...

    idx = match( db[[model]]$condensed.summary$from.marker, db[[model]]$summary$marker)
    db[[model]]$subjects = subjects
    db[[model]]$subjects = strains
    db[[model]]$genome = db[[model]]$summary[idx,]
    db[[model]]$genome$map = db[[model]]$genome$cm

    db[[model]]$markers = db[[model]]$genome$marker
    db[[model]]$chromosome = db[[model]]$genome$chromosome
    db[[model]]$map = db[[model]]$genome$map
  }
  load( paste(dir, "/strains.RData", sep=""))
  db$strains = strains;
  
  load( paste(dir, "/subjects.RData", sep=""))
  db$subjects = subjects;
  
  class(db) = "condensed.happy";
  return(db)
}

condensed.hdesign <- function( db, marker, model="additive" ) {
	return(db[[model]]$hash[[marker]])
}

condensed.genome.scan.lm <- function( condensed.db, phen.data, null.model, permute=0, model="additive", phen.name = NULL ) {

  if ( class(condensed.db) != "condensed.happy" ) {
    warning( "condensed.db not of class condensed.happy\n")
    return(NULL)
  }


  int = intersect( condensed.db$subjects, unique(phen.data$CC.Line))
  cat( length(int), "subjects analysed with phenotypes\n")


#  phen.data = phen.data[phen.data$CC.Line %in% int,]
  phen.data = phen.data[match( int, phen.data$CC.Line ,nomatch=0),]

  if ( !is.null(phen.name)) 
    hist(phen.data[[phen.name]], xlab=phen.name, main=phen.name)

  permuted.data=NULL
  if ( is.numeric(permute) & permute>0) {
    f = lm( as.formula(null.model), data=phen.data, y=TRUE)
    permuted.data = replicate( permute, sample(residuals(f)))
    permuted.data = cbind( residuals(f), permuted.data)
  }
  
  db.use = match(int, phen.data$CC.Line, nomatch=0)

  fit.null = lm( null.model, data=phen.data );
  print(anova(fit.null))

  genetic.model = paste( as.character(null.model), " + d")
  result = condensed.db[[model]]$condensed.summary


  if ( is.numeric(permute) & permute>0) {
    chr = condensed.db[[model]]$chr
    perm.result = NULL

    for ( i in 1:length(chr)) {
      mats = chr[[i]]
      plp = scan.chromosome.lm( mats, db.use, genetic.model, phen.data, permuted.data, fit.null)
      perm.result = rbind(perm.result,t(plp))
    }

    result$logP = perm.result[,1]
    
    result$pointwise.permute.pval = apply( perm.result, 1, function(w) {sum(w[1]<=w[2:length(w)])} )/permute
    genomewide.distribution = sort(apply( perm.result[,2:ncol(perm.result)], 2, max ))
    result$genomewide.permute.pval = sapply( result$logP, function(x,genomewide.max) { sum(x <= genomewide.max)}, genomewide.max)/permute
    return( list(result=result, genomewide.distribution=genomwide.distribution) )
  }
  else {
    logP = NULL
    chr = condensed.db[[model]]$chr

    for ( i in 1:length(chr)) {
      mats = chr[[i]]
      lp = sapply( mats, function( e, db.use, genetic.model, phen.data, fit.null, subjects) {

        d = e$mat
        rownames(d) = subjects
        d = d[db.use,]
        fit.genetic = lm( as.formula(genetic.model), data=phen.data) 
        a = anova(fit.null, fit.genetic)
        return( -log10(a[2,6]))
      }
        , db.use, genetic.model, phen.data, fit.null, condensed.db$subjects)
      logP = c( logP, lp )		
    }
    result$logP = logP
    return(list(result=result))
  }  

}

scan.chromosome.lm <- function( mats, db.use, genetic.model, phen.data, permuted.data, fit.null ) {
  return( sapply( mats, test.locus.lm, db.use, genetic.model, phen.data, permuted.data, fit.null ))
}


scan.chromosome.survival <- function( mats, db.use, genetic.model, phen.data, fit.null, subjects ) {
  cat ("scan.chromosome.survival\n");
  return( sapply( mats, test.locus.survival, db.use, genetic.model, phen.data, fit.null, subjects ))
}

test.locus.name.lm <- function( locus, db, db.use, genetic.model, phen.data, permuted.data, fit.null ) {
	d = condensed.hdesign(db, locus)
        return(test.locus.internal.lm( d, db.use, genetic.model, phen.data, permuted.data, fit.null ) )
}
  
test.locus.lm <- function( e, db.use, genetic.model, phen.data, permuted.data, fit.null) {

  d = e$mat
  return(test.locus.internal.lm( d, db.use, genetic.model, phen.data, permuted.data, fit.null ) )
}

test.locus.internal.lm <- function( d, db.use, genetic.model, phen.data, permuted.data, fit.null )  {

  d = d[db.use,]
  fit.genetic = lm( as.formula(genetic.model), data=phen.data, qr=TRUE, y=TRUE)
  
  a = anova(fit.null, fit.genetic)
  y = fit.genetic$y
  y = y-mean(y)
  tss = sum(y*y)
  n = length(y)-fit.null$rank
  df = fit.genetic$rank - fit.null$rank
  rss = apply( permuted.data, 2, function( y, qr ) { r = qr.resid(qr,y); return(sum(r*r)) } , fit.genetic$qr )
  fss = tss-rss; 
  f.stat=(fss/df)/(rss/(n-df)); 
  
  p = -pf( f.stat, df, n-df, lower=FALSE, log=TRUE)/log(10)
  
                                        #    return( c(-log10(a[2,6]), p))
  return( p)
}

test.locus.survival <- function( e, db.use, genetic.model, phen.data, fit.null, subjects ) {


  d = e$mat
  rownames(d) = subjects
  d = d[db.use,]
  phen.data$d = d
  fit.genetic = survreg( as.formula(genetic.model), data=phen.data)
  
  a = anova(fit.null, fit.genetic)

                                        #    return( c(-log10(a[2,6]), p))
  return( -log10(a[2,7]))
}

multicore.condensed.genome.scan.lm <- function( condensed.db, phen.data, null.model, permute=0, model="additive", mc.cores=8, subject.name="CC.Line" ) {

  if ( class(condensed.db) != "condensed.happy" ) {
    warning( "condensed.db not of class condensed.happy\n")
    return(NULL)
  }


  int = intersect( condensed.db$subjects, unique(phen.data[[subject.name]]))
  cat( length(int), "subjects analysed with phenotypes\n")


  phen.data = phen.data[match(int, phen.data[[subject.name]],nomatch=0),]

  permuted.data=NULL
  if ( is.numeric(permute) & permute>0) {
    f = lm( as.formula(null.model), data=phen.data, y=TRUE)
    permuted.data = replicate( permute, sample(residuals(f)))
    permuted.data = cbind( residuals(f), permuted.data)
  }
  
  db.use = match(int, condensed.db$subjects,nomatch=0)

  fit.null = lm( null.model, data=phen.data );
  print(anova(fit.null))

  genetic.model = paste( paste(as.character(null.model)), " + d", collapse=" ")
  result = condensed.db[[model]]$condensed.summary

  if ( is.numeric(permute) & permute>0) {
    chr = condensed.db[[model]]$chr
    perm.result = NULL
#    plps = lapply(chr, scan.chromosome.lm, db.use, genetic.model, phen.data, permuted.data, fit.null )
    plps = mclapply(chr, scan.chromosome.lm, db.use, genetic.model, phen.data, permuted.data, fit.null, mc.preschedule = TRUE, mc.set.seed = TRUE, mc.silent = FALSE, mc.cores=mc.cores )

    for ( i in 1:length(plps)) {
      perm.result = rbind(perm.result,t(plps[[i]]))
    }

    result$logP = perm.result[,1]
    
    result$pointwise.permute.pval = apply( perm.result, 1, function(w) {sum(w[1]<=w[2:length(w)])} )/permute
    genomewide.distribution = sort(apply( perm.result[,2:ncol(perm.result)], 2, max ))
    result$genomewide.permute.pval = sapply( result$logP, function(x,genomewide.distribution) { sum(x <= genomewide.distribution)}, genomewide.distribution)/permute

    return( list(result=result, genomewide.distribution=genomewide.distribution) )
  }
  else {
    logP = NULL
    chr = condensed.db[[model]]$chr

    for ( i in 1:length(chr)) {
      mats = chr[[i]]
      lp = sapply( mats, function( e, db.use, genetic.model, phen.data, fit.null, subjects) {

        d = e$mat
        rownames(d) = subjects
        d = d[db.use,]
        fit.genetic = lm( as.formula(genetic.model), data=phen.data) 
        a = anova(fit.null, fit.genetic)
        return( -log10(a[2,6]))
      }
        , db.use, genetic.model, phen.data, fit.null, condensed.db$subjects)
      logP = c( logP, lp )		
    }
    result$logP = logP
    return(list(result=result))
  }  

}

multicore.condensed.genome.scan.survival <- function( condensed.db, phen.data, null.model, permute=0, model="additive", mc.cores=8 ) {

  if ( class(condensed.db) != "condensed.happy" ) {
    warning( "condensed.db not of class condensed.happy\n")
    return(NULL)
  }


  int = intersect( condensed.db$subjects, unique(phen.data$CC.Line))
  cat( length(int), "subjects analysed with phenotypes\n")


  phen.data = phen.data[phen.data$CC.Line %in% int,]
  null.model = as.formula( "Surv(phen.data$x,phen.data$death) ~ Sex")
  base.model = as.formula( "Surv(phen.data$x,phen.data$death) ~ 1")
  db.use = phen.data$CC.Line

  fit.null = survreg( null.model, data=phen.data );
  fit.base = survreg( base.model, data=phen.data );
  print(anova(fit.base,fit.null))

  genetic.model = as.formula( "Surv(phen.data$x,phen.data$death) ~ Sex + d")
  result = condensed.db[[model]]$condensed.summary

  chr = condensed.db[[model]]$chr
  plps = mclapply(chr, scan.chromosome.survival, db.use, genetic.model, phen.data, fit.null, condensed.db$subjects, mc.preschedule = TRUE, mc.set.seed = TRUE, mc.silent = FALSE, mc.cores=mc.cores )

  logP = NULL
  for ( i in 1:length(plps))
      logP = c( logP, plps[[i]])
  
  result$logP = logP
    
  return( list(result=result)) 
  }  	


make.condensed.matrices <-function( h, thin=100, model="additive" ) {
  nm = length(h$markers)-thin
  condensed = lapply( seq(1,nm,thin), function ( m, h, thin ) {
    mat = hdesign( h, m, model=model )
    m2=m
    if ( thin > 1 ) {
      m1 = m+1
      m2 = m+thin-1
      if ( m2 >= length(h$markers)) m2 = length(h$markers)-1
      denom = m2-m+1

      for( i in m1:m2) {
        mat = mat + hdesign( h, i, model=model )
      }
      mat = mat/thin
    }      
                                  #		cat( m, m2, h$markers[m],h$markers[m2],thin, "\n")
    return( list( mat=mat, from=m, to=m2, from.marker=h$markers[m], to.marker=h$markers[m2], len=thin ))
  }, h, thin )
  return(condensed)
}


read.phenotypes <- function( data.file, condense=TRUE, condense.phenotype=NULL ) {

  df = read.delim(data.file)
  nam = names(df)
  err=0
  if ( is.null(df$CC.Line) ) {
    warning( "file ", data.file, "has no column CC.Line\n")
    err = err+1
  }
  if ( is.null(df$sex) ) {
    warning( "file ", data.file, "has no column sex\n")
    err = err+1
  }
  if ( err > 0)  return(NULL)
  
  
  if ( condense ) {
    condensed = list()
    CC.lines = unique(as.character(df$CC.Line))

    if ( is.null(condense.phenotype)) {
      for ( col in names(df)) {
        if ( is.numeric(df[[col]]) ) {
          a = aggregate( df[[col]], by=list(CC.Line=df$CC.Line, sex=df$sex), FUN="median")
          condensed[[col]] = a$x[match(CC.lines,a$CC.Line)];
        } else condensed[[col]] = df[match(CC.lines, df$CC.Line),col] 
      }
    }
    else {
      if ( is.numeric(df[[condense.phenotype]]) ) {
        a = aggregate( df[[condense.phenotype]], by=list(CC.Line=df$CC.Line), FUN=function(x) { quantile( x, p=c(0.5), type=1 ) } )
        idx = which( a$CC.Line==a$CC.Line & a[[condense.phenotype]]==a$x)
        
        condensed = a[idx,]
      } else {
        warning( "condensed phenotype ", condense.phenotype, "not numeric\n")
        return(NULL)
      }
    }
    
    return(data.frame(condensed))
  } else {
    return(df)
  }
}

scan.phenotypes.lm <- function( condensed.db, data.file, phen, permute=0, histogram.pdf=NULL, covariates = c("Sex"), mc.cores=10 ) {

  phen.data = read.delim(data.file)
  if ( !is.null(covariates)) {
        cols = c( "CC.Line", phen)
	null.model = paste( "~", paste( covariates,  sep=" + "))
  }
  else {
	null.model = "~ 1"
        cols = c( "CC.Line", phen, covariates )
  }
  err=0
  for ( p in cols ) {
    if ( is.null( phen.data[[p]] ) ) {
      warning( "file ", data.file, " has no column ", p, "\n")
      err = err+1
    }
  }

  if ( err > 0)  return(NULL)

  scans = list()

  phens = c()
  for ( p in phen ) {
    cat( p, "\n")
    if ( is.numeric(phen.data[[p]]) ) {
      phens = c(phens, p)
      phen.data[[p]] = ifelse( phen.data[[p]] == 0.0 , NA, phen.data[[p]] )
      agg.phen.data = aggregate( phen.data[[p]], by=list(CC.Line=phen.data$CC.Line, Sex=phen.data$Sex), FUN="median")
      agg.phen.data[[p]] = agg.phen.data$x

#      scan = condensed.genome.scan.lm( condensed.db, agg.phen.data, paste( p, null.model), permute=permute, phen.name=phen.name)
      scan = multicore.condensed.genome.scan.lm( condensed.db, agg.phen.data, paste( p, null.model), permute=permute, mc.cores=mc.cores)
      write.table(scan$result, file=paste(p, ".scan.txt", sep=""), quote=FALSE, row=FALSE, sep="\t")
      scans[[p]] = scan
    }
  }

  plot.histograms( histogram.pdf, agg.phen.data, phens  ) 

  return(scans)
}
survival.median <- function(y) { 
  quantile( y, probs=0.5, type=1, na.rm=TRUE); 
}


scan.survival <- function( condensed.db, data.file, surv.time, surv.censored=28, permute=0, histogram.pdf=NULL, mc.cores=10 ) {

  phen.data = read.delim(data.file)
  null.model = "~ Sex"
  err=0
  for ( p in c( "CC.Line", "Sex", surv.time )) {
    if ( is.null( phen.data[[p]] ) ) {
      warning( "file ", data.file, " has no column ", p, "\n")
      err = err+1
    }
  }

  if ( err > 0)  return(NULL)

  scans = list()
  if ( is.numeric(phen.data[[surv.time]]) ) {
   agg.phen.data = aggregate( phen.data[[surv.time]], by=list(CC.Line=phen.data$CC.Line, Sex=phen.data$Sex), FUN="median" )
   agg.phen.data$death = agg.phen.data$x < surv.censored
   s = Surv( agg.phen.data$x, agg.phen.data$death )
#  scan = condensed.genome.scan.lm( condensed.db, agg.phen.data, paste( , null.model), permute=permute, phen.name=phen.name)
   scan = multicore.condensed.genome.scan.survival( condensed.db, agg.phen.data,  null.model, permute=permute, mc.cores=mc.cores)
   write.table(scan$result, file=paste(p, ".scan.txt", sep=""), quote=FALSE, row=FALSE, sep="\t")
     scans[[p]] = scan
  }
  phen.data.plot = data.frame(x = agg.phen.data$x)
  names(phen.data.plot) = c( surv.time)
  plot.histograms( histogram.pdf, phen.data.plot, c(surv.time) ) 

  return(scans)
}


plot.histograms <- function( histogram.pdf, phen.data, phen ) {
  if ( ! is.null(histogram.pdf)) {
    pdf(histogram.pdf)
    par(mfrow=c(3,3))
    for ( p in phen ) {
      if ( ! is.null(phen.data[[p]]))
        hist(phen.data[[p]], xlab=p, main=p)
    }
    dev.off()
  }
}

plot.scans <- function ( db, scans, pdffile=NULL,q = c( 0.5, 0.9, 0.95 ) ) {

  if ( ! is.null(pdffile)) pdf( pdffile ) 
  par(mfrow=c(4,1),cex=0.4)	

  for( p in names(scans)) {
    scan = scans[[p]]$result

    quantiles = scans[[p]]$genomewide.distribution[ floor( length(scans[[p]]$genomewide.distribution)*q) ]
    my = max(quantiles,scan$logP)	
    chr = scan$chromosome[2:nrow(scan)]
    boundary = c(1, which(chr != scan$chromosome[1:nrow(scan)-1]))
    boundary2 = c(boundary[2:length(boundary)],nrow(scan))
    xc = 0.5*(boundary+boundary2)
    plot( 1:nrow(scan), scan$logP, t="l", ylim=c(0,my*1.05),main=p, ylab="logP", xaxs="i", xaxt="n", xlab="position")
    abline( v=boundary, col="red")
    abline(h=quantiles,col="grey")
    axis(1, at=xc, labels=chr[boundary])

    if ( !is.null(scans[[p]]$qtls) ) {
	qtls = scans[[p]]$qtls[scans[[p]]$qtls$phenotype==p,]
	x = match( qtls$peak.SNP, scan$from.marker)
	points(x,rep(my,length(x)),pch=20,col="orange")
    }
  }
  if ( ! is.null(pdffile)) dev.off()

}


# HBREM

scan.chromosome.hbrem <- function( loci, db.use, phen.data, HaploidInd, Ndip, Nind, Npost, Nbin) {

  return(sapply( loci, test.locus.hbrem, phen.data, db.use, HaploidInd, Ndip, Nind, Npost, Nbin) )
}

test.locus.hbrem <- function(locus, phen.data, db.use, HaploidInd, Ndip, Nind, Npost, Nbin) {

    d = locus$mat[db.use,]

    hb <- hbrem(RX=d, HaploidInd=HaploidInd, Ndip=Ndip, Nind=Nind, Npost=Npost, Nbin=Nbin, Ry=phen.data)

    reg.full.lm <- lm(phen.data ~ d)
    reg.null.lm <- lm(phen.data ~ 1)
    Ftest <- anova(reg.null.lm, reg.full.lm)
    pval <- Ftest$"Pr(>F)"[2]

#    cat( locus$from.marker, -log10(pval), hb[[1]][12],"\n" )
    return( c(  -log10(pval), hb[[1]], hb[[2]], hb[[3]], hb[[4]] ))
  }

multicore.condensed.genome.scan.hbrem <- function( condensed.db, phen.data, mc.cores=8, haploid=FALSE  ) {


  if ( class(condensed.db) != "condensed.happy" ) {
    warning( "condensed.db not of class condensed.happy\n")
    return(NULL)
  }

  Npost <- 2000
  Nbin <- 2
  HaploidInd <- ifelse( haploid, 0, 1)

  if ( HaploidInd == 0 ) {
    model = "additive"
    Ndip = length(condensed.db$strains)
  }
  else {
    model = "full"
    ns = length(condensed.db$strains)
    Ndip = ns*(ns+1)/2
  }

  int = intersect( condensed.db$subjects, unique(phen.data$SUBJECT.NAME))
  cat( length(int), "subjects analysed with phenotypes\n")

  phen.data = phen.data[phen.data$SUBJECT.NAME %in% int,]
  db.use = match( phen.data$SUBJECT.NAME, condensed.db$subjects)
  Nind = nrow(phen.data)
  
  result = condensed.db[[model]]$condensed.summary

  chr = condensed.db[[model]]$chr
  plps = mclapply(chr, scan.chromosome.hbrem, db.use, phen.data$x, HaploidInd, Ndip, Nind, Npost, Nbin, mc.preschedule = TRUE, mc.set.seed = TRUE, mc.silent = FALSE, mc.cores=mc.cores )
#  plps = lapply(chr, scan.chromosome.hbrem, db.use, phen.data$x, HaploidInd, Ndip, Nind, Npost, Nbin)

  res = t(do.call( "cbind", plps))

  mark.pars.df <- data.frame(res[,1:46])
  names(mark.pars.df) = c( "F.logPval", "Hbar", "sd.Ni", "BIC.qtl", "BIC.null", "BF", "logBF", "DIC.qtl", "DIC.null", "DIC.diff", "pd.qtl", "pd.null", "mode.kT", "ga", "gb", "mode.var", "med.kT", "med.mu", "med.var", "mean.kT", "mean.mu", "mean.var", "hpd.kT.50.lower", "hpd.kT.50.upper", "hpd.mu.50.lower", "hpd.mu.50.upper", "hpd.var.50.lower", "hpd.var.50.upper", "hpd.kT.75.lower", "hpd.kT.75.upper", "hpd.mu.75.lower", "hpd.mu.75.upper", "hpd.var.75.lower", "hpd.var.75.upper", "hpd.kT.95.lower", "hpd.kT.95.upper", "hpd.mu.95.lower", "hpd.mu.95.upper", "hpd.var.95.lower", "hpd.var.95.upper", "hpd.kT.99.lower", "hpd.kT.99.upper", "hpd.mu.99.lower", "hpd.mu.99.upper", "hpd.var.99.lower", "hpd.var.99.upper")

  offset = ncol(mark.pars.df)
  mark.pars.df$Name=condensed.db[[model]]$genome$marker 
  mark.pars.df$Chr = condensed.db[[model]]$genome$chromosome
  mark.pars.df$Bp = condensed.db[[model]]$genome$bp

  bp2 = mark.pars.df$Bp[2:length(mark.pars.df$Bp)]
  bidx = which(bp2 < mark.pars.df$Bp[1:(length(mark.pars.df$Bp)-1)])-1

  mark.pars.df$CumBp = rep(0,nrow(mark.pars.df))
  mark.pars.df$CumBp[bidx+2] = mark.pars.df$Bp[bidx+1]
  mark.pars.df$CumBp = cumsum(mark.pars.df$CumBp) + mark.pars.df$Bp
         
  strain.means.df <- data.frame( res[,offset:(offset+Ndip-1)])
  strain.sdevs.df <- data.frame( res[,(offset+Ndip):(offset+2*Ndip-1)])
  strain.avNis.df <- data.frame( res[,(offset+2*Ndip):(offset+3*Ndip-1)])

  if ( HaploidInd ==0 ) {
   names(strain.means.df) <- condensed.db$strains
   names(strain.sdevs.df) <- condensed.db$strains
   names(strain.avNis.df) <- condensed.db$strains
  }
  else {
   strain.names = condensed.db$strains
   num.strains = length(condensed.db$strains)
   diplotype.names <- matrix(kronecker(strain.names, strain.names, paste, sep="."), nrow=num.strains)
   names.full.symmetric <- c( diag(diplotype.names), diplotype.names[upper.tri(diplotype.names, diag=FALSE)])
   names(strain.means.df) <- names.full.symmetric
   names(strain.sdevs.df) <- names.full.symmetric
   names(strain.avNis.df) <- names.full.symmetric
  }


  hbrem.region.list <- list(Summary.Parameters=mark.pars.df, Group.Means=strain.means.df, Group.StDevs=strain.sdevs.df, Group.ExpCounts=strain.avNis.df)

  return(hbrem.region.list)

  }  	

scan.hbrem <- function( condensed.db, data.file, phen.names, haploid=TRUE, mc.cores=10, subject.name="SUBJECT.NAME" ) {

  phen.data = read.delim(data.file)
  err=0
  for ( p in c( subject.name, phen.names )) {
    if ( is.null( phen.data[[p]] ) ) {
      warning( "file ", data.file, " has no column ", p, "\n")
      err = err+1
    }
  }

  if ( err > 0)  return(NULL)

  scans = list()
  for ( phen.name in phen.names ) {
    if ( is.numeric(phen.data[[phen.name]]) ) {
     cc = complete.cases(phen.data[[phen.name]])
     phen.data = phen.data[cc,]
     phen.data$x=phen.data[[phen.name]]
     scan = multicore.condensed.genome.scan.hbrem( condensed.db, phen.data, haploid=haploid, mc.cores=mc.cores)
     write.table(scan$Summary.Parameters, file=paste(phen.name, ".hbrem.summary.txt", sep=""), quote=FALSE, row=FALSE, sep="\t")
     scans[[phen.name]] = scan
   }
  }
  return(scans)
}


plot.scans.hbrem <- function ( scans, pdffile=NULL ) {

  if ( ! is.null(pdffile)) pdf( pdffile ) 
  par(mfrow=c(4,1),cex=0.4)	

  for( p in names(scans)) {
    scan = scans[[p]]$Summary.Parameters
    chr = scan$Chr[2:nrow(scan)]
    boundary = c(1, which(chr != scan$Chr[1:nrow(scan)-1]))
    boundary2 = c(boundary[2:length(boundary)],nrow(scan))
    xc = 0.5*(boundary+boundary2)
    plot( scan$CumBp, scan$F.logPval, t="l", main=p, ylab="logP", xaxs="i", xaxt="n", xlab="position")
    abline( v=scan$CumBp[boundary], col="red")
    axis(1, at=scan$CumBp[xc], labels=chr[boundary])

    plot( scan$CumBp, scan$hpd.kT.50.upper, t="l", col="grey", main=p, ylab="mode.kT", xaxs="i", xaxt="n", xlab="position")
    lines( scan$CumBp, scan$hpd.kT.50.lower, t="l", col="grey")
    lines( scan$CumBp, scan$mode.kT, t="l", col="black")
    abline( v=scan$CumBp[boundary], col="red")
    axis(1, at=scan$CumBp[xc], labels=chr[boundary])
  }
  if ( ! is.null(pdffile )) dev.off()

}

write.gscandb.scans <- function( phenotype, scans, sh.file=NULL, population="MAGIC", genome.build="TAIR9" ) {

        if ( is.null(sh.file))
		sh.file=paste(phenotype,".gscan.upload.sh",sep="")
        handle = file(sh.file,"w")
        cat("cd /data/www/cgi/gscan/\n", file=handle)

	lapply( names(scans), function( p, scans,handle ) {
                cwd = getwd()
		gscandb = data.frame(marker=scans[[p]]$result$from.marker)
	       	gscandb$additive.logP = scans[[p]]$result$logP
		gscandb.file = paste(cwd, "/", p, ".gscandb.txt", sep="")
		cat( "writing ", gscandb.file, "\n")

		cat( "sh gscanDelete.sh population=", population, " phenotype=",p," label=additive.logP\n",sep="", file=handle)
        	cat( "sh gscanLoad.sh pop=", population, " build=", genome.build,genome.build, " pheno=",p," labels=additive.logP update gscan=", gscandb.file, "\n", sep="", file=handle)
		write.table(gscandb,file=gscandb.file,sep="\t",quote=FALSE,row=FALSE)
		q = c( 0.5, 0.9, 0.95 ) 
		quantiles = NULL
		if ( !is.null(scans[[p]]$genomewide.distribution) )
		        quantiles = scans[[p]]$genomewide.distribution[ floor( length(scans[[p]]$genomewide.distribution)*q) ]
		else if ( !is.null(scans[[p]]$permuted.logP ) ) 
		        quantiles = scans[[p]]$permuted.logP[ floor( length(scans[[p]]$permuted.logP)*q) ]
		cat( population, genome.build, p, "additive.logP", "Q=0.50", quantiles[1], "Q=0.90", quantiles[2], "Q=0.95", quantiles[3], "\n", sep="\t")
        }, scans, handle )

  	close(handle)
}