library(happy.hbrem) #library(multicore) library(parallel) library(survival) library(hash) # HAPPY.preCC.R # (C) Richard Mott 2010; richard.mott@well.ox.ac.uk # Distributed under the GPL # This code contains functions to analyse QTLs in MAGIC recombinant inbred lines. # See http:/mus.well.ox.ac.uk/magic for instructions. make.condensed.genome <- function( chrs, prefix, gdir= "./", dir="./CONDENSED/", thin=20, extension=".data", gen=7, haploid=FALSE, mc.cores=10) { lapply( chrs, make.condensed.chr.database, prefix=prefix, gdir=gdir, dir=dir, thin=thin, extension=extension, gen=gen, haploid=haploid) # mclapply( chrs, make.condensed.chr.database, prefix=prefix, gdir=gdir, dir=dir, thin=thin, extension=extension, gen=gen, haploid=haploid, mc.cores=mc.cores) if ( haploid ) models = c( "additive" ) else models = c( "additive", "full" ) db = list( chrs=chrs, prefix=prefix, dir=dir, thin=thin, extension=extension, gen=gen, haploid=haploid, models=models, chrs=chrs ) save( db, file=paste( dir, "/db.RData", sep="")) } make.condensed.chr.database <- function( chr, prefix, gdir="./", dir="./CONDENSED/", thin=20, extension=".data", gen=7, haploid=FALSE) { dir.create(dir, recursive=TRUE, show=FALSE) pedfile = paste(gdir, chr, prefix, extension, sep="") allelesfile = paste( gdir, chr, prefix, ".alleles", sep="") mapfile = paste( gdir, chr, prefix, ".map", sep="") cat( pedfile, file.exists(pedfile), allelesfile, file.exists(allelesfile), mapfile, file.exists(mapfile), "\n") h = happy( pedfile, allelesfile, gen=gen, file="ped", mapfile=mapfile, haploid=haploid) if ( haploid ) models = c( "additive" ) else models = c( "additive", "full" ) subjects=h$subjects strains=h$strains save(subjects, file=paste(dir, "/subjects.RData", sep="")) save(strains, file=paste(dir, "/strains.RData", sep="")) condensed.params = c(thin=thin, pedfile=pedfile, allelesfile=allelesfile, mapfile=mapfile, gen=gen, haploid=haploid, models=models) save(condensed.params, file=paste(dir, "/", chr, ".param.RData", sep="")) subdir = paste(dir, "/chr/", sep="") dir.create(subdir, recursive=TRUE, show=FALSE) for( model in models ) { scandir = paste(subdir, model, "/", sep="") dir.create(scandir, recursive=TRUE, show=FALSE) condensed = make.condensed.matrices( h, thin, model=model ) Rfile = paste(scandir, chr, ".RData", sep="") save(condensed, file=Rfile) x = sapply( condensed, function( x ) { return (c( x$from, x$to, x$from.marker, x$to.marker, x$len ))}) condensed.summary = data.frame(t(x)) names(condensed.summary) = c( "from", "to", "from.marker", "to.marker", "width") condensed.summary$from.bp=h$bp[match(condensed.summary$from.marker,h$marker)] condensed.summary$to.bp=h$bp[match(condensed.summary$to.marker,h$marker)] condensed.summary$chromosome=rep(chr,nrow(condensed.summary)) save(condensed.summary, file=paste(scandir, chr, ".condensed.summary.RData", sep="")) genome = data.frame(marker=h$markers,chromosome=h$chromosome,bp=h$bp,cm=h$map) save(genome, file=paste(scandir, chr, ".summary.RData", sep="")) } } # loads a pre-created database of condensed HAPPY design matrices into memory # returns an object of class "condensed.happy" with the following fields # subjects # strains # chr$additive$$chr$chr1 ....chr$additive$chr19 .... # additive$condensed.summary # chr$full$chr$chr1 ....chr$full$chr19 .... load.condensed.database <- function( dir="./CONDENSED/" ) { load( paste(dir, "/db.RData",sep="")) load( paste(dir, "/strains.RData", sep="")) db$strains = strains; load( paste(dir, "/subjects.RData", sep="")) db$subjects = subjects; cat( "loading condensed db", dir, "\n") models = db$models for( model in models ) { n = 0; db[[model]] = list() db[[model]]$chr = list() db[[model]]$matrices = list() h = hash() files = paste(dir, "/chr/", model, "/", db$chrs, ".RData", sep="") for ( i in 1:length(files)) { chr.data = files[i] load( chr.data ) db[[model]]$chr[[db$chrs[i]]]=condensed db[[model]]$matrices = c( db[[model]]$matrices, condensed) n = n + length(db[[model]]$chr[[db$chrs[i]]]) lapply( condensed, function( item ) { h[[item$from.marker]] <<- item$mat } ) } db[[model]]$hash = h cat( "model", model, "read ", n, " matrices\n"); cat( "loading genome summary\n") db[[model]]$summary = NULL files = paste(dir, "/chr/", model, "/", db$chrs, ".summary.RData", sep="") for (chr.summary in files ) { load( chr.summary ) db[[model]]$summary = rbind( db[[model]]$summary, genome ) } cat ( "loading condensed summary\n") db[[model]]$condensed.summary = NULL files = paste(dir, "/chr/", model, "/", db$chrs, ".condensed.summary.RData", sep="") for (condensed.chr.summary in files ) { load( condensed.chr.summary ) db[[model]]$condensed.summary = rbind( db[[model]]$condensed.summary, condensed.summary ) } # create some data objects so that the resulting object looks like a genome.cache... idx = match( db[[model]]$condensed.summary$from.marker, db[[model]]$summary$marker) db[[model]]$subjects = subjects db[[model]]$subjects = strains db[[model]]$genome = db[[model]]$summary[idx,] db[[model]]$genome$map = db[[model]]$genome$cm db[[model]]$markers = db[[model]]$genome$marker db[[model]]$chromosome = db[[model]]$genome$chromosome db[[model]]$map = db[[model]]$genome$map } load( paste(dir, "/strains.RData", sep="")) db$strains = strains; load( paste(dir, "/subjects.RData", sep="")) db$subjects = subjects; class(db) = "condensed.happy"; return(db) } condensed.hdesign <- function( db, marker, model="additive" ) { return(db[[model]]$hash[[marker]]) } condensed.genome.scan.lm <- function( condensed.db, phen.data, null.model, permute=0, model="additive", phen.name = NULL ) { if ( class(condensed.db) != "condensed.happy" ) { warning( "condensed.db not of class condensed.happy\n") return(NULL) } int = intersect( condensed.db$subjects, unique(phen.data$CC.Line)) cat( length(int), "subjects analysed with phenotypes\n") # phen.data = phen.data[phen.data$CC.Line %in% int,] phen.data = phen.data[match( int, phen.data$CC.Line ,nomatch=0),] if ( !is.null(phen.name)) hist(phen.data[[phen.name]], xlab=phen.name, main=phen.name) permuted.data=NULL if ( is.numeric(permute) & permute>0) { f = lm( as.formula(null.model), data=phen.data, y=TRUE) permuted.data = replicate( permute, sample(residuals(f))) permuted.data = cbind( residuals(f), permuted.data) } db.use = match(int, phen.data$CC.Line, nomatch=0) fit.null = lm( null.model, data=phen.data ); print(anova(fit.null)) genetic.model = paste( as.character(null.model), " + d") result = condensed.db[[model]]$condensed.summary if ( is.numeric(permute) & permute>0) { chr = condensed.db[[model]]$chr perm.result = NULL for ( i in 1:length(chr)) { mats = chr[[i]] plp = scan.chromosome.lm( mats, db.use, genetic.model, phen.data, permuted.data, fit.null) perm.result = rbind(perm.result,t(plp)) } result$logP = perm.result[,1] result$pointwise.permute.pval = apply( perm.result, 1, function(w) {sum(w[1]<=w[2:length(w)])} )/permute genomewide.distribution = sort(apply( perm.result[,2:ncol(perm.result)], 2, max )) result$genomewide.permute.pval = sapply( result$logP, function(x,genomewide.max) { sum(x <= genomewide.max)}, genomewide.max)/permute return( list(result=result, genomewide.distribution=genomwide.distribution) ) } else { logP = NULL chr = condensed.db[[model]]$chr for ( i in 1:length(chr)) { mats = chr[[i]] lp = sapply( mats, function( e, db.use, genetic.model, phen.data, fit.null, subjects) { d = e$mat rownames(d) = subjects d = d[db.use,] fit.genetic = lm( as.formula(genetic.model), data=phen.data) a = anova(fit.null, fit.genetic) return( -log10(a[2,6])) } , db.use, genetic.model, phen.data, fit.null, condensed.db$subjects) logP = c( logP, lp ) } result$logP = logP return(list(result=result)) } } scan.chromosome.lm <- function( mats, db.use, genetic.model, phen.data, permuted.data, fit.null ) { return( sapply( mats, test.locus.lm, db.use, genetic.model, phen.data, permuted.data, fit.null )) } scan.chromosome.survival <- function( mats, db.use, genetic.model, phen.data, fit.null, subjects ) { cat ("scan.chromosome.survival\n"); return( sapply( mats, test.locus.survival, db.use, genetic.model, phen.data, fit.null, subjects )) } test.locus.name.lm <- function( locus, db, db.use, genetic.model, phen.data, permuted.data, fit.null ) { d = condensed.hdesign(db, locus) return(test.locus.internal.lm( d, db.use, genetic.model, phen.data, permuted.data, fit.null ) ) } test.locus.lm <- function( e, db.use, genetic.model, phen.data, permuted.data, fit.null) { d = e$mat return(test.locus.internal.lm( d, db.use, genetic.model, phen.data, permuted.data, fit.null ) ) } test.locus.internal.lm <- function( d, db.use, genetic.model, phen.data, permuted.data, fit.null ) { d = d[db.use,] fit.genetic = lm( as.formula(genetic.model), data=phen.data, qr=TRUE, y=TRUE) a = anova(fit.null, fit.genetic) y = fit.genetic$y y = y-mean(y) tss = sum(y*y) n = length(y)-fit.null$rank df = fit.genetic$rank - fit.null$rank rss = apply( permuted.data, 2, function( y, qr ) { r = qr.resid(qr,y); return(sum(r*r)) } , fit.genetic$qr ) fss = tss-rss; f.stat=(fss/df)/(rss/(n-df)); p = -pf( f.stat, df, n-df, lower=FALSE, log=TRUE)/log(10) # return( c(-log10(a[2,6]), p)) return( p) } test.locus.survival <- function( e, db.use, genetic.model, phen.data, fit.null, subjects ) { d = e$mat rownames(d) = subjects d = d[db.use,] phen.data$d = d fit.genetic = survreg( as.formula(genetic.model), data=phen.data) a = anova(fit.null, fit.genetic) # return( c(-log10(a[2,6]), p)) return( -log10(a[2,7])) } multicore.condensed.genome.scan.lm <- function( condensed.db, phen.data, null.model, permute=0, model="additive", mc.cores=8, subject.name="CC.Line" ) { if ( class(condensed.db) != "condensed.happy" ) { warning( "condensed.db not of class condensed.happy\n") return(NULL) } int = intersect( condensed.db$subjects, unique(phen.data[[subject.name]])) cat( length(int), "subjects analysed with phenotypes\n") phen.data = phen.data[match(int, phen.data[[subject.name]],nomatch=0),] permuted.data=NULL if ( is.numeric(permute) & permute>0) { f = lm( as.formula(null.model), data=phen.data, y=TRUE) permuted.data = replicate( permute, sample(residuals(f))) permuted.data = cbind( residuals(f), permuted.data) } db.use = match(int, condensed.db$subjects,nomatch=0) fit.null = lm( null.model, data=phen.data ); print(anova(fit.null)) genetic.model = paste( paste(as.character(null.model)), " + d", collapse=" ") result = condensed.db[[model]]$condensed.summary if ( is.numeric(permute) & permute>0) { chr = condensed.db[[model]]$chr perm.result = NULL # plps = lapply(chr, scan.chromosome.lm, db.use, genetic.model, phen.data, permuted.data, fit.null ) plps = mclapply(chr, scan.chromosome.lm, db.use, genetic.model, phen.data, permuted.data, fit.null, mc.preschedule = TRUE, mc.set.seed = TRUE, mc.silent = FALSE, mc.cores=mc.cores ) for ( i in 1:length(plps)) { perm.result = rbind(perm.result,t(plps[[i]])) } result$logP = perm.result[,1] result$pointwise.permute.pval = apply( perm.result, 1, function(w) {sum(w[1]<=w[2:length(w)])} )/permute genomewide.distribution = sort(apply( perm.result[,2:ncol(perm.result)], 2, max )) result$genomewide.permute.pval = sapply( result$logP, function(x,genomewide.distribution) { sum(x <= genomewide.distribution)}, genomewide.distribution)/permute return( list(result=result, genomewide.distribution=genomewide.distribution) ) } else { logP = NULL chr = condensed.db[[model]]$chr for ( i in 1:length(chr)) { mats = chr[[i]] lp = sapply( mats, function( e, db.use, genetic.model, phen.data, fit.null, subjects) { d = e$mat rownames(d) = subjects d = d[db.use,] fit.genetic = lm( as.formula(genetic.model), data=phen.data) a = anova(fit.null, fit.genetic) return( -log10(a[2,6])) } , db.use, genetic.model, phen.data, fit.null, condensed.db$subjects) logP = c( logP, lp ) } result$logP = logP return(list(result=result)) } } multicore.condensed.genome.scan.survival <- function( condensed.db, phen.data, null.model, permute=0, model="additive", mc.cores=8 ) { if ( class(condensed.db) != "condensed.happy" ) { warning( "condensed.db not of class condensed.happy\n") return(NULL) } int = intersect( condensed.db$subjects, unique(phen.data$CC.Line)) cat( length(int), "subjects analysed with phenotypes\n") phen.data = phen.data[phen.data$CC.Line %in% int,] null.model = as.formula( "Surv(phen.data$x,phen.data$death) ~ Sex") base.model = as.formula( "Surv(phen.data$x,phen.data$death) ~ 1") db.use = phen.data$CC.Line fit.null = survreg( null.model, data=phen.data ); fit.base = survreg( base.model, data=phen.data ); print(anova(fit.base,fit.null)) genetic.model = as.formula( "Surv(phen.data$x,phen.data$death) ~ Sex + d") result = condensed.db[[model]]$condensed.summary chr = condensed.db[[model]]$chr plps = mclapply(chr, scan.chromosome.survival, db.use, genetic.model, phen.data, fit.null, condensed.db$subjects, mc.preschedule = TRUE, mc.set.seed = TRUE, mc.silent = FALSE, mc.cores=mc.cores ) logP = NULL for ( i in 1:length(plps)) logP = c( logP, plps[[i]]) result$logP = logP return( list(result=result)) } make.condensed.matrices <-function( h, thin=100, model="additive" ) { nm = length(h$markers)-thin condensed = lapply( seq(1,nm,thin), function ( m, h, thin ) { mat = hdesign( h, m, model=model ) m2=m if ( thin > 1 ) { m1 = m+1 m2 = m+thin-1 if ( m2 >= length(h$markers)) m2 = length(h$markers)-1 denom = m2-m+1 for( i in m1:m2) { mat = mat + hdesign( h, i, model=model ) } mat = mat/thin } # cat( m, m2, h$markers[m],h$markers[m2],thin, "\n") return( list( mat=mat, from=m, to=m2, from.marker=h$markers[m], to.marker=h$markers[m2], len=thin )) }, h, thin ) return(condensed) } read.phenotypes <- function( data.file, condense=TRUE, condense.phenotype=NULL ) { df = read.delim(data.file) nam = names(df) err=0 if ( is.null(df$CC.Line) ) { warning( "file ", data.file, "has no column CC.Line\n") err = err+1 } if ( is.null(df$sex) ) { warning( "file ", data.file, "has no column sex\n") err = err+1 } if ( err > 0) return(NULL) if ( condense ) { condensed = list() CC.lines = unique(as.character(df$CC.Line)) if ( is.null(condense.phenotype)) { for ( col in names(df)) { if ( is.numeric(df[[col]]) ) { a = aggregate( df[[col]], by=list(CC.Line=df$CC.Line, sex=df$sex), FUN="median") condensed[[col]] = a$x[match(CC.lines,a$CC.Line)]; } else condensed[[col]] = df[match(CC.lines, df$CC.Line),col] } } else { if ( is.numeric(df[[condense.phenotype]]) ) { a = aggregate( df[[condense.phenotype]], by=list(CC.Line=df$CC.Line), FUN=function(x) { quantile( x, p=c(0.5), type=1 ) } ) idx = which( a$CC.Line==a$CC.Line & a[[condense.phenotype]]==a$x) condensed = a[idx,] } else { warning( "condensed phenotype ", condense.phenotype, "not numeric\n") return(NULL) } } return(data.frame(condensed)) } else { return(df) } } scan.phenotypes.lm <- function( condensed.db, data.file, phen, permute=0, histogram.pdf=NULL, covariates = c("Sex"), mc.cores=10 ) { phen.data = read.delim(data.file) if ( !is.null(covariates)) { cols = c( "CC.Line", phen) null.model = paste( "~", paste( covariates, sep=" + ")) } else { null.model = "~ 1" cols = c( "CC.Line", phen, covariates ) } err=0 for ( p in cols ) { if ( is.null( phen.data[[p]] ) ) { warning( "file ", data.file, " has no column ", p, "\n") err = err+1 } } if ( err > 0) return(NULL) scans = list() phens = c() for ( p in phen ) { cat( p, "\n") if ( is.numeric(phen.data[[p]]) ) { phens = c(phens, p) phen.data[[p]] = ifelse( phen.data[[p]] == 0.0 , NA, phen.data[[p]] ) agg.phen.data = aggregate( phen.data[[p]], by=list(CC.Line=phen.data$CC.Line, Sex=phen.data$Sex), FUN="median") agg.phen.data[[p]] = agg.phen.data$x # scan = condensed.genome.scan.lm( condensed.db, agg.phen.data, paste( p, null.model), permute=permute, phen.name=phen.name) scan = multicore.condensed.genome.scan.lm( condensed.db, agg.phen.data, paste( p, null.model), permute=permute, mc.cores=mc.cores) write.table(scan$result, file=paste(p, ".scan.txt", sep=""), quote=FALSE, row=FALSE, sep="\t") scans[[p]] = scan } } plot.histograms( histogram.pdf, agg.phen.data, phens ) return(scans) } survival.median <- function(y) { quantile( y, probs=0.5, type=1, na.rm=TRUE); } scan.survival <- function( condensed.db, data.file, surv.time, surv.censored=28, permute=0, histogram.pdf=NULL, mc.cores=10 ) { phen.data = read.delim(data.file) null.model = "~ Sex" err=0 for ( p in c( "CC.Line", "Sex", surv.time )) { if ( is.null( phen.data[[p]] ) ) { warning( "file ", data.file, " has no column ", p, "\n") err = err+1 } } if ( err > 0) return(NULL) scans = list() if ( is.numeric(phen.data[[surv.time]]) ) { agg.phen.data = aggregate( phen.data[[surv.time]], by=list(CC.Line=phen.data$CC.Line, Sex=phen.data$Sex), FUN="median" ) agg.phen.data$death = agg.phen.data$x < surv.censored s = Surv( agg.phen.data$x, agg.phen.data$death ) # scan = condensed.genome.scan.lm( condensed.db, agg.phen.data, paste( , null.model), permute=permute, phen.name=phen.name) scan = multicore.condensed.genome.scan.survival( condensed.db, agg.phen.data, null.model, permute=permute, mc.cores=mc.cores) write.table(scan$result, file=paste(p, ".scan.txt", sep=""), quote=FALSE, row=FALSE, sep="\t") scans[[p]] = scan } phen.data.plot = data.frame(x = agg.phen.data$x) names(phen.data.plot) = c( surv.time) plot.histograms( histogram.pdf, phen.data.plot, c(surv.time) ) return(scans) } plot.histograms <- function( histogram.pdf, phen.data, phen ) { if ( ! is.null(histogram.pdf)) { pdf(histogram.pdf) par(mfrow=c(3,3)) for ( p in phen ) { if ( ! is.null(phen.data[[p]])) hist(phen.data[[p]], xlab=p, main=p) } dev.off() } } plot.scans <- function ( db, scans, pdffile=NULL,q = c( 0.5, 0.9, 0.95 ) ) { if ( ! is.null(pdffile)) pdf( pdffile ) par(mfrow=c(4,1),cex=0.4) for( p in names(scans)) { scan = scans[[p]]$result quantiles = scans[[p]]$genomewide.distribution[ floor( length(scans[[p]]$genomewide.distribution)*q) ] my = max(quantiles,scan$logP) chr = scan$chromosome[2:nrow(scan)] boundary = c(1, which(chr != scan$chromosome[1:nrow(scan)-1])) boundary2 = c(boundary[2:length(boundary)],nrow(scan)) xc = 0.5*(boundary+boundary2) plot( 1:nrow(scan), scan$logP, t="l", ylim=c(0,my*1.05),main=p, ylab="logP", xaxs="i", xaxt="n", xlab="position") abline( v=boundary, col="red") abline(h=quantiles,col="grey") axis(1, at=xc, labels=chr[boundary]) if ( !is.null(scans[[p]]$qtls) ) { qtls = scans[[p]]$qtls[scans[[p]]$qtls$phenotype==p,] x = match( qtls$peak.SNP, scan$from.marker) points(x,rep(my,length(x)),pch=20,col="orange") } } if ( ! is.null(pdffile)) dev.off() } # HBREM scan.chromosome.hbrem <- function( loci, db.use, phen.data, HaploidInd, Ndip, Nind, Npost, Nbin) { return(sapply( loci, test.locus.hbrem, phen.data, db.use, HaploidInd, Ndip, Nind, Npost, Nbin) ) } test.locus.hbrem <- function(locus, phen.data, db.use, HaploidInd, Ndip, Nind, Npost, Nbin) { d = locus$mat[db.use,] hb <- hbrem(RX=d, HaploidInd=HaploidInd, Ndip=Ndip, Nind=Nind, Npost=Npost, Nbin=Nbin, Ry=phen.data) reg.full.lm <- lm(phen.data ~ d) reg.null.lm <- lm(phen.data ~ 1) Ftest <- anova(reg.null.lm, reg.full.lm) pval <- Ftest$"Pr(>F)"[2] # cat( locus$from.marker, -log10(pval), hb[[1]][12],"\n" ) return( c( -log10(pval), hb[[1]], hb[[2]], hb[[3]], hb[[4]] )) } multicore.condensed.genome.scan.hbrem <- function( condensed.db, phen.data, mc.cores=8, haploid=FALSE ) { if ( class(condensed.db) != "condensed.happy" ) { warning( "condensed.db not of class condensed.happy\n") return(NULL) } Npost <- 2000 Nbin <- 2 HaploidInd <- ifelse( haploid, 0, 1) if ( HaploidInd == 0 ) { model = "additive" Ndip = length(condensed.db$strains) } else { model = "full" ns = length(condensed.db$strains) Ndip = ns*(ns+1)/2 } int = intersect( condensed.db$subjects, unique(phen.data$SUBJECT.NAME)) cat( length(int), "subjects analysed with phenotypes\n") phen.data = phen.data[phen.data$SUBJECT.NAME %in% int,] db.use = match( phen.data$SUBJECT.NAME, condensed.db$subjects) Nind = nrow(phen.data) result = condensed.db[[model]]$condensed.summary chr = condensed.db[[model]]$chr plps = mclapply(chr, scan.chromosome.hbrem, db.use, phen.data$x, HaploidInd, Ndip, Nind, Npost, Nbin, mc.preschedule = TRUE, mc.set.seed = TRUE, mc.silent = FALSE, mc.cores=mc.cores ) # plps = lapply(chr, scan.chromosome.hbrem, db.use, phen.data$x, HaploidInd, Ndip, Nind, Npost, Nbin) res = t(do.call( "cbind", plps)) mark.pars.df <- data.frame(res[,1:46]) names(mark.pars.df) = c( "F.logPval", "Hbar", "sd.Ni", "BIC.qtl", "BIC.null", "BF", "logBF", "DIC.qtl", "DIC.null", "DIC.diff", "pd.qtl", "pd.null", "mode.kT", "ga", "gb", "mode.var", "med.kT", "med.mu", "med.var", "mean.kT", "mean.mu", "mean.var", "hpd.kT.50.lower", "hpd.kT.50.upper", "hpd.mu.50.lower", "hpd.mu.50.upper", "hpd.var.50.lower", "hpd.var.50.upper", "hpd.kT.75.lower", "hpd.kT.75.upper", "hpd.mu.75.lower", "hpd.mu.75.upper", "hpd.var.75.lower", "hpd.var.75.upper", "hpd.kT.95.lower", "hpd.kT.95.upper", "hpd.mu.95.lower", "hpd.mu.95.upper", "hpd.var.95.lower", "hpd.var.95.upper", "hpd.kT.99.lower", "hpd.kT.99.upper", "hpd.mu.99.lower", "hpd.mu.99.upper", "hpd.var.99.lower", "hpd.var.99.upper") offset = ncol(mark.pars.df) mark.pars.df$Name=condensed.db[[model]]$genome$marker mark.pars.df$Chr = condensed.db[[model]]$genome$chromosome mark.pars.df$Bp = condensed.db[[model]]$genome$bp bp2 = mark.pars.df$Bp[2:length(mark.pars.df$Bp)] bidx = which(bp2 < mark.pars.df$Bp[1:(length(mark.pars.df$Bp)-1)])-1 mark.pars.df$CumBp = rep(0,nrow(mark.pars.df)) mark.pars.df$CumBp[bidx+2] = mark.pars.df$Bp[bidx+1] mark.pars.df$CumBp = cumsum(mark.pars.df$CumBp) + mark.pars.df$Bp strain.means.df <- data.frame( res[,offset:(offset+Ndip-1)]) strain.sdevs.df <- data.frame( res[,(offset+Ndip):(offset+2*Ndip-1)]) strain.avNis.df <- data.frame( res[,(offset+2*Ndip):(offset+3*Ndip-1)]) if ( HaploidInd ==0 ) { names(strain.means.df) <- condensed.db$strains names(strain.sdevs.df) <- condensed.db$strains names(strain.avNis.df) <- condensed.db$strains } else { strain.names = condensed.db$strains num.strains = length(condensed.db$strains) diplotype.names <- matrix(kronecker(strain.names, strain.names, paste, sep="."), nrow=num.strains) names.full.symmetric <- c( diag(diplotype.names), diplotype.names[upper.tri(diplotype.names, diag=FALSE)]) names(strain.means.df) <- names.full.symmetric names(strain.sdevs.df) <- names.full.symmetric names(strain.avNis.df) <- names.full.symmetric } hbrem.region.list <- list(Summary.Parameters=mark.pars.df, Group.Means=strain.means.df, Group.StDevs=strain.sdevs.df, Group.ExpCounts=strain.avNis.df) return(hbrem.region.list) } scan.hbrem <- function( condensed.db, data.file, phen.names, haploid=TRUE, mc.cores=10, subject.name="SUBJECT.NAME" ) { phen.data = read.delim(data.file) err=0 for ( p in c( subject.name, phen.names )) { if ( is.null( phen.data[[p]] ) ) { warning( "file ", data.file, " has no column ", p, "\n") err = err+1 } } if ( err > 0) return(NULL) scans = list() for ( phen.name in phen.names ) { if ( is.numeric(phen.data[[phen.name]]) ) { cc = complete.cases(phen.data[[phen.name]]) phen.data = phen.data[cc,] phen.data$x=phen.data[[phen.name]] scan = multicore.condensed.genome.scan.hbrem( condensed.db, phen.data, haploid=haploid, mc.cores=mc.cores) write.table(scan$Summary.Parameters, file=paste(phen.name, ".hbrem.summary.txt", sep=""), quote=FALSE, row=FALSE, sep="\t") scans[[phen.name]] = scan } } return(scans) } plot.scans.hbrem <- function ( scans, pdffile=NULL ) { if ( ! is.null(pdffile)) pdf( pdffile ) par(mfrow=c(4,1),cex=0.4) for( p in names(scans)) { scan = scans[[p]]$Summary.Parameters chr = scan$Chr[2:nrow(scan)] boundary = c(1, which(chr != scan$Chr[1:nrow(scan)-1])) boundary2 = c(boundary[2:length(boundary)],nrow(scan)) xc = 0.5*(boundary+boundary2) plot( scan$CumBp, scan$F.logPval, t="l", main=p, ylab="logP", xaxs="i", xaxt="n", xlab="position") abline( v=scan$CumBp[boundary], col="red") axis(1, at=scan$CumBp[xc], labels=chr[boundary]) plot( scan$CumBp, scan$hpd.kT.50.upper, t="l", col="grey", main=p, ylab="mode.kT", xaxs="i", xaxt="n", xlab="position") lines( scan$CumBp, scan$hpd.kT.50.lower, t="l", col="grey") lines( scan$CumBp, scan$mode.kT, t="l", col="black") abline( v=scan$CumBp[boundary], col="red") axis(1, at=scan$CumBp[xc], labels=chr[boundary]) } if ( ! is.null(pdffile )) dev.off() } write.gscandb.scans <- function( phenotype, scans, sh.file=NULL, population="MAGIC", genome.build="TAIR9" ) { if ( is.null(sh.file)) sh.file=paste(phenotype,".gscan.upload.sh",sep="") handle = file(sh.file,"w") cat("cd /data/www/cgi/gscan/\n", file=handle) lapply( names(scans), function( p, scans,handle ) { cwd = getwd() gscandb = data.frame(marker=scans[[p]]$result$from.marker) gscandb$additive.logP = scans[[p]]$result$logP gscandb.file = paste(cwd, "/", p, ".gscandb.txt", sep="") cat( "writing ", gscandb.file, "\n") cat( "sh gscanDelete.sh population=", population, " phenotype=",p," label=additive.logP\n",sep="", file=handle) cat( "sh gscanLoad.sh pop=", population, " build=", genome.build,genome.build, " pheno=",p," labels=additive.logP update gscan=", gscandb.file, "\n", sep="", file=handle) write.table(gscandb,file=gscandb.file,sep="\t",quote=FALSE,row=FALSE) q = c( 0.5, 0.9, 0.95 ) quantiles = NULL if ( !is.null(scans[[p]]$genomewide.distribution) ) quantiles = scans[[p]]$genomewide.distribution[ floor( length(scans[[p]]$genomewide.distribution)*q) ] else if ( !is.null(scans[[p]]$permuted.logP ) ) quantiles = scans[[p]]$permuted.logP[ floor( length(scans[[p]]$permuted.logP)*q) ] cat( population, genome.build, p, "additive.logP", "Q=0.50", quantiles[1], "Q=0.90", quantiles[2], "Q=0.95", quantiles[3], "\n", sep="\t") }, scans, handle ) close(handle) }