R version 2.15.1 (2012-06-22) -- "Roasted Marshmallows" Copyright (C) 2012 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > model='additive' > nb_cores=16 > > ##transforms the mixed model into a linear model and gets pvalue of association > my_function=function(element,mitch,multiplicator,transformed_pheno,x_mat,model) { + #trick to deal with marker files starting with weird name + element=element[mitch,] + #element is the design matrix + #to deal with subjects.data, strains.data etc that will be loaded too + n=dim(element)[1] + yt <- multiplicator %*% transformed_pheno + x_matt = multiplicator %*% x_mat + elementt <- multiplicator %*% element + fit0=lm(yt~ -1+x_matt) + fit1=lm(yt~ -1+x_matt+elementt) + anova=anova(fit0,fit1) + p=anova[,'Pr(>F)'][2] + return(p) + } > > my_function_X=function(element,mitch,multiplicator,transformed_pheno,x_mat,model) { + #trick to deal with marker files starting with weird name + element=element[mitch,] + #element is the design matrix + #to deal with subjects.data, strains.data etc that will be loaded too + n=dim(element)[1] + yt <- multiplicator %*% transformed_pheno + x_matt = multiplicator %*% x_mat + elementt <- multiplicator %*% element + fit0=lm(yt~ -1+x_matt) + fit1=lm(yt~ -1+x_matt+elementt+x_matt[,2]:elementt) + anova=anova(fit0,fit1) + p=anova[,'Pr(>F)'][2] + return(p) + } > > ###### > library(multicore) > library(emma) > > > final_mat=read.delim('/data/HSrats/rats/cytokines_Rnor5/Cytokines_CTnorm_BBnorm_DATA_140220.txt',as.is=T,row.names=1) > w=which(colnames(final_mat)=='SEX') > colnames(final_mat)[w]='sex' > w=which(colnames(final_mat)=='BATCH') > colnames(final_mat)[w]='batch' > final_mat[,'batch']=paste('Batch',final_mat[,'batch']) > for (i in 1:dim(final_mat)[2]) { + w=which(grepl(',',final_mat[,i],fixed=T)) + if (length(w)!=0) final_mat[w,i]=sub(',','.',final_mat[w,i],fixed=T) + if (any(grepl(',',final_mat[,i],fixed=T))) print(i) + } > > > #kinship matrix = IBS matrix > load('/data/HSrats/rats/genome_cache_Rnor5/kinship_Rnor5_pairwise_complete.RData') > remove=match(c("VVD_RAT_13sep10.CEL","VVF_RAT_09sep10.CEL"),colnames(kinship)) > kinship=kinship[-remove,-remove] > > #load('/data/HSrats/rats/GCTA/input4EMMA/genetic_corr_matrix_GCTA.RData') > #kinship=corr2 > kinship_fixed=kinship > #corresp [kinship et ped] et phenotypes: link between cel files names and animals ids > tot_genotyped=read.delim('/Net/fs1/groups/mott/Amelie/data_mining/new_tot_genotyped.txt',as.is=T,header=F) > colnames(tot_genotyped)=c('CEL_name','SUBJECT.NAME') > #measures and covariates > model_menu=read.delim('/data/HSrats/rats/cytokines_Rnor5/model_menu_final_cytokines.txt',header=T,colClasses='character',na.strings = "") > model_menu[,'covariates']=sub('SEX','sex',model_menu[,'covariates']) > model_menu[,'covariates']=sub('BATCH','batch',model_menu[,'covariates']) > > #map: location markers > map=read.table('/data/HSrats/rats/genome_cache_Rnor5/all_map.txt',as.is=T) > > ###to deal properly with the covariates > covariates=c() > for(i in 1:dim(model_menu)[1]) { + cov=strsplit(model_menu[i,'covariates'],' + ',fixed=T)[[1]] + covariates=c(covariates,cov) + } > covariates=unique(covariates) > any(grep(' ',covariates)) [1] FALSE > #FALSE > covariates_types=rep('n',length(covariates)) > motch=match(c('sex','batch'),covariates) > covariates_types[motch]='f' > names(covariates_types)=covariates > > my_do=function(i) { + measure_name=model_menu[i,2] + print(measure_name) + + # if (file.exists(paste('/data/HSrats/rats/mapping_wo_duplicate/gls_design_',model_menu[i,1],'_',measure_name,'_',model,'_init.txt',sep=''))) return(NULL) + + ##first of all creates data which has measure and covariates + if (!is.na(model_menu[i,'covariates'])) covs=strsplit(model_menu[i,'covariates'],' + ',fixed=T)[[1]] else covs=NULL + #covariates_types are types which can be selected by their names = homogenous to covs + if (!is.null(covs) && any(covariates_types[covs]=='sel')) { + SEL=TRUE + #at least 2 covs: 'sel' + un autre + if (length(covs)>1) { + pos_in_covs=which(covariates_types[covs] == 'sel') + #new_covs without 'sel' + new_covs=covs[-pos_in_covs] + data=final_mat[(!is.na(final_mat[,measure_name])) & (!apply(X=is.na(final_mat[,new_covs]),FUN=any,MARGIN=1)) & as.logical(final_mat[,covs[pos_in_covs]]),c(measure_name,new_covs)] + + } else { + #only one cov: has to be 'sel': will be TRUE/FALSE for keep? + #only one necessary for for loop + new_covs=NULL + data=final_mat[(!is.na(final_mat[,measure_name])) & as.logical(final_mat[,covs]),measure_name,drop=F] + + } + } else { + SEL=FALSE + new_covs=covs + + #at least 2 covs + if (!is.null(covs)&length(covs)>1) { + data=final_mat[(!is.na(final_mat[,measure_name]))&(!apply(X=is.na(final_mat[,covs]),FUN=any,MARGIN=1)),c(measure_name,covs)] + + } else if (!is.null(covs)&length(covs)==1) { + #only one cov + data=final_mat[(!is.na(final_mat[,measure_name]))&(!is.na(final_mat[,covs])),c(measure_name,covs)] + + } else + #no cov + data=final_mat[!is.na(final_mat[,measure_name]),measure_name,drop=F] + } + + print(head(data)) + + kinship=kinship_fixed + #to go from cel names to ids + motch=match(colnames(kinship),tot_genotyped[,'CEL_name']) + kinship=kinship[!is.na(motch),!is.na(motch)] + motch=match(colnames(kinship),tot_genotyped[,'CEL_name']) + if (any(is.na(motch))) {print('erreur na dans motch') ; stop('erreur na dans motch')} + ids_kinship=tot_genotyped[motch,'SUBJECT.NAME'] + + #ya des data qui ont pas ete genotype dc pas ds kinship + #mais ya aussi ds kinship qui ont pas ete phenotypes (parents et founders) + motch=match(rownames(data),ids_kinship) + data=data[!is.na(motch),,drop=F] + kinship=kinship[na.exclude(motch),na.exclude(motch)] + + load(paste('/data/HSrats/rats/genome_cache_Rnor5/chr1/subjects.RData',sep='')) + mitch=match(colnames(kinship),subjects) + if (any(is.na(mitch))) {print('erreur na dans mitch') ; stop('erreur na dans mitch')} + transformed_pheno=data[,measure_name] + + x_mat=matrix(1,nrow=dim(data)[1],ncol=1) + #length(NULL)=0 + + if (!is.null(new_covs)){ + for (c in 1:length(new_covs)) { + cov=new_covs[c] + type_cov=covariates_types[cov] + if (type_cov=='f') { + old=data[,cov] + old=as.factor(old) + levels=levels(old) + n=length(levels)-1 + matrice=matrix(ncol=n,nrow=length(old),NA) + colnames(matrice)=levels[1:n] + for (l in levels[1:n]) { + matrice[,l]=(old==l) + } + x_mat=cbind(x_mat,matrice) + } else { + data[,cov]=as.numeric(data[,cov]) + x_mat=cbind(x_mat,data[,cov]) + } + } + } + n=dim(kinship)[1] + + vecti=matrix(1,ncol=1,nrow=n) + p=diag(n)-(vecti%*%t(vecti))/n + num=(n-1)*kinship + denom=sum(diag(p%*%kinship%*%p)) + unit_kinship=num/denom + + ###variance components estimation + emma=try(emma.REMLE(y=transformed_pheno, X=x_mat, K=unit_kinship, ngrids=100, llim=-10, ulim=10,esp=1e-10, eig.R = NULL)) + + V=emma$vg*unit_kinship+emma$ve*diag(n) + + VCs=list(vg=emma$vg, ve=emma$ve) + ##creates 'multiplicator' = matrix to apply to the mixed models to transform them into linear models + #eigendecomposition of the covariance matrix + svd=try(svd(V)) + if (inherits(svd, "try-error")) {print('simulated_phenotype');next} + #check that V = svd$u %*% diag(x=eigen_values) %*% t(svd$v) = svd$u %*% diag(x=eigen_values) %*% solve(svd$u) + eigen_basis=svd$u + eigen_values=svd$d + teigen_basis=t(eigen_basis) + + #will multiply all terms by the inverse of r_mat which is so that V = R %*% R + sqrt_eigen_values=sqrt(eigen_values) + sqrt_lambda_matrix=diag(x=sqrt_eigen_values) + r_mat=eigen_basis%*%sqrt_lambda_matrix%*%teigen_basis + multiplicator=solve(r_mat) + + ##test association with all happy intervals (haplotypes) + #keep only some of the animals in the design matrix + chrs=paste('chr',c(1:20,'X'),sep='') + results=c() + for (chr in chrs) { + #has all the names of the .RData in the genome cache, including bp.RData, subjects.RData etc that well get rid of by checking the dimansions + load(paste('/data/HSrats/rats/genome_cache_Rnor5/',chr,'_all_markers.RData',sep='')) + if (chr=='X') result=mclapply(X=mget(content,envir=as.environment(-1)),FUN=my_function_X,mitch=mitch,multiplicator=multiplicator,transformed_pheno=transformed_pheno,x_mat=x_mat,model=model,mc.cores=nb_cores) else + result=mclapply(X=mget(content,envir=as.environment(-1)),FUN=my_function,mitch=mitch,multiplicator=multiplicator,transformed_pheno=transformed_pheno,x_mat=x_mat,model=model,mc.cores=nb_cores) + result=unlist(result) + names(result)=content + results=c(results,result) + rm(list=c(content,'content')) + } + motch=match(names(results),map[,1]) + table=data.frame(marker=names(results),pvalue=results,chr=map[motch,2],bp=map[motch,3]) + table[,'pvalue']=as.numeric(table[,'pvalue']) + write.table(table,file=paste('/data/HSrats/rats/cytokines_Rnor5/gls_design_',model_menu[i,1],'_',measure_name,'_additive.txt',sep=''),sep='\t',quote=F,row.names=F,col.names=F) + return(VCs) + } > > all_VCs=list() > for (i in 1:dim(model_menu)[1]) { + if (!is.na(model_menu[i,'map_w_mixed']) & model_menu[i,'map_w_mixed']=='yes') { + print(model_menu[i,2]) + res=try(my_do(i)) + if (inherits(res, "try-error")) next + all_VCs[model_menu[i,2]]=res + } else next + } [1] "CCR_CT_BBNORM" [1] "CCR_CT_BBNORM" CCR_CT_BBNORM CCR_COV sex 45 1.187470 262.72 1 89 1.191815 263.06 1 90 1.178072 263.06 1 91 1.183811 263.06 1 92 1.178783 263.06 1 93 1.176392 263.06 1 [1] "EF_CT_BBNORM" [1] "EF_CT_BBNORM" EF_CT_BBNORM EF_COV batch sex 1 1.548207 155.71 Batch 1 1 3 1.474439 155.71 Batch 1 1 5 1.783264 155.71 Batch 1 1 6 1.463055 155.71 Batch 1 1 7 1.592636 155.71 Batch 1 1 8 1.699325 155.71 Batch 1 1 [1] "GCR_CT_BBNORM" [1] "GCR_CT_BBNORM" GCR_CT_BBNORM GCR_COV batch sex 45 1.001593 3.53 Batch 1 1 89 1.021610 5.2 Batch 1 1 90 1.038045 5.2 Batch 1 1 91 1.010164 5.2 Batch 1 1 92 1.056643 5.2 Batch 1 1 93 1.064094 5.2 Batch 1 1 [1] "gF_CT_BBNORM" [1] "gF_CT_BBNORM" gF_CT_BBNORM gF_COV sex 1 1.1426497 11.24 1 10 1.0193227 11.24 1 11 1.1456519 11.24 1 12 0.9475890 11.24 1 13 1.1786340 11.24 1 15 0.8750033 11.24 1 [1] "GMC_CT_BBNORM" [1] "GMC_CT_BBNORM" GMC_CT_BBNORM GMC_COV sex 1 1.0639235 18.41 1 3 0.9075004 18.41 1 5 1.1431191 18.41 1 6 0.8347594 18.41 1 7 0.8692559 18.41 1 8 1.0978956 18.41 1 [1] "I10R_CT_BBNORM" [1] "I10R_CT_BBNORM" I10R_CT_BBNORM I10R_COV batch sex 1 2.046964 15275.11 Batch 1 1 3 1.996330 15275.11 Batch 1 1 5 2.127295 15275.11 Batch 1 1 6 2.067957 15275.11 Batch 1 1 7 2.106946 15275.11 Batch 1 1 8 2.143796 15275.11 Batch 1 1 [1] "I12_CT_BBNORM" [1] "I12_CT_BBNORM" I12_CT_BBNORM I12_COV batch sex 1 1.292404 38.92 Batch 1 1 3 1.038525 38.92 Batch 1 1 5 1.368210 38.92 Batch 1 1 6 1.000976 38.92 Batch 1 1 7 1.109839 38.92 Batch 1 1 8 1.366321 38.92 Batch 1 1 [1] "I13_CT_BBNORM" [1] "I13_CT_BBNORM" I13_CT_BBNORM I13_COV batch sex 1 1.1779934 25.9 Batch 1 1 3 0.9814553 25.9 Batch 1 1 5 1.2872163 25.9 Batch 1 1 7 1.0608466 25.9 Batch 1 1 8 1.2523437 25.9 Batch 1 1 9 1.2526561 25.9 Batch 1 1 [1] "I17R_CT_BBNORM" [1] "I17R_CT_BBNORM" I17R_CT_BBNORM I17R_COV batch sex 1 1.189476 39.76 Batch 1 1 3 1.106077 39.76 Batch 1 1 5 1.264523 39.76 Batch 1 1 6 1.157433 39.76 Batch 1 1 7 1.156172 39.76 Batch 1 1 8 1.242612 39.76 Batch 1 1 [1] "I18R_CT_BBNORM" [1] "I18R_CT_BBNORM" I18R_CT_BBNORM I18R_COV sex 45 1.082990 21.55 1 89 1.028669 38.59 1 90 1.040850 38.59 1 91 1.054731 38.59 1 92 1.021607 38.59 1 93 1.017083 38.59 1 [1] "I1a_CT_BBNORM" [1] "I1a_CT_BBNORM" I1a_CT_BBNORM I1a_COV batch 1 0.5798851 242.54 Batch 1 3 0.5171819 242.54 Batch 1 5 0.5452803 242.54 Batch 1 6 0.5477406 242.54 Batch 1 7 0.6166330 242.54 Batch 1 8 0.5704174 242.54 Batch 1 [1] "I1b_CT_BBNORM" [1] "I1b_CT_BBNORM" I1b_CT_BBNORM I1b_COV 1 1.688008 1216.76 3 1.840302 1216.76 5 1.678979 1216.76 6 1.751528 1216.76 7 1.714390 1216.76 8 1.673547 1216.76 [1] "I2R_CT_BBNORM" [1] "I2R_CT_BBNORM" I2R_CT_BBNORM I2R_COV sex 45 1.559829 112.48 1 89 1.480423 190.45 1 90 1.520100 190.45 1 91 1.598134 190.45 1 92 1.390077 190.45 1 93 1.373535 190.45 1 [1] "I4_CT_BBNORM" [1] "I4_CT_BBNORM" I4_CT_BBNORM I4_COV batch sex 1 1.172214 33.35 Batch 1 1 3 1.012630 33.35 Batch 1 1 5 1.247332 33.35 Batch 1 1 6 1.048699 33.35 Batch 1 1 7 1.066804 33.35 Batch 1 1 8 1.200223 33.35 Batch 1 1 [1] "I5R_CT_BBNORM" [1] "I5R_CT_BBNORM" I5R_CT_BBNORM I5R_COV batch sex 284 1.629438 69.72 Batch 2 1 285 1.947423 69.72 Batch 2 1 286 1.868124 69.72 Batch 2 1 287 1.604437 69.72 Batch 2 1 288 2.534047 69.72 Batch 2 1 289 2.182763 69.72 Batch 2 1 [1] "I6_CT_BBNORM" [1] "I6_CT_BBNORM" I6_CT_BBNORM I6_COV sex 1 4.024765 2782.66 1 3 4.231057 2782.66 1 5 3.827796 2782.66 1 6 3.641906 2782.66 1 7 3.702248 2782.66 1 8 3.589750 2782.66 1 [1] "I7_CT_BBNORM" [1] "I7_CT_BBNORM" I7_CT_BBNORM I7_COV batch sex 1 0.9517176 202.66 Batch 1 1 3 0.9716675 202.66 Batch 1 1 5 0.9537721 202.66 Batch 1 1 6 0.9718501 202.66 Batch 1 1 7 0.9656824 202.66 Batch 1 1 8 0.9573269 202.66 Batch 1 1 [1] "M1aF_CT_BBNORM" [1] "M1aF_CT_BBNORM" M1aF_CT_BBNORM M1aF_COV sex 1 1.854520 1645.99 1 3 1.884698 1645.99 1 5 1.973759 1645.99 1 6 1.985563 1645.99 1 7 1.873944 1645.99 1 8 2.071332 1645.99 1 [1] "M3aR_CT_BBNORM" [1] "M3aR_CT_BBNORM" M3aR_CT_BBNORM M3aR_COV sex 1 1.269561 49.59 1 3 1.194628 49.59 1 5 1.461701 49.59 1 6 1.505926 49.59 1 7 1.255217 49.59 1 8 1.414512 49.59 1 [1] "MC_CT_BBNORM" [1] "MC_CT_BBNORM" MC_CT_BBNORM MC_COV sex 1 1.115273 37.38 1 3 1.064929 37.38 1 5 1.132440 37.38 1 6 1.103410 37.38 1 7 1.081909 37.38 1 8 1.143691 37.38 1 [1] "TF_CT_BBNORM" [1] "TF_CT_BBNORM" TF_CT_BBNORM TF_COV batch sex 1 9.659739 8009.37 Batch 1 1 5 11.297184 8009.37 Batch 1 1 6 12.696971 8009.37 Batch 1 1 7 11.418995 8009.37 Batch 1 1 8 9.952102 8009.37 Batch 1 1 9 10.356855 8009.37 Batch 1 1 [1] "VR_CT_BBNORM" [1] "VR_CT_BBNORM" VR_CT_BBNORM VR_COV batch sex 45 1.147490 40.35 Batch 1 1 89 1.070530 51.55 Batch 1 1 90 1.083528 51.55 Batch 1 1 91 1.160910 51.55 Batch 1 1 92 1.058952 51.55 Batch 1 1 93 1.047225 51.55 Batch 1 1 [1] "GL_CT_BBNORM" [1] "GL_CT_BBNORM" GL_CT_BBNORM GL_COV batch 45 6.091148 2917.2 Batch 1 96 5.548231 2917.2 Batch 1 142 5.379322 2917.2 Batch 1 145 5.656331 2917.2 Batch 1 155 4.454826 2917.2 Batch 1 156 5.536467 2917.2 Batch 1 There were 23 warnings (use warnings() to see them) > > save(all_VCs,file='/data/HSrats/rats/cytokines_Rnor5/EMMA_variance_components.RData') > > > > > > >