model='additive' nb_cores=16 ##transforms the mixed model into a linear model and gets pvalue of association my_function=function(element,mitch,multiplicator,transformed_pheno,x_mat,model) { #trick to deal with marker files starting with weird name element=element[mitch,] #element is the design matrix #to deal with subjects.data, strains.data etc that will be loaded too n=dim(element)[1] yt <- multiplicator %*% transformed_pheno x_matt = multiplicator %*% x_mat elementt <- multiplicator %*% element fit0=lm(yt~ -1+x_matt) fit1=lm(yt~ -1+x_matt+elementt) anova=anova(fit0,fit1) p=anova[,'Pr(>F)'][2] return(p) } my_function_X=function(element,mitch,multiplicator,transformed_pheno,x_mat,model) { #trick to deal with marker files starting with weird name element=element[mitch,] #element is the design matrix #to deal with subjects.data, strains.data etc that will be loaded too n=dim(element)[1] yt <- multiplicator %*% transformed_pheno x_matt = multiplicator %*% x_mat elementt <- multiplicator %*% element fit0=lm(yt~ -1+x_matt) fit1=lm(yt~ -1+x_matt+elementt+x_matt[,2]:elementt) anova=anova(fit0,fit1) p=anova[,'Pr(>F)'][2] return(p) } ###### library(multicore) library(emma) final_mat=read.delim('/data/HSrats/rats/cytokines_Rnor5/Cytokines_CTnorm_BBnorm_DATA_140220.txt',as.is=T,row.names=1) w=which(colnames(final_mat)=='SEX') colnames(final_mat)[w]='sex' w=which(colnames(final_mat)=='BATCH') colnames(final_mat)[w]='batch' final_mat[,'batch']=paste('Batch',final_mat[,'batch']) for (i in 1:dim(final_mat)[2]) { w=which(grepl(',',final_mat[,i],fixed=T)) if (length(w)!=0) final_mat[w,i]=sub(',','.',final_mat[w,i],fixed=T) if (any(grepl(',',final_mat[,i],fixed=T))) print(i) } #kinship matrix = IBS matrix load('/data/HSrats/rats/genome_cache_Rnor5/kinship_Rnor5_pairwise_complete.RData') remove=match(c("VVD_RAT_13sep10.CEL","VVF_RAT_09sep10.CEL"),colnames(kinship)) kinship=kinship[-remove,-remove] #load('/data/HSrats/rats/GCTA/input4EMMA/genetic_corr_matrix_GCTA.RData') #kinship=corr2 kinship_fixed=kinship #corresp [kinship et ped] et phenotypes: link between cel files names and animals ids tot_genotyped=read.delim('/Net/fs1/groups/mott/Amelie/data_mining/new_tot_genotyped.txt',as.is=T,header=F) colnames(tot_genotyped)=c('CEL_name','SUBJECT.NAME') #measures and covariates model_menu=read.delim('/data/HSrats/rats/cytokines_Rnor5/model_menu_final_cytokines.txt',header=T,colClasses='character',na.strings = "") model_menu[,'covariates']=sub('SEX','sex',model_menu[,'covariates']) model_menu[,'covariates']=sub('BATCH','batch',model_menu[,'covariates']) #map: location markers map=read.table('/data/HSrats/rats/genome_cache_Rnor5/all_map.txt',as.is=T) ###to deal properly with the covariates covariates=c() for(i in 1:dim(model_menu)[1]) { cov=strsplit(model_menu[i,'covariates'],' + ',fixed=T)[[1]] covariates=c(covariates,cov) } covariates=unique(covariates) any(grep(' ',covariates)) #FALSE covariates_types=rep('n',length(covariates)) motch=match(c('sex','batch'),covariates) covariates_types[motch]='f' names(covariates_types)=covariates my_do=function(i) { measure_name=model_menu[i,2] print(measure_name) # if (file.exists(paste('/data/HSrats/rats/mapping_wo_duplicate/gls_design_',model_menu[i,1],'_',measure_name,'_',model,'_init.txt',sep=''))) return(NULL) ##first of all creates data which has measure and covariates if (!is.na(model_menu[i,'covariates'])) covs=strsplit(model_menu[i,'covariates'],' + ',fixed=T)[[1]] else covs=NULL #covariates_types are types which can be selected by their names = homogenous to covs if (!is.null(covs) && any(covariates_types[covs]=='sel')) { SEL=TRUE #at least 2 covs: 'sel' + un autre if (length(covs)>1) { pos_in_covs=which(covariates_types[covs] == 'sel') #new_covs without 'sel' new_covs=covs[-pos_in_covs] data=final_mat[(!is.na(final_mat[,measure_name])) & (!apply(X=is.na(final_mat[,new_covs]),FUN=any,MARGIN=1)) & as.logical(final_mat[,covs[pos_in_covs]]),c(measure_name,new_covs)] } else { #only one cov: has to be 'sel': will be TRUE/FALSE for keep? #only one necessary for for loop new_covs=NULL data=final_mat[(!is.na(final_mat[,measure_name])) & as.logical(final_mat[,covs]),measure_name,drop=F] } } else { SEL=FALSE new_covs=covs #at least 2 covs if (!is.null(covs)&length(covs)>1) { data=final_mat[(!is.na(final_mat[,measure_name]))&(!apply(X=is.na(final_mat[,covs]),FUN=any,MARGIN=1)),c(measure_name,covs)] } else if (!is.null(covs)&length(covs)==1) { #only one cov data=final_mat[(!is.na(final_mat[,measure_name]))&(!is.na(final_mat[,covs])),c(measure_name,covs)] } else #no cov data=final_mat[!is.na(final_mat[,measure_name]),measure_name,drop=F] } print(head(data)) kinship=kinship_fixed #to go from cel names to ids motch=match(colnames(kinship),tot_genotyped[,'CEL_name']) kinship=kinship[!is.na(motch),!is.na(motch)] motch=match(colnames(kinship),tot_genotyped[,'CEL_name']) if (any(is.na(motch))) {print('erreur na dans motch') ; stop('erreur na dans motch')} ids_kinship=tot_genotyped[motch,'SUBJECT.NAME'] #ya des data qui ont pas ete genotype dc pas ds kinship #mais ya aussi ds kinship qui ont pas ete phenotypes (parents et founders) motch=match(rownames(data),ids_kinship) data=data[!is.na(motch),,drop=F] kinship=kinship[na.exclude(motch),na.exclude(motch)] load(paste('/data/HSrats/rats/genome_cache_Rnor5/chr1/subjects.RData',sep='')) mitch=match(colnames(kinship),subjects) if (any(is.na(mitch))) {print('erreur na dans mitch') ; stop('erreur na dans mitch')} transformed_pheno=data[,measure_name] x_mat=matrix(1,nrow=dim(data)[1],ncol=1) #length(NULL)=0 if (!is.null(new_covs)){ for (c in 1:length(new_covs)) { cov=new_covs[c] type_cov=covariates_types[cov] if (type_cov=='f') { old=data[,cov] old=as.factor(old) levels=levels(old) n=length(levels)-1 matrice=matrix(ncol=n,nrow=length(old),NA) colnames(matrice)=levels[1:n] for (l in levels[1:n]) { matrice[,l]=(old==l) } x_mat=cbind(x_mat,matrice) } else { data[,cov]=as.numeric(data[,cov]) x_mat=cbind(x_mat,data[,cov]) } } } n=dim(kinship)[1] vecti=matrix(1,ncol=1,nrow=n) p=diag(n)-(vecti%*%t(vecti))/n num=(n-1)*kinship denom=sum(diag(p%*%kinship%*%p)) unit_kinship=num/denom ###variance components estimation emma=try(emma.REMLE(y=transformed_pheno, X=x_mat, K=unit_kinship, ngrids=100, llim=-10, ulim=10,esp=1e-10, eig.R = NULL)) V=emma$vg*unit_kinship+emma$ve*diag(n) VCs=list(vg=emma$vg, ve=emma$ve) ##creates 'multiplicator' = matrix to apply to the mixed models to transform them into linear models #eigendecomposition of the covariance matrix svd=try(svd(V)) if (inherits(svd, "try-error")) {print('simulated_phenotype');next} #check that V = svd$u %*% diag(x=eigen_values) %*% t(svd$v) = svd$u %*% diag(x=eigen_values) %*% solve(svd$u) eigen_basis=svd$u eigen_values=svd$d teigen_basis=t(eigen_basis) #will multiply all terms by the inverse of r_mat which is so that V = R %*% R sqrt_eigen_values=sqrt(eigen_values) sqrt_lambda_matrix=diag(x=sqrt_eigen_values) r_mat=eigen_basis%*%sqrt_lambda_matrix%*%teigen_basis multiplicator=solve(r_mat) ##test association with all happy intervals (haplotypes) #keep only some of the animals in the design matrix chrs=paste('chr',c(1:20,'X'),sep='') results=c() for (chr in chrs) { #has all the names of the .RData in the genome cache, including bp.RData, subjects.RData etc that well get rid of by checking the dimansions load(paste('/data/HSrats/rats/genome_cache_Rnor5/',chr,'_all_markers.RData',sep='')) if (chr=='X') result=mclapply(X=mget(content,envir=as.environment(-1)),FUN=my_function_X,mitch=mitch,multiplicator=multiplicator,transformed_pheno=transformed_pheno,x_mat=x_mat,model=model,mc.cores=nb_cores) else result=mclapply(X=mget(content,envir=as.environment(-1)),FUN=my_function,mitch=mitch,multiplicator=multiplicator,transformed_pheno=transformed_pheno,x_mat=x_mat,model=model,mc.cores=nb_cores) result=unlist(result) names(result)=content results=c(results,result) rm(list=c(content,'content')) } motch=match(names(results),map[,1]) table=data.frame(marker=names(results),pvalue=results,chr=map[motch,2],bp=map[motch,3]) table[,'pvalue']=as.numeric(table[,'pvalue']) write.table(table,file=paste('/data/HSrats/rats/cytokines_Rnor5/gls_design_',model_menu[i,1],'_',measure_name,'_additive.txt',sep=''),sep='\t',quote=F,row.names=F,col.names=F) return(VCs) } all_VCs=list() for (i in 1:dim(model_menu)[1]) { if (!is.na(model_menu[i,'map_w_mixed']) & model_menu[i,'map_w_mixed']=='yes') { print(model_menu[i,2]) res=try(my_do(i)) if (inherits(res, "try-error")) next all_VCs[model_menu[i,2]]=res } else next } save(all_VCs,file='/data/HSrats/rats/cytokines_Rnor5/EMMA_variance_components.RData')