model='additive' nb_cores=10 ##transforms the mixed model into a linear model and gets pvalue of association my_function=function(element,mitch,multiplicator,transformed_pheno,x_mat,model) { #trick to deal with marker files starting with weird name element=element[mitch,] #element is the design matrix #to deal with subjects.data, strains.data etc that will be loaded too n=dim(element)[1] yt <- multiplicator %*% transformed_pheno x_matt = multiplicator %*% x_mat elementt <- multiplicator %*% element fit0=lm(yt~ -1+x_matt) fit1=lm(yt~ -1+x_matt+elementt) anova=anova(fit0,fit1) p=anova[,'Pr(>F)'][2] return(p) } my_function_X=function(element,mitch,multiplicator,transformed_pheno,x_mat,model) { #trick to deal with marker files starting with weird name element=element[mitch,] #element is the design matrix #to deal with subjects.data, strains.data etc that will be loaded too n=dim(element)[1] yt <- multiplicator %*% transformed_pheno x_matt = multiplicator %*% x_mat elementt <- multiplicator %*% element fit0=lm(yt~ -1+x_matt) fit1=lm(yt~ -1+x_matt+elementt+x_matt[,2]:elementt) anova=anova(fit0,fit1) p=anova[,'Pr(>F)'][2] return(p) } ###### library(multicore) library(emma) #noramalized phenotypes final_mat=read.delim('/data/HSrats/rats/cytokines_Rnor5/Cytokines_CTnorm_BBnorm_DATA_140113.txt',row.names=1,as.is=T) my_f=function(col) { col=as.numeric(sub(',','.',col)) return(col) } final_mat=as.data.frame(lapply(final_mat,FUN=my_f)) #kinship matrix = IBS matrix load('/Net/fs1/groups/mott/Amelie/data_mining/kinship_oct2.RData') remove=match(c("VVD_RAT_13sep10.CEL","VVF_RAT_09sep10.CEL"),colnames(kinship)) kinship=kinship[-remove,-remove] #load('/data/HSrats/rats/GCTA/input4EMMA/genetic_corr_matrix_GCTA.RData') #kinship=corr2 kinship_fixed=kinship #corresp [kinship et ped] et phenotypes: link between cel files names and animals ids tot_genotyped=read.delim('/Net/fs1/groups/mott/Amelie/data_mining/new_tot_genotyped.txt',as.is=T,header=F) colnames(tot_genotyped)=c('CEL_name','SUBJECT.NAME') #measures and covariates model_menu=read.delim('/data/HSrats/rats/cytokines_Rnor5/model_menu_final_cytokines.txt',header=T,colClasses='character',na.strings = "") #map: location markers map=read.table('/data/HSrats/rats/genome_cache_Rnor5/all_map.txt',as.is=T) ###to deal properly with the covariates covariates=c() for(i in 1:dim(model_menu)[1]) { cov=strsplit(model_menu[i,'covariates'],',')[[1]] covariates=c(covariates,cov) } covariates=unique(covariates) covariates_types=rep('aie',length(covariates)) a=which(covariates=='sex') if (length(a)!=0) covariates_types[a]='f' a=which(covariates=='batch') if (length(a)!=0) covariates_types[a]='f' a=which(covariates=='is.albino') if (length(a)!=0) covariates_types[a]='f' a=which(covariates=='Haemalysis') if (length(a)!=0) covariates_types[a]='n' a=which(covariates=='BW_at_day28_pi') if (length(a)!=0) covariates_types[a]='n' a=which(covariates=='test_worked') if (length(a)!=0) covariates_types[a]='sel' a=which(covariates=='age') if (length(a)!=0) covariates_types[a]='n' a=which(covariates=='reliable') if (length(a)!=0) covariates_types[a]='sel' a=which(covariates=='BW_at_IPGTT') if (length(a)!=0) covariates_types[a]='n' a=which(covariates=='facs_day') if (length(a)!=0) covariates_types[a]='f' a=which(covariates=='BW_at_day9_pi') if (length(a)!=0) covariates_types[a]='n' a=which(covariates=='facs_day') if (length(a)!=0) covariates_types[a]='f' names(covariates_types)=covariates my_do=function(i) { measure_name=model_menu[i,2] print(measure_name) # if (file.exists(paste('/data/HSrats/rats/mapping_wo_duplicate/gls_design_',model_menu[i,1],'_',measure_name,'_',model,'_init.txt',sep=''))) return(NULL) ##first of all creates data which has measure and covariates if (!is.na(model_menu[i,'covariates'])) covs=strsplit(model_menu[i,'covariates'],',')[[1]] else covs=NULL #covariates_types are types which can be selected by their names = homogenous to covs if (!is.null(covs) && any(covariates_types[covs]=='sel')) { SEL=TRUE #at least 2 covs: 'sel' + un autre if (length(covs)>1) { pos_in_covs=which(covariates_types[covs] == 'sel') #new_covs without 'sel' new_covs=covs[-pos_in_covs] data=final_mat[(!is.na(final_mat[,measure_name])) & (!apply(X=is.na(final_mat[,new_covs]),FUN=any,MARGIN=1)) & as.logical(final_mat[,covs[pos_in_covs]]),c(measure_name,new_covs)] } else { #only one cov: has to be 'sel': will be TRUE/FALSE for keep? #only one necessary for for loop new_covs=NULL data=final_mat[(!is.na(final_mat[,measure_name])) & as.logical(final_mat[,covs]),measure_name,drop=F] } } else { SEL=FALSE new_covs=covs #at least 2 covs if (!is.null(covs)&length(covs)>1) { data=final_mat[(!is.na(final_mat[,measure_name]))&(!apply(X=is.na(final_mat[,covs]),FUN=any,MARGIN=1)),c(measure_name,covs)] } else if (!is.null(covs)&length(covs)==1) { #only one cov data=final_mat[(!is.na(final_mat[,measure_name]))&(!is.na(final_mat[,covs])),c(measure_name,covs)] } else #no cov data=final_mat[!is.na(final_mat[,measure_name]),measure_name,drop=F] } kinship=kinship_fixed #to go from cel names to ids motch=match(colnames(kinship),tot_genotyped[,'CEL_name']) if (any(is.na(motch))) {print('erreur na dans motch') ; stop('erreur na dans motch')} ids_kinship=tot_genotyped[motch,'SUBJECT.NAME'] #ya des data qui ont pas ete genotype dc pas ds kinship #mais ya aussi ds kinship qui ont pas ete phenotypes (parents et founders) motch=match(rownames(data),ids_kinship) data=data[!is.na(motch),,drop=F] kinship=kinship[na.exclude(motch),na.exclude(motch)] load(paste('/data/HSrats/rats/genome_cache_Rnor5/chr1/subjects.RData',sep='')) mitch=match(colnames(kinship),subjects) if (any(is.na(mitch))) {print('erreur na dans mitch') ; stop('erreur na dans mitch')} transformed_pheno=data[,measure_name] x_mat=matrix(1,nrow=dim(data)[1],ncol=1) #length(NULL)=0 x_mat=cbind(x_mat,data[,new_covs]) n=dim(kinship)[1] vecti=matrix(1,ncol=1,nrow=n) p=diag(n)-(vecti%*%t(vecti))/n num=(n-1)*kinship denom=sum(diag(p%*%kinship%*%p)) unit_kinship=num/denom ###variance components estimation emma=try(emma.REMLE(y=transformed_pheno, X=x_mat, K=unit_kinship, ngrids=100, llim=-10, ulim=10,esp=1e-10, eig.R = NULL)) V=emma$vg*unit_kinship+emma$ve*diag(n) VCs=c(vg=emma$vg, ve=emma$ve) ##creates 'multiplicator' = matrix to apply to the mixed models to transform them into linear models #eigendecomposition of the covariance matrix svd=try(svd(V)) if (inherits(svd, "try-error")) {print('simulated_phenotype');next} #check that V = svd$u %*% diag(x=eigen_values) %*% t(svd$v) = svd$u %*% diag(x=eigen_values) %*% solve(svd$u) eigen_basis=svd$u eigen_values=svd$d teigen_basis=t(eigen_basis) #will multiply all terms by the inverse of r_mat which is so that V = R %*% R sqrt_eigen_values=sqrt(eigen_values) sqrt_lambda_matrix=diag(x=sqrt_eigen_values) r_mat=eigen_basis%*%sqrt_lambda_matrix%*%teigen_basis multiplicator=solve(r_mat) ##test association with all happy intervals (haplotypes) #keep only some of the animals in the design matrix chrs=paste('chr',c(1:20,'X'),sep='') results=c() for (chr in chrs) { #has all the names of the .RData in the genome cache, including bp.RData, subjects.RData etc that well get rid of by checking the dimansions load(paste('/data/HSrats/rats/genome_cache_Rnor5/',chr,'_all_markers.RData',sep='')) if (chr=='X') result=mclapply(X=mget(content,envir=as.environment(-1)),FUN=my_function_X,mitch=mitch,multiplicator=multiplicator,transformed_pheno=transformed_pheno,x_mat=x_mat,model=model,mc.cores=nb_cores) else result=mclapply(X=mget(content,envir=as.environment(-1)),FUN=my_function,mitch=mitch,multiplicator=multiplicator,transformed_pheno=transformed_pheno,x_mat=x_mat,model=model,mc.cores=nb_cores) result=unlist(result) names(result)=content results=c(results,result) rm(list=c(content,'content')) } motch=match(names(results),map[,1]) table=data.frame(marker=names(results),pvalue=results,chr=map[motch,2],bp=map[motch,3]) table[,'pvalue']=as.numeric(table[,'pvalue']) write.table(table,file=paste('/data/HSrats/rats/cytokines_Rnor5/gls_design_',model_menu[i,1],'_',measure_name,'_additive.txt',sep=''),sep='\t',quote=F,row.names=F,col.names=F) return(VCs) } all_VCs=list() for (i in 1:dim(model_menu)[1]) { print(i) if (!is.na(model_menu[i,'map_w_mixed']) & model_menu[i,'map_w_mixed']=='yes') { print(model_menu[i,2]) res=try(my_do(i)) if (inherits(res, "try-error")) next all_VCs[model_menu[i,2]]=res } else next } save(all_VCs,file='/data/HSrats/rats/cytokines_Rnor5/EMMA_variance_components.RData')