model='additive'
nb_cores=16

##transforms the mixed model into a linear model and gets pvalue of association
my_function=function(element,mitch,multiplicator,transformed_pheno,x_mat,model) {
#trick to deal with marker files starting with weird name
	element=element[mitch,]
#element is the design matrix
#to deal with subjects.data, strains.data etc that will be loaded too
	n=dim(element)[1]		
	yt <- multiplicator %*% transformed_pheno
	x_matt = multiplicator %*% x_mat
	elementt <- multiplicator %*% element
	fit0=lm(yt~	-1+x_matt)
	fit1=lm(yt~	-1+x_matt+elementt)
	anova=anova(fit0,fit1)
	p=anova[,'Pr(>F)'][2]
	return(p)
}

my_function_X=function(element,mitch,multiplicator,transformed_pheno,x_mat,model) {
#trick to deal with marker files starting with weird name
	element=element[mitch,]
#element is the design matrix
#to deal with subjects.data, strains.data etc that will be loaded too
	n=dim(element)[1]		
	yt <- multiplicator %*% transformed_pheno
	x_matt = multiplicator %*% x_mat
	elementt <- multiplicator %*% element
	fit0=lm(yt~	-1+x_matt)
	fit1=lm(yt~	-1+x_matt+elementt+x_matt[,2]:elementt)
	anova=anova(fit0,fit1)
	p=anova[,'Pr(>F)'][2]
	return(p)
}

######
library(multicore)
library(emma)


final_mat=read.delim('/data/HSrats/rats/cytokines_Rnor5/Cytokines_CTnorm_BBnorm_DATA_140220.txt',as.is=T,row.names=1)
w=which(colnames(final_mat)=='SEX')
colnames(final_mat)[w]='sex'
w=which(colnames(final_mat)=='BATCH')
colnames(final_mat)[w]='batch'
final_mat[,'batch']=paste('Batch',final_mat[,'batch'])
for (i in 1:dim(final_mat)[2]) {
    w=which(grepl(',',final_mat[,i],fixed=T))
    if (length(w)!=0) final_mat[w,i]=sub(',','.',final_mat[w,i],fixed=T)
    if (any(grepl(',',final_mat[,i],fixed=T))) print(i)
}


#kinship matrix = IBS matrix
load('/data/HSrats/rats/genome_cache_Rnor5/kinship_Rnor5_pairwise_complete.RData')	
remove=match(c("VVD_RAT_13sep10.CEL","VVF_RAT_09sep10.CEL"),colnames(kinship))
kinship=kinship[-remove,-remove]

#load('/data/HSrats/rats/GCTA/input4EMMA/genetic_corr_matrix_GCTA.RData')
#kinship=corr2
kinship_fixed=kinship
#corresp [kinship et ped] et phenotypes: link between cel files names and animals ids
tot_genotyped=read.delim('/Net/fs1/groups/mott/Amelie/data_mining/new_tot_genotyped.txt',as.is=T,header=F)
colnames(tot_genotyped)=c('CEL_name','SUBJECT.NAME')
#measures and covariates
model_menu=read.delim('/data/HSrats/rats/cytokines_Rnor5/model_menu_final_cytokines.txt',header=T,colClasses='character',na.strings = "")
model_menu[,'covariates']=sub('SEX','sex',model_menu[,'covariates'])
model_menu[,'covariates']=sub('BATCH','batch',model_menu[,'covariates'])

#map: location markers
map=read.table('/data/HSrats/rats/genome_cache_Rnor5/all_map.txt',as.is=T)

###to deal properly with the covariates
covariates=c()
for(i in 1:dim(model_menu)[1]) {
	cov=strsplit(model_menu[i,'covariates'],' + ',fixed=T)[[1]]
	covariates=c(covariates,cov)
}
covariates=unique(covariates)
any(grep(' ',covariates))
#FALSE
covariates_types=rep('n',length(covariates))
motch=match(c('sex','batch'),covariates)
covariates_types[motch]='f'
names(covariates_types)=covariates

my_do=function(i) {
	measure_name=model_menu[i,2]
	print(measure_name)

    #	if (file.exists(paste('/data/HSrats/rats/mapping_wo_duplicate/gls_design_',model_menu[i,1],'_',measure_name,'_',model,'_init.txt',sep=''))) return(NULL)
	
##first of all creates data which has measure and covariates
	if (!is.na(model_menu[i,'covariates'])) covs=strsplit(model_menu[i,'covariates'],' + ',fixed=T)[[1]] else  covs=NULL
#covariates_types are types which can be selected by their names = homogenous to covs
	if (!is.null(covs) && any(covariates_types[covs]=='sel')) {
		SEL=TRUE
#at least 2 covs: 'sel' + un autre
		if (length(covs)>1) {
			pos_in_covs=which(covariates_types[covs] == 'sel')
#new_covs without 'sel'
			new_covs=covs[-pos_in_covs]
			data=final_mat[(!is.na(final_mat[,measure_name])) & (!apply(X=is.na(final_mat[,new_covs]),FUN=any,MARGIN=1)) & as.logical(final_mat[,covs[pos_in_covs]]),c(measure_name,new_covs)] 

		} else {
#only one cov: has to be 'sel': will be TRUE/FALSE for keep?
#only one necessary for for loop
			new_covs=NULL
			data=final_mat[(!is.na(final_mat[,measure_name])) & as.logical(final_mat[,covs]),measure_name,drop=F]

		}
	} else {
		SEL=FALSE
		new_covs=covs
		
#at least 2 covs
		if (!is.null(covs)&length(covs)>1) {
			data=final_mat[(!is.na(final_mat[,measure_name]))&(!apply(X=is.na(final_mat[,covs]),FUN=any,MARGIN=1)),c(measure_name,covs)] 

		} else if (!is.null(covs)&length(covs)==1) {
#only one cov
			data=final_mat[(!is.na(final_mat[,measure_name]))&(!is.na(final_mat[,covs])),c(measure_name,covs)] 

		} else 
#no cov
		data=final_mat[!is.na(final_mat[,measure_name]),measure_name,drop=F]
	}
	
    print(head(data))
    
	kinship=kinship_fixed
#to go from cel names to ids
	motch=match(colnames(kinship),tot_genotyped[,'CEL_name'])
    kinship=kinship[!is.na(motch),!is.na(motch)]
 	motch=match(colnames(kinship),tot_genotyped[,'CEL_name'])
	if (any(is.na(motch))) {print('erreur na dans motch') ; stop('erreur na dans motch')}
	ids_kinship=tot_genotyped[motch,'SUBJECT.NAME']
	
#ya des data qui ont pas ete genotype dc pas ds kinship
#mais ya aussi ds kinship qui ont pas ete phenotypes (parents et founders)
	motch=match(rownames(data),ids_kinship)
	data=data[!is.na(motch),,drop=F]
	kinship=kinship[na.exclude(motch),na.exclude(motch)]
	
	load(paste('/data/HSrats/rats/genome_cache_Rnor5/chr1/subjects.RData',sep=''))
	mitch=match(colnames(kinship),subjects)
	if (any(is.na(mitch))) {print('erreur na dans mitch') ; stop('erreur na dans mitch')}
	transformed_pheno=data[,measure_name]
	
	x_mat=matrix(1,nrow=dim(data)[1],ncol=1)
#length(NULL)=0

	if (!is.null(new_covs)){
		for (c in 1:length(new_covs)) {
			cov=new_covs[c]
			type_cov=covariates_types[cov]
			if (type_cov=='f') {
				old=data[,cov]
				old=as.factor(old)
				levels=levels(old)
				n=length(levels)-1
				matrice=matrix(ncol=n,nrow=length(old),NA)
				colnames(matrice)=levels[1:n]
				for (l in levels[1:n]) {
					matrice[,l]=(old==l)
				}
				x_mat=cbind(x_mat,matrice)
			} else {
				data[,cov]=as.numeric(data[,cov])
				x_mat=cbind(x_mat,data[,cov])
			}
		}
	}
	n=dim(kinship)[1]
    
	vecti=matrix(1,ncol=1,nrow=n)
	p=diag(n)-(vecti%*%t(vecti))/n
	num=(n-1)*kinship
	denom=sum(diag(p%*%kinship%*%p))
	unit_kinship=num/denom    
    
###variance components estimation
	emma=try(emma.REMLE(y=transformed_pheno, X=x_mat, K=unit_kinship, ngrids=100, llim=-10, ulim=10,esp=1e-10, eig.R = NULL))	
	
	V=emma$vg*unit_kinship+emma$ve*diag(n)

    VCs=list(vg=emma$vg, ve=emma$ve)
##creates 'multiplicator' = matrix to apply to the mixed models to transform them into linear models
	#eigendecomposition of the covariance matrix
	svd=try(svd(V))
	if (inherits(svd, "try-error")) {print('simulated_phenotype');next}
	#check that V = svd$u %*% diag(x=eigen_values) %*% t(svd$v) = svd$u %*% diag(x=eigen_values) %*% solve(svd$u)
	eigen_basis=svd$u
	eigen_values=svd$d
	teigen_basis=t(eigen_basis)

	#will multiply all terms by the inverse of r_mat which is so that V = R %*% R
	sqrt_eigen_values=sqrt(eigen_values)
	sqrt_lambda_matrix=diag(x=sqrt_eigen_values)
	r_mat=eigen_basis%*%sqrt_lambda_matrix%*%teigen_basis
	multiplicator=solve(r_mat)
	
##test association with all happy intervals (haplotypes)
	#keep only some of the animals in the design matrix
	chrs=paste('chr',c(1:20,'X'),sep='')
	results=c()
	for (chr in chrs) {
		#has all the names of the .RData in the genome cache, including bp.RData, subjects.RData etc that well get rid of by checking the dimansions
		load(paste('/data/HSrats/rats/genome_cache_Rnor5/',chr,'_all_markers.RData',sep=''))
		if (chr=='X') result=mclapply(X=mget(content,envir=as.environment(-1)),FUN=my_function_X,mitch=mitch,multiplicator=multiplicator,transformed_pheno=transformed_pheno,x_mat=x_mat,model=model,mc.cores=nb_cores) else
		result=mclapply(X=mget(content,envir=as.environment(-1)),FUN=my_function,mitch=mitch,multiplicator=multiplicator,transformed_pheno=transformed_pheno,x_mat=x_mat,model=model,mc.cores=nb_cores)
		result=unlist(result)
		names(result)=content
		results=c(results,result)
		rm(list=c(content,'content'))
	}
	motch=match(names(results),map[,1])
	table=data.frame(marker=names(results),pvalue=results,chr=map[motch,2],bp=map[motch,3])
	table[,'pvalue']=as.numeric(table[,'pvalue'])
	write.table(table,file=paste('/data/HSrats/rats/cytokines_Rnor5/gls_design_',model_menu[i,1],'_',measure_name,'_additive.txt',sep=''),sep='\t',quote=F,row.names=F,col.names=F)
	return(VCs)
}

all_VCs=list()
for (i in 1:dim(model_menu)[1]) {
	if (!is.na(model_menu[i,'map_w_mixed']) & model_menu[i,'map_w_mixed']=='yes') {
		print(model_menu[i,2])
		res=try(my_do(i))
		if (inherits(res, "try-error")) next
        all_VCs[model_menu[i,2]]=res
	} else next
}

save(all_VCs,file='/data/HSrats/rats/cytokines_Rnor5/EMMA_variance_components.RData')