#load necessary libaries lib.list<-c("stringr", "ggplot2") lapply(lib.list, library, character.only=TRUE) #to install missing packages run: install.packages(lib.list) #load functions to run mixed model source("mixed.model.functions.r") ### location of input files phenotype="GS55_1" #Column name of the phenotype to be analysed id.column="line_name" #Column name with sample names (must match sample names of genotype data) phenfile="DATA/PHENOTYPES/NDM_phenotypes.tsv" pruned_traw_file<-"DATA/MAGIC_IMPUTED_PRUNED/MAGIC_imputed.pruned.traw" #imputed MAGIC RIL genotypes in traw format, after LD pruning with PLINK option --indep-pairwise 500 10 0.99 ### location of output files out_dir<-paste0("OUTPUT/",phenotype,"/") dir.create(out_dir, showWarnings = FALSE) ### parameters n.perm=1000 #number of permutations to run to establish significance thresholds alpha=c(0.05,0.01) #genomewide significance levels ### make genetic relationship matrix pruned_grm_file<-str_replace(pruned_traw_file, ".traw", ".grm") #default output location for GRM make_grm(traw_file=pruned_traw_file, output_filename=pruned_grm_file) ### run SNP associations snp.output<-mixed.model.snps(phenfile, phenotype, id.column, grm_file, pruned_traw_file, n.perm, covar=NULL, rename_CHR=TRUE, rename_samples=TRUE) #option covar can be used to include covariates #option rename_CHR takes chromosome names from the SNP column of the traw using the part before the first colon ":" (e.g., SNPs "1A:XXXXX", "1B:XXXXX" convert the CHR column to "1A" and "1B"). This is because PLINK requires the CHR column to be numeric. #option rename_samples takes sample names from the traw file as the part before the first underscore. ### snp.output is a list of up to three parts: # snp.output$logP contains the positions of the SNPs and the logP value (if n.perm>0, perm.adj.pvalue is the fraction of permutations that produce at least one p value smaller than the empirical p value) # snp.output$vg is the genetic variance (heritability) # snp.output$max.logP.perm (only produced when n.perm>0) gives the maximum logP (smallest P value) from each of n.perm phenotype permutations ### we also supply some functions to plot the results #load plotting functions source("plot.functions.r") # produce a manhattan plot snp.manhattan.plot(snp.output, snp.manhattan.filename=paste0(out_dir,"snp.manhattan.pdf"), alpha=c(0.05,0.01))