library(readr) #read in founder SNP genotype calls in PLINK traw format traw_Founders_path="DATA/FOUNDERS/Founders.traw" traw_Founders<-read_tsv(traw_Founders_path) #the founder.mosaics function in this script is the core element of the dynamic programming algorithm. source("compare.founders.r") #the founder.mosaics function has two parameters, the penalty score for matching/mismatching and the 'jumpcost' for switching between states, which is scored relative to the mismatch penalty. mismatch=1 #The following loop creates a series of RData objects corresponding to different jumpcost parameters for(jump in c(1,2,5,10,20,50,100,200,500,1000, 2000,5000)){founder.mosaics(traw_Founders, mismatch=mismatch, jump=jump)} #These results are then loaded in and the number of haplotype groups is counted and saved as a .tsv file for(jump in c(1,2,5,10,20,50,100,200,500,1000,2000, 5000)){ load(paste0("RData/mosaics.mm", mismatch, ".j", jump, ".RData")) mosaic<-tbl_df(do.call(rbind, mosaics)) num_groups<-apply(mosaic[,7:ncol(mosaic)], 1, function(x)(length(levels(as.factor(x))))) write_tsv(tbl_df(data.frame(jumpcost=jump, num_groups=num_groups)), path=paste0("RData/num_groups_mm", mismatch, "_jc", jump,".tsv"))} #Calculating these similarity/disimilarity mosaics will take some time ######################## #### plot sumarising haplotype number across parameters ######################## #The results from the analysis above can then be read in and plotted in various ways grp_tbl<-tbl_df(rbind( read_tsv("RData/num_groups_mm1_jc2"), read_tsv("RData/num_groups_mm1_jc5"), read_tsv("RData/num_groups_mm1_jc10"), read_tsv("RData/num_groups_mm1_jc20"), read_tsv("RData/num_groups_mm1_jc50"), read_tsv("RData/num_groups_mm1_jc100"), read_tsv("RData/num_groups_mm1_jc200"), read_tsv("RData/num_groups_mm1_jc500"), read_tsv("RData/num_groups_mm1_jc1000"), read_tsv("RData/num_groups_mm1_jc2000.tsv"), read_tsv("RData/num_groups_mm1_jc5000") )) hap_summary_plot<-grp_tbl %>% group_by(jumpcost, num_groups) %>% filter(!jumpcost %in% c(1,10,100, 1000)) %>% summarise(number=n()) %>% ungroup() %>% group_by(jumpcost) %>% mutate(perc=100*number/sum(number)) %>% ggplot(aes(x=num_groups, y= perc, colour=factor(jumpcost), group=factor(jumpcost))) + geom_line() + scale_colour_viridis_d(option="plasma", end=0.95) + plot_theme1 + ylab("percentage SNPs") + xlab("number of haplotypes") + labs(colour = "jumpcost") + theme() ggsave(filename="plots/haplotype_summary.pdf", plot=hap_summary_plot, height=2.75, width=4) #################### #### produce plots for one parameter set #################### #The folowing plots the inferred founder similarity mosaics for a single parameter set, given here mismatch=1 jump=200 load(paste0("mosaics.mm", mismatch, ".j", jump, ".RData")) mosaic<-tbl_df(do.call(rbind, mosaics)) groups<-t(apply(mosaic[,7:ncol(mosaic)], 1, function(x)(as.numeric(as.factor(x))))) colnames(groups)<-colnames(mosaic)[7:ncol(mosaic)] mosaic_plot<-tbl_df(cbind(mosaic[,1:6], groups)) %>% filter(row_number() %% 5 == 0) %>% #filter rows group_by(chromosome) %>% mutate(width=lead(position)-position) %>% filter(!is.na(width), !width==1) %>% gather(key=founder, value=haplotype, colnames(mosaic)[7:ncol(mosaic)]) all_chr_plot<-mosaic_plot %>% filter(!chromosome=="Un") %>% ggplot(aes(x=(position+(width/2))/1000000, y=founder, fill=factor(haplotype), width = width/1000000)) + geom_tile() + scale_fill_manual(values=c("grey40",cp)) + theme_minimal() + facet_wrap(~chromosome, nrow=3, dir="v") + xlab("position (mb)") + plot_theme1 + theme(legend.position = "none", panel.grid.major.y=element_blank(), panel.grid.major.x=element_blank(), panel.grid.minor.x=element_blank(), axis.title.y=element_blank(), panel.border = element_blank()) ggsave(filename=paste0("plots/haplotypes_mm", mismatch, "_jc", jump,".png"), plot= all_chr_plot, height=7, width=14, dpi=300)