library(readr)

#read in founder SNP genotype calls in PLINK traw format
traw_Founders_path="DATA/FOUNDERS/Founders.traw"
traw_Founders<-read_tsv(traw_Founders_path)

#the founder.mosaics function in this script is the core element of the dynamic programming algorithm. 
source("compare.founders.r")

#the founder.mosaics function has two parameters, the penalty score for matching/mismatching and the 'jumpcost' for switching between states, which is scored relative to the mismatch penalty. 
mismatch=1
#The following loop creates a series of RData objects corresponding to different jumpcost parameters
for(jump in c(1,2,5,10,20,50,100,200,500,1000, 2000,5000)){founder.mosaics(traw_Founders, mismatch=mismatch, jump=jump)}
#These results are then loaded in and the number of haplotype groups is counted and saved as a .tsv file
for(jump in c(1,2,5,10,20,50,100,200,500,1000,2000, 5000)){
	load(paste0("RData/mosaics.mm", mismatch, ".j", jump, ".RData"))
	mosaic<-tbl_df(do.call(rbind, mosaics))
	num_groups<-apply(mosaic[,7:ncol(mosaic)], 1, function(x)(length(levels(as.factor(x)))))
	write_tsv(tbl_df(data.frame(jumpcost=jump, num_groups=num_groups)), path=paste0("RData/num_groups_mm", mismatch, "_jc", jump,".tsv"))}

#Calculating these similarity/disimilarity mosaics will take some time

########################
#### plot sumarising haplotype number across parameters
########################

#The results from the analysis above can then be read in and plotted in various ways
grp_tbl<-tbl_df(rbind(
	read_tsv("RData/num_groups_mm1_jc2"),
	read_tsv("RData/num_groups_mm1_jc5"),
	read_tsv("RData/num_groups_mm1_jc10"),
	read_tsv("RData/num_groups_mm1_jc20"),
	read_tsv("RData/num_groups_mm1_jc50"),
	read_tsv("RData/num_groups_mm1_jc100"),
	read_tsv("RData/num_groups_mm1_jc200"),
	read_tsv("RData/num_groups_mm1_jc500"),
	read_tsv("RData/num_groups_mm1_jc1000"),
	read_tsv("RData/num_groups_mm1_jc2000.tsv"),
	read_tsv("RData/num_groups_mm1_jc5000")
	))

hap_summary_plot<-grp_tbl %>% group_by(jumpcost, num_groups) %>%
	filter(!jumpcost %in% c(1,10,100, 1000)) %>%
	summarise(number=n()) %>%
	ungroup() %>% group_by(jumpcost) %>% mutate(perc=100*number/sum(number)) %>%
	ggplot(aes(x=num_groups, y= perc, colour=factor(jumpcost), group=factor(jumpcost))) + 
	geom_line() + scale_colour_viridis_d(option="plasma", end=0.95) +
	plot_theme1 + 
	ylab("percentage SNPs") +
	xlab("number of haplotypes") +
	labs(colour = "jumpcost") +
	theme()

ggsave(filename="plots/haplotype_summary.pdf", plot=hap_summary_plot, height=2.75, width=4)

####################
#### produce plots for one parameter set
####################

#The folowing plots the inferred founder similarity mosaics for a single parameter set, given here
mismatch=1
jump=200
load(paste0("mosaics.mm", mismatch, ".j", jump, ".RData"))
mosaic<-tbl_df(do.call(rbind, mosaics))
groups<-t(apply(mosaic[,7:ncol(mosaic)], 1, function(x)(as.numeric(as.factor(x)))))
colnames(groups)<-colnames(mosaic)[7:ncol(mosaic)]

mosaic_plot<-tbl_df(cbind(mosaic[,1:6], groups)) %>%
	filter(row_number() %% 5 == 0) %>% #filter rows
	group_by(chromosome) %>%
	mutate(width=lead(position)-position) %>%
	filter(!is.na(width), !width==1) %>%
	gather(key=founder, value=haplotype, colnames(mosaic)[7:ncol(mosaic)])

all_chr_plot<-mosaic_plot %>%
	filter(!chromosome=="Un") %>%
	ggplot(aes(x=(position+(width/2))/1000000, y=founder, fill=factor(haplotype), width = width/1000000)) +
	geom_tile() +
	scale_fill_manual(values=c("grey40",cp)) +
	theme_minimal()  +
	facet_wrap(~chromosome, nrow=3, dir="v") +
	xlab("position (mb)") +
	plot_theme1 +
	theme(legend.position = "none", panel.grid.major.y=element_blank(), panel.grid.major.x=element_blank(), panel.grid.minor.x=element_blank(), axis.title.y=element_blank(), panel.border = element_blank())

ggsave(filename=paste0("plots/haplotypes_mm", mismatch, "_jc", jump,".png"), plot= all_chr_plot, height=7, width=14, dpi=300)