#question1: what is the recombination rate of my hotspots in Nordborg's paper (within a 10000kb window or so) recomb.Nordborg <- function(my.hotspots, Nordborg.rhos, window){ return(apply(my.hotspots, 1, nearest.Nordborg.rho, Nordborg.rhos, window)) } nearest.Nordborg.rho <- function(hotspot, Nordborg.rhos, window){ chr = as.numeric(hotspot["Chrom"]) start = as.numeric(hotspot["Start"]) end = as.numeric(hotspot["End"]) within.Nordborg.rhos = Nordborg.rhos[(Nordborg.rhos$chr == chr) & ((Nordborg.rhos$startpos >= start-window & Nordborg.rhos$startpos <= end+window) | (Nordborg.rhos$endpos >= start-window & Nordborg.rhos$endpos <= end+window)),"rho_per_kb"] print(hotspot) print(length(within.Nordborg.rhos)) ifelse(length(within.Nordborg.rhos)>0, return(max(within.Nordborg.rhos)), return(0)) } #question2: select 1000 first hotspots from Nordborg paper and plot the distance from my nearest hotspot hottest.Nordborg <- function(Nordborg.rhos, my.hotspots, my.randoms, top.size = 1000){ source('/Volumes/martha/DPhil/Recombination/Results2/Clusters/within.R') N.rhos = sort(unique(Nordborg.rhos$rho_per_kb), decreasing = T) thres = N.rhos[top.size] print(thres) top.N.rhos = Nordborg.rhos[Nordborg.rhos$rho_per_kb >=thres ,] prev.startpos = top.N.rhos[1, "startpos"] prev.endpos = top.N.rhos[1, "endpos"] prev.chr = top.N.rhos[1, "chr"] concat.N.rhos = c() for(i in 2:nrow(top.N.rhos)){ curr.chr = top.N.rhos[i, "chr"] curr.startpos = top.N.rhos[i, "startpos"] curr.endpos = top.N.rhos[i, "endpos"] if(prev.endpos == curr.startpos){ prev.endpos = top.N.rhos[i, "endpos"] } else{ concat.N.rhos = rbind(concat.N.rhos, c(prev.chr, prev.startpos, prev.endpos)) prev.startpos = curr.startpos prev.chr = curr.chr prev.endpos = curr.endpos } } concat.N.rhos = as.data.frame(concat.N.rhos) colnames(concat.N.rhos) = c("chr", "startpos", "endpos") concat.N.rhos$distance.hotspots = apply.mindistance(concat.N.rhos, my.hotspots, 1, 2, 3, "Chrom", "Start", "End") concat.N.rhos$distance.randoms = apply.mindistance(concat.N.rhos, my.randoms, "chr", "startpos", "endpos", "Chrom", "Start", "End") #print(concat.N.rhos) return(concat.N.rhos) } Nordborg.summary.statistics <- function(my.hotspots, my.randoms, Nordborg.rhos, pdffile, Nordborg.sign.rho = 3){ pdf(pdffile) #plot density of Nordborg rhos N.rho.dens = density(Nordborg.rhos$rho_per_kb, adjust = 0.01) cold = which(N.rho.dens$x < Nordborg.sign.rho) hot = which(N.rho.dens$x >= Nordborg.sign.rho) plot(x = N.rho.dens$x[cold], y = N.rho.dens$y[cold], type = "l", lwd = 2, main = "Horton et al (2012) - Recombination Rates per region", ylab = "Density", xlab = "rho/kb", xlim = c(0,50)) lines(x = N.rho.dens$x[hot], y = N.rho.dens$y[hot], type = "l", lwd = 2, col = "firebrick") abline(v = Nordborg.sign.rho, lwd = 1, lty = "dashed") text(x = Nordborg.sign.rho+5, y = max(N.rho.dens$y), labels = c("Hotspots"), col = "firebrick") #zoom in density of Nordborg rhos plot(x = N.rho.dens$x[cold], y = N.rho.dens$y[cold], type = "l", lwd = 2, main = "Horton et al (2012) - Recombination Rates per region", ylab = "Density", xlab = "rho/kb", xlim = c(0,50), ylim = c(0,2)) lines(x = N.rho.dens$x[hot], y = N.rho.dens$y[hot], type = "l", lwd = 2, col = "firebrick") abline(v = Nordborg.sign.rho, lwd = 1, lty = "dashed") text(x = Nordborg.sign.rho+5, y = 2, labels = c("Hotspots"), col = "firebrick") #plot histogram of Nordborg rhos of my hotspots hist.10k = hist(my.hotspots$Nordborg.rho.10kb, breaks = seq(0, max(my.hotspots$Nordborg.rho.10kb)+1, by = 1), freq = T, main = "Recombination rate of hotspots in Horton et al.(2012)", ylab = "Number of hotspots", xlab = "Recombination rate in Horton et.al") plot(x=seq(0.5, max(hist.10k$breaks)-0.5), y = cumsum(hist.10k$counts), type = "h", main = "Cumulative sums of recombination rates of hotspots in Horton et al.(2012)", ylab = "Number of hotspots", xlab = "Recombination rate in Horton et.al") #plot densities of hotspots and random regions hot.rho.dens = density(my.hotspots$Nordborg.rho.10kb, adjust = 0.2) rand.rho.dens = density(my.randoms$Nordborg.rho.10kb, adjust = 0.2) pvalue.10k = format(t.test(x = my.hotspots$Nordborg.rho.10kb, y = my.randoms$Nordborg.rho.10kb, alternative = "greater")$p.value, digits = 2, scientific=T) plot(x = rand.rho.dens$x, y = rand.rho.dens$y, type = "l", lwd = 2, col = "navyblue", main = paste("pvalue =",pvalue.10k), xlab = "rate/kb", ylab = "Density") lines(x = hot.rho.dens$x, y = hot.rho.dens$y, type = "l", lwd = 2, col = "firebrick") legend(x = "topright", legend = c("Hotspots", "Controls"), fill = c("firebrick", "navyblue")) #correlation of recomb.rates my.rhos = 1000*my.hotspots$Recombinants/(my.hotspots$End - my.hotspots$Start) N.rhos = my.hotspots$Nordborg.rho.10kb corr.coeff = cor(x = my.rhos, y = N.rhos) plot(x = my.rhos, y = N.rhos, type = "p", pch = 16, main = paste("Correlation coefficient =", corr.coeff), xlab = "Recombinants/kb in MAGIC hotspots", ylab = "rho/kb in Horton et al.") dev.off() } hot.Nord = hottest.Nordborg(Nordborg.rhos, my.hotspots, my.randoms, 1000)