#The breakpoints file should contain the non-clustered breakpoints library(multicore) hotspots <- function(breakpoints, window = 10000, alpha = 0.01, stepsize = 10000, bindir = "", hotspots.file = "", ratesOnly = FALSE, ratesfile = ""){ if(grepl("Chr", breakpoints[1, "chr"]) == "F") breakpoints[1,"chr"] = paste("Chr", breakpoints[1,"chr"], sep = "") recombinants.per.chr = by(breakpoints, breakpoints$chr, function(x){return(x)}) print(recombinants.per.chr) recomb = data.frame(Chr = c("Chr1", "Chr2", "Chr3", "Chr4", "Chr5"), Chr.length = c(30427671, 19698289, 23459830, 18585056, 26975502) , Recombinants = sapply(recombinants.per.chr, nrow)) print(recomb) recomb$Rate.per.kb = (1000*recomb$Recombinants)/recomb$Chr.length if (ratesOnly){ write.table(recomb, ratesfile, quote = F, sep = "\t", row.names = FALSE) return(0) } print(recomb[,"Chr"]) hotspot.list = by(recomb, recomb$Chr, hotspots.chr, window, recombinants.per.chr, alpha, bindir, stepsize) hotspots.table = do.call("rbind", hotspot.list) rownames(hotspots.table) = NULL if (length(hotspots.file)>0){ write.table(hotspots.table, hotspots.file, quote = F, sep = "\t") } print(paste("Hotspots =", nrow(hotspots.table))) return(hotspots.table) } #hotspots.chr returns a list of hotspots, plots the hotspots and gives the rate per bin hotspots.chr <- function(recomb.chr, window, recombinants.per.chr, alpha, bindir, stepsize){ chr = recomb.chr[1, "Chr"] print(chr) len = recomb.chr[1, "Chr.length"] rate = recomb.chr[1, "Rate.per.kb"] recombinants = apply(recombinants.per.chr[[chr]][,c("startp", "endp")], 1, median) print(recombinants[1:10]) bins = data.frame(start = seq(1, len-window, by = stepsize), end = seq(window, len, by = stepsize)) bin.inds = seq(1, len-window, by = stepsize) rec.per.bin = unlist(mclapply(bin.inds, function(x, recombinants, window){return(length(recombinants[recombinants>=x & recombinants <= (x+window)]))}, recombinants, window, mc.cores = 15)) bins$recombinants = rec.per.bin print("RECOMBINANTS") bins$p.values = unlist(mclapply(bins$recombinants, function(x){return(binom.test(x, n = window, p = rate/1000)$p.value)}, mc.cores = 15)) print("P-VALUES") bins$p.values.adj = p.adjust(bins$p.values, method = "fdr") if (length(bindir) > 0){ write.table(bins, paste(bindir, "Bins.", chr, ".txt", sep = ""), quote = F, sep = "\t") } hotspots = bins[bins$p.values.adj <= alpha,] hotspots = cbind(chr, hotspots) print(hotspots) #plot.hotspots(bins, hotspots, chr, window) return(hotspots) } plot.hotspots <- function(bindir, hotspots, window, alpha, pdffile, plot.clusters = T, clusters.bindir = "", plot.legend = T){ pdf(pdffile) files = list.files(path = bindir) files = files[grep("Bins", files)] pch = 18 par(mfrow = c(3,1), cex = 0.6, pch = pch) for (f in files){ bins = read.delim(paste(bindir, f, sep = ""), header = T) chr = substr(f, start = 9, stop = 9) print(chr) x = apply(bins[,c("start", "end")], 1, median) y = bins$recombinants*(1000/window) x.hot = apply(hotspots[hotspots[,"Chrom"]==paste("Chr", chr, sep = ""),c("Start", "End")], 1, median) print(length(x.hot)) print(x.hot[1:10]) y.hot = max(y) + 1 print("PLOTTING...") plot(x = x/1000000, y = y, type = "l", main = paste("Chromosome", chr), xlab = "Mb", ylab = "Rate (recombinants/kb)", ylim = c(-30, y.hot+1), col = "darkslategrey") points(x = x.hot/1000000, y = rep(y.hot, length(x.hot)), type = "p", pch = 18, col = "navyblue") if(clusters.bindir!=""){ c.bins = read.delim(paste(clusters.bindir, f, sep = ""), header = T) c.x = apply(c.bins[,c("start", "end")], 1, median) c.y = -c.bins$recombinants*(1000/window) lines(x = c.x/1000000, y = c.y, type = "l", col = "#EF8A62") if(plot.legend) legend(x = "bottomright", legend = c("Breakpoints distribution", "Clusters distribution","Hotspots"), lty = c(1, 1, NA), lwd = c(2, 2, NA), pch = c(NA,NA,pch), col = c("darkslategrey", "#EF8A62", "navyblue")) } else{ if(plot.legend) legend(x = "topright", legend = c("Recombination Rate", "Hotspots"), lty = c(1, NA), lwd = c(2, NA), pch = c(NA, pch), col = c("darkslategrey", "navyblue")) } } if(plot.legend == FALSE){ plot(1, type="n", axes=F, xlab="", ylab="") if(clusters.bindir!=0) legend(x = "center", legend = c("Breakpoints distribution", "Clusters distribution","Hotspots"), lty = c(1, 1, NA), lwd = c(2, 2, NA), pch = c(NA,NA,pch), col = c("darkslategrey", "#EF8A62", "navyblue")) else legend(x = "center", legend = c("Recombination Rate", "Hotspots"), lty = c(1, NA), lwd = c(2, NA), pch = c(NA, pch), col = c("darkslategrey", "navyblue")) } dev.off() } #br.not.in.cl = read.delim("./../Clusters/breaks.not.in.clusters.txt", header = TRUE) #h = hotspots(br.not.in.cl, window = 1000, stepsize = 1000, bindir = "./Bins-1k-1kstepsize/", hotspots.file = "./Bins-1k-1kstepsize/hotspots10k.txt", ratesOnly = TRUE, ratesfile = "./rates.txt") #plot.hotspots("./", window = 10000, alpha = 0.01, pdffile = "./hotspots.pdf") SW.hotspots <- function(bin, chr, alpha = 0.01, rate=0.1){ bin$score = bin$recombinants bin$score[bin$score == 0] = -1 hotspots = NULL count = 1 while(length(bin$score[bin$score>0])>0){ count = count + 1 s.prev = 0 b.prev = 0 s = c() b = c() for(i in 1:nrow(bin)){ if(s.prev + bin$score[i] > 0){ s.curr = s.prev + bin$score[i] } else{ s.curr = 0 } if(s.curr>0){ b.curr = b.prev } else{ b.curr = bin$end[i] } b.prev = b.curr s.prev = s.curr s = c(s, s.curr) b = c(b, b.curr) } maxind = which(s == max(s))[1] from.ind = which(b == b[maxind])[1] + 1 to.ind = maxind hotspots = rbind(hotspots, c(chr, as.numeric(b[maxind]), as.numeric(bin$end[maxind]), as.numeric(sum(bin$recombinants[from.ind:to.ind])))) bin$score[from.ind:to.ind] = 0 } colnames(hotspots) = c("Chrom", "Start", "End", "Recombinants") print(hotspots) #hotspots$p.values = round(unlist(apply(hotspots, 1 function(x){return(binom.test(as.numeric(x["Recombinants"]), n = as.numeric(x["End"])-as.numeric(x["Start"]), p = rate/1000)$p.value)})), 5) #print("P-VALUES") #hotspots$p.values.adj = round(p.adjust(hotspots$p.values, method = "fdr"), 5) #hotspots = hotspots[hotspots$p.values.adj >= alpha] return(hotspots) } get.dynamic.hotspots <- function(bindir = "./Bins-1k-1kstepsize/", rates.file = "./rates.txt", chroms = c("Chr1", "Chr2", "Chr3", "Chr4", "Chr5"), hotspots.file = "./Bins-1k-1kstepsize/hotspots.dynamic.txt", bins.file = "./Bins-1k-1kstepsize/binsize.dynamic.txt"){ rates = read.delim("./rates.txt", header = T) bin.list = lapply(chroms, function(chr){read.delim(paste(bindir, "Bins.", chr, ".txt", sep = ""), header = T)}) names(bin.list) = chroms h.list = mclapply(chroms, function(chr){SW.hotspots(bin=bin.list[[chr]], chr=chr, alpha=0.01, rate=rates[rates[,"Chr"]==chr, "Rate.per.kb"])}, mc.cores = 5) hotspots = do.call("rbind", h.list) write.table(hotspots, bins.file, row.names = F, quote = F, sep = "\t") hotspots$p.values = unlist(apply(hotspots, 1, function(x){return(binom.test(as.numeric(x["Recombinants"]), n = as.numeric(x["End"])-as.numeric(x["Start"]), p = rate/1000)$p.value)})) hotspots$p.values.adj = p.adjust(hotspots$p.values, method = "fdr") hotspots.sign = hotspots[hotspots$p.values.adj <= 0.01,] write.table(hotspots.sign, hotspots.file, row.names = F, quote = F, sep = "\t") } #breakpoints = read.delim("./../Clusters/breaks.in.clusters.txt", header = T) #hotspots(breakpoints = breakpoints, window = 10000, stepsize = 10000, bindir = "./../Clusters/Bins-10k-10kstepsize/") #hotspots = read.delim("./Bins-1k-1kstepsize/hotspots.dynamic.txt", header = T) #plot.hotspots(bindir = "./Bins-10k-10kstepsize/", hotspots = read.delim("./Bins-1k-1kstepsize/hotspots.dynamic.txt", header = T), window = 1000, alpha = 0.01, pdffile = "./../Clusters/bp.hotspots.clusters.10k.pdf", plot.clusters = T, clusters.bindir = "./../Clusters/Bins-10k-10kstepsize/", plot.legend = F)