#This is executed after extracting summary tables for each line #Collate results per line collate.per.line <- function(summ.table, write.file = ""){ summary = summ.table total.breaks = tapply(as.numeric(summary$total.breaks), summary$line, sum) true.breaks = tapply(as.numeric(summary$true.breaks), summary$line, sum) false.breaks = tapply(as.numeric(summary$false.breaks), summary$line, sum) no.coverage = tapply(as.numeric(summary$no.coverage), summary$line, sum) t = matrix(data = c(total.breaks, true.breaks, false.breaks, no.coverage), nrow = 10, ncol = 4, byrow = FALSE, dimnames = list(names(total.breaks), c("total.breaks", "true.breaks", "false.breaks", "no.coverage"))) if(write.file!=""){ write.table(t, write.file) } return(t) } #Collate results per penalty of dynamic programming per line #line.table contains all the lines for a given penalty - generated by collate.per.line collate.per.penalty <- function(line.table, pen.list, penalty){ newcol = paste("Penalty =", penalty) pen.list$br.num[,newcol] = line.table[,"total.breaks"] pen.list$confirmed.total[,newcol] = line.table[,"true.breaks"] / line.table[,"total.breaks"] pen.list$confirmed.cov[,newcol] = line.table[,"true.breaks"] / (line.table[,"total.breaks"] - line.table[,"no.coverage"]) return(pen.list) } #Collate results per chromosome # # ## ##main script #Initialisation statistics.dir = "/Users/martha/Documents/DPhil/Recombination/Results/DifferentMosaics/Statistics/" penalties = c(20,50,80,100,150) lines = c("MAGIC.105", "MAGIC.149", "MAGIC.175", "MAGIC.183", "MAGIC.287", "MAGIC.329", "MAGIC.338", "MAGIC.426", "MAGIC.433", "MAGIC.446") col.names = paste("Penalty =", penalties) colours = c("firebrick", "darkolivegreen", "navyblue", "black", "cyan2", "darkslategrey", "purple2", "hotpink", "chocolate","gold") pen.list = list(br.num = matrix(NA, nrow = length(lines), ncol = length(penalties), dimnames = list(lines, col.names)), confirmed.total = matrix(NA, nrow = length(lines), ncol = length(penalties), dimnames = list(lines, col.names)), confirmed.cov = matrix(NA, nrow = length(lines), ncol = length(penalties), dimnames = list(lines, col.names))) tables = list.files(statistics.dir) tables = grep(".table", tables, value = TRUE) for (penalty in penalties){ print(penalty) file = grep(paste("summ", penalty, ".", sep = ""), tables, value = TRUE, fixed = TRUE) if (length(file)>0){ table = read.table(paste(statistics.dir, file, sep = ""), header = TRUE) line.table = collate.per.line(table) pen.list = collate.per.penalty(line.table, pen.list, penalty) } } pdf(file = "/Users/martha/Documents/DPhil/Recombination/Results/DifferentMosaics/Statistics-cov30-/summary3.pdf") par(mfrow = c(2,1)) br.num <- pen.list$br.num[,!apply(is.na(pen.list$br.num),2,all)] confirmed.total <- pen.list$confirmed.total[,!apply(is.na(pen.list$br.num),2,all)] confirmed.cov <- pen.list$confirmed.cov[,!apply(is.na(pen.list$br.num),2,all)] xx = as.numeric(sub("Penalty = ", "", colnames(br.num))) plot(x = xx, y=br.num[1,], type = "o", col = colours[1], xlim = c(0,160), ylim = c(0, 200), lwd = 2, main = "Predicted breakpoints vs penalty", xlab = "Penalty", ylab = "Predicted breakpoints") for(i in 2:10){ lines(x = xx, y=br.num[i,], type = "o", col = colours[i], lwd = 2) } legend(x = "topleft", legend = lines, fill = colours, cex = 0.6) plot(x = xx, y=confirmed.total[1,], type = "o", col = colours[1], xlim = c(0,160), ylim = c(0, 1), lwd = 2, main = "Breakpoints verified by data vs penalty (with NAs)", xlab = "Penalty", ylab = "Verified breakpoints (%)") for(i in 2:10){ lines(x = xx, y=confirmed.total[i,], type = "o", col = colours[i], lwd = 2) } legend(x = "topleft", legend = lines, fill = colours, cex = 0.6) plot(x = xx, y=confirmed.cov[1,], type = "o", col = colours[1], xlim = c(0,160), ylim = c(0, 1), lwd = 2, main = "Breakpoints verified by data vs penalty (without NAs)", xlab = "Penalty", ylab = "Verified breakpoints (%)") for(i in 2:10){ lines(x = xx, y=confirmed.cov[i,], type = "o", col = colours[i], lwd = 2) } legend(x = "topleft", legend = lines, fill = colours, cex = 0.6) dev.off()