#!/usr/bin/env python #Copyright (C) 2009-2011 by Benedict Paten (benedictpaten@gmail.com) # #Released under the MIT license, see LICENSE.txt #!/usr/bin/env python """ cactus_treeStatsPlotter.py dent earl, dearl (a) soe ucsc edu 7 jan 2010 Reads in a treeStat.xml generated by cactus_treeStats, then makes calls to R to generate fancy plots. """ ############################## import os, re, sys, subprocess import xml.etree.ElementTree as ET from optparse import OptionParser def usage(): sys.stderr.write('USAGE: %s treeStats.xml\n' %(sys.argv[0])) print __doc__ sys.exit(2) def writeRData(data, name): dataName = 'data_'+name+'.txt' DATA = open(dataName, 'w') for i in range(len(data)): DATA.write('%s\n' %(data[i])) DATA.close() def writeRScript(name, title, ylabel, numFiles, isLog=False, outputFormat='pdf', isLegend=False): if isLog: ylabel = 'log-log '+ylabel n=0 dataNames = [] for i in range(0, numFiles): dataNames.append('data_'+name+'_'+str(n)+'.txt') n=n+1 scriptName = 'script_'+name+'.r' if outputFormat == 'pdf': imgName = 'dist_'+name+'.pdf' startImageDeviceStr = "pdf('%s', height=4, width=8)\n" %(imgName) elif outputFormat == 'png': imgName = 'dist_'+name+'.png' startImageDeviceStr = "png('%s', height=400, width=800)\n" %(imgName) else: sys.stderr.write('Unknown output format: %s\n' %(outputFormat)) usage() SCRIPT = open(scriptName, 'w') SCRIPT.write("# This script auto-magically generated by treeStatPlotter.py\n") myPyColors=['red', 'green', 'blue', 'magenta', 'cyan','yellow'] SCRIPT.write("myColors=c('%s'" %(myPyColors[0])) for c in myPyColors[1:]: SCRIPT.write(", '%s'" %(c)) SCRIPT.write(")\n") SCRIPT.write("x=vector()\n") SCRIPT.write("y=list()\n") for i in range(0, numFiles): SCRIPT.write("x[%d] = read.delim('%s', header=FALSE)\n" %(i+1, dataNames[i])) if isLog: SCRIPT.write("y[[%d]] = log( quantile(x[[%d]], probs=seq(from=0, to=1, length=length(x[[%i]]))))\n" %(i+1, i+1, i+1)) else: SCRIPT.write("y[[%d]] = quantile(x[[%d]], probs=seq(from=0, to=1, length=length(x[[%i]])))\n" %(i+1, i+1, i+1)) SCRIPT.write("myYAxisMax = 1.1 * max( y[[1]][length(y[[1]])] ") for i in range(1, numFiles): SCRIPT.write(", y[[%d]][length(y[[%d]])]" %(i+1, i+1)) SCRIPT.write(" )\n") SCRIPT.write("for (i in 1:length(y)){\n") SCRIPT.write(" if (sum(y[[i]]) == 0){\n warning(\"One of the files in %s had all data '0'!\")\n quit(\"no\")\n }\n" %(name) ) SCRIPT.write(" }\n") SCRIPT.write("%s\n" %(startImageDeviceStr)) SCRIPT.write("plot.new()\n") SCRIPT.write("plot.window(xlim=c(0,1), ylim=c(1,myYAxisMax), log='y')\naxis(1);axis(2)\n") for i in range(0, numFiles): SCRIPT.write("lines(x=seq(from=0, to=1, length=length(y[[%d]])), y=y[[%d]], type='l', col=myColors[%d])\n" %(i+1, i+1, (i%len(myPyColors))+1)) SCRIPT.write("title(xlab='Quantile')\n") SCRIPT.write("title(ylab='%s')\n" % ylabel) SCRIPT.write("title(main='%s')\n" % title) SCRIPT.write("names=c('file 1'") for i in range(1, numFiles): SCRIPT.write(", 'file %d'" %(i+1)) SCRIPT.write(")\n") if isLegend: SCRIPT.write("legend('topleft', names, col=rep(myColors, length.out=length(x)), lty=rep(1, length(x)), bty='n')\n") SCRIPT.write("dev.off()\n") SCRIPT.close() def runRScript(name, title, ylabel, numFiles, noCleanup, isLog=False, outputFormat='pdf', isLegend=False): # we ignore the ylabel, isLog, outputFormat, isLegend scriptName = 'script_'+name+'.r' RCMD = 'R --no-save --no-restore <'+os.path.join(os.getcwd(), scriptName)+' >/dev/null 2>&1' Rstatus = subprocess.Popen(RCMD, shell=True) Rstatus.wait() if Rstatus.returncode: sys.stderr.write('Warning: %s: %s failed to run properly in R, return code was %d. Continuing...\n' %(sys.argv[0], scriptName, Rstatus.returncode)) #sys.exit(1) if not noCleanup: os.remove(scriptName) for i in range(0, numFiles): dataName = 'data_'+name+'_'+str(i)+'.txt' os.remove(dataName) def writeScriptFunc(name, title, ylabel, numFiles, noCleanup, isLog=False, outputFormat='pdf', isLegend=False): writeRScript(name, title, ylabel, numFiles, isLog, outputFormat, isLegend) def getPlots(xmlNode): l = [] counter = 0 def iterparent(tree): for parent in tree.getiterator(): for child in parent: yield parent, child for parent, child in iterparent(xmlNode): if child.text != None: baseTokens = parent.tag.split("_") + child.tag.split("_") attribTokens = [ "%s:%s" % (key, parent.attrib[key]) for key in parent.attrib.keys() ] l.append((("%s_%i" % ("_".join(baseTokens), counter)), " ".join(baseTokens + attribTokens), " ".join(baseTokens), child)) counter += 1 return l def writeDataLoop(xml, number): for name, title, yLabel, node in getPlots(xml): dist = node.text.split() writeRData(dist, name+'_'+str(number)) def allPlots(xmlList, function, noCleanup, isLog=False, outputFormat='pdf', isLegend=False): for name, title, yLabel, node in getPlots(xmlList[0]): function(name, title, yLabel, len(xmlList), noCleanup, isLog, outputFormat, isLegend) def main(): if len(sys.argv) < 2: sys.stderr.write('Too few input arguments\n') usage() parser=OptionParser() parser.add_option('-n', '--noCleanup', action='store_true', dest='noCleanup', help='Preserves the data files and R scripts.') parser.add_option('-l', '--loglog', action='store_true', dest='log', default=False, help='Takes the log of the data and plots it on a log scale: log of log(y).') parser.add_option('-e', '--legend', action='store_true', dest='legend', default=False, help='Adds a legend to plots..') parser.add_option('-f', '--format', action='store', type='string', dest='format', default='pdf', help='Switches the plot output format from pdf to png.') (options, args) = parser.parse_args() Rstatus = subprocess.Popen('R --version > /dev/null 2>&1', shell=True) Rstatus.wait() if Rstatus.returncode: sys.stderr.write('ERROR: %s depends on R. Please install R or use --noPlots (or verify your PATH).\n' %(sys.argv[0])) sys.exit(1) ############################## # end checks # collect and parse all the input stats.xml xml=[] for f in args: if not os.path.isfile(f): sys.stderr.write('%s is not a file.\n' %(f)) usage() xml.append(ET.parse(f)) # write out all the data files i=0 for x in xml: writeDataLoop(x, i) i=i+1 # write out scripts allPlots(xml, writeScriptFunc, options.noCleanup, isLog=options.log, outputFormat=options.format, isLegend=options.legend) # run scripts allPlots(xml, runRScript, options.noCleanup) if __name__ == "__main__": main()