from tables import * from itertools import * from numpy.random import randint import matplotlib matplotlib.use('Agg') # Must be before importing matplotlib.pyplot or pylab! import matplotlib.pyplot as plt import numpy as np from numpy import array, dtype, newaxis,zeros,ones,log from bisect import * import sys """ gets the frequency spectrum for the novel snps """ def main(infile, freqfile, transitionfile): h5file2 = openFile("inbredgeno.h5", "r") siteinbred = h5file2.root.Genotypes.Sites h5file = openFile(infile, "r") sitewild = h5file.root.Genotypes.Sites #index=siteinbred.cols.chrom.createIndex() def ainb(a1, b1): #is a in b b1.sort() s=np.searchsorted(b1, a1) inbounds=np.where(s 0) * 1 + (igt1 > 0) * 1 results += iscores #minorfreq = np.min(array((results, array(numchroms) - results)).T, 1) np.savetxt(freqfile, results) #find transition transversion ratio for each frequency bin #judge whether each site is a transition ref=sites_r['ref'] alt=sites_r['alt'] refispyr = (ref==ord('C')) | (ref==ord('T')) altispyr = (alt==ord('C')) | (alt==ord('T')) refispur = (ref==ord('A')) | (ref==ord('G')) altispur = (alt==ord('A')) | (alt==ord('G')) transition = (refispyr & altispyr) | (refispur & altispur) ratio={} for i in range(max(freq)+1): print i, sum(transition & (freq==i)) , float(sum(np.logical_not(transition) & (freq==i))) ratio[i] = sum(transition & (freq==i)) / float(sum(np.logical_not(transition) & (freq==i))) np.savetxt(transitionfile, array(list(ratio.iteritems()))) infile = sys.argv[1] if len(sys.argv)>1 else 'geno.h5' freqfile = sys.argv[2] if len(sys.argv)>2 else 'freq.txt' transitionfile = sys.argv[3] if len(sys.argv)>3 else 'transitionratio.txt' if __name__ == "__main__": main(infile, freqfile, transitionfile)