#!/usr/bin/python2.7

"""
Convert a qual (qv) file to several BinnedArray files for fast seek.
This script takes approximately 4 seconds per 1 million base pairs.

The input format is fasta style quality -- fasta headers followed by 
whitespace separated integers.

usage: %prog qual_file output_file
"""

import string
import psyco_full
import sys
from binned_array import *
from bx.cookbook import *
import fileinput

def main():
    args = sys.argv[1:]
    try:
        qual_file = args[ 0 ]
        output_file = args[ 1 ]
    except:
        print "usage: qual_file output_file"
        sys.exit()

    qual = fileinput.FileInput( qual_file )
    outfile = None
    outbin = None
    base_count = 0
    mega_count = 0

    for line in qual:
        line = line.rstrip("\r\n")
        if line.startswith(">"):
            # close old
            if outbin and outfile:
                print "\nFinished region " + region + " at " + str(base_count) + " base pairs."
                outbin.finish()
                outfile.close()
            # start new file
            region = line.lstrip(">")
            outfname = output_file + "." + region + ".bqv"
            print "Writing region " + region + " to file " + outfname
            outfile = open( outfname , "wb")
            outbin = BinnedArrayWriter(outfile, typecode='b', default=0)
            base_count = 0
            mega_count = 0
        else:
            if outfile and outbin:
                nums = line.split()
                for val in nums:
                    outval = int(val)
                    assert outval <= 255 and outval >= 0
                    outbin.write(outval)
                    base_count += 1
                if (mega_count * 1000000) <= base_count:
                    sys.stdout.write(str(mega_count)+" ")
                    sys.stdout.flush()
                    mega_count = base_count // 1000000 + 1
    if outbin and outfile:
        print "\nFinished region " + region + " at " + str(base_count) + " base pairs."
        outbin.finish()
        outfile.close()

if __name__ == "__main__":
    main()