import tables
import os, os.path
from time import time
import random
import numarray
from numarray import random_array
from numarray import records

# Seed both Python's `random` module and numarray's `random_array` with
# fixed values so that every run generates the same random sequence.
random.seed(19)
random_array.seed(19, 20)

def open_db(filename, remove=0):
    """Open `filename` with PyTables in mode 'a' and return the file handle.

    When `remove` is true and a file already exists at `filename`, that
    file is deleted first so the caller starts from an empty file.
    """
    if remove:
        if os.path.exists(filename):
            os.remove(filename)
    return tables.open_file(filename, 'a')

def create_db(filename, nrows):

    class Record(tables.IsDescription):
        col1 = tables.Int32Col()
        col2 = tables.Int32Col()
        col3 = tables.Float64Col()
        col4 = tables.Float64Col()

    con = open_db(filename, remove=1)
    table = con.create_table(con.root, 'table', Record,
                            filters=filters, expectedrows=nrows)
    table.indexFilters = filters
    step = 1000*100
    scale = 0.1
    t1=time()
    j = 0
    for i in xrange(0, nrows, step):
        stop = (j+1)*step
        if stop > nrows:
            stop = nrows
        arr_f8 = numarray.arange(i, stop, type=numarray.Float64)
        arr_i4 = numarray.arange(i, stop, type=numarray.Int32)
        if userandom:
            arr_f8 += random_array.normal(0, stop*scale, shape=[stop-i])
            arr_i4 = numarray.array(arr_f8, type=numarray.Int32)
        recarr = records.fromarrays([arr_i4, arr_i4, arr_f8, arr_f8])
        table.append(recarr)
        j += 1
    table.flush()
    ctime = time()-t1
    if verbose:
        print "insert time:", round(ctime, 5)
        print "Krows/s:", round((nrows/1000.)/ctime, 5)
    index_db(table)
    close_db(con)

def index_db(table):
    t1=time()
    table.cols.col2.create_index()
    itime = time()-t1
    if verbose:
        print "index time (int):", round(itime, 5)
        print "Krows/s:", round((nrows/1000.)/itime, 5)
    t1=time()
    table.cols.col4.create_index()
    itime = time()-t1
    if verbose:
        print "index time (float):", round(itime, 5)
        print "Krows/s:", round((nrows/1000.)/itime, 5)

def query_db(filename, rng):
    con = open_db(filename)
    table = con.root.table
    # Query for integer columns
    # Query for non-indexed column
    if not doqueryidx:
        t1=time()
        ntimes = 10
        for i in range(ntimes):
            results = [ r['col1'] for r in
                        table.where(rng[0]+i <= table.cols.col1 <= rng[1]+i) ]
        qtime = (time()-t1)/ntimes
        if verbose:
            print "query time (int, not indexed):", round(qtime, 5)
            print "Mrows/s:", round((nrows/1000.)/qtime, 5)
            print results
    # Query for indexed column
    t1=time()
    ntimes = 10
    for i in range(ntimes):
        results = [ r['col1'] for r in
                    table.where(rng[0]+i <= table.cols.col2 <= rng[1]+i) ]
    qtime = (time()-t1)/ntimes
    if verbose:
        print "query time (int, indexed):", round(qtime, 5)
        print "Mrows/s:", round((nrows/1000.)/qtime, 5)
        print results
    # Query for floating columns
    # Query for non-indexed column
    if not doqueryidx:
        t1=time()
        ntimes = 10
        for i in range(ntimes):
            results = [ r['col3'] for r in
                        table.where(rng[0]+i <= table.cols.col3 <= rng[1]+i) ]
        qtime = (time()-t1)/ntimes
        if verbose:
            print "query time (float, not indexed):", round(qtime, 5)
            print "Mrows/s:", round((nrows/1000.)/qtime, 5)
            print results
    # Query for indexed column
    t1=time()
    ntimes = 10
    for i in range(ntimes):
        results = [ r['col3'] for r in
                    table.where(rng[0]+i <= table.cols.col4 <= rng[1]+i) ]
    qtime = (time()-t1)/ntimes
    if verbose:
        print "query time (float, indexed):", round(qtime, 5)
        print "Mrows/s:", round((nrows/1000.)/qtime, 5)
        print results
    close_db(con)

def close_db(con):
    """Close the open file handle `con` by calling its close() method."""
    con.close()

if __name__=="__main__":
    import sys
    import getopt
    try:
        import psyco
        psyco_imported = 1
    except:
        psyco_imported = 0

    usage = """usage: %s [-v] [-p] [-m] [-c] [-q] [-i] [-z complevel] [-l complib] [-R range] [-n nrows] file
            -v verbose
            -p use "psyco" if available
            -m use random values to fill the table
            -q do a query (both indexed and non-indexed version)
            -i do a query (exclude non-indexed version)
            -c create the database
            -z compress with zlib (no compression by default)
            -l use complib for compression (zlib used by default)
            -R select a range in a field in the form "start,stop" (def "0,10")
            -n sets the number of rows (in krows) in each table
            \n""" % sys.argv[0]

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'vpmcqiz:l:R:n:')
    except:
        sys.stderr.write(usage)
        sys.exit(0)

    # default options
    verbose = 0
    usepsyco = 0
    userandom = 0
    docreate = 0
    docompress = 0
    complib = "zlib"
    doquery = 0
    doqueryidx = 0
    rng = [0, 10]
    nrows = 1

    # Get the options
    for option in opts:
        if option[0] == '-v':
            verbose = 1
        elif option[0] == '-p':
            usepsyco = 1
        elif option[0] == '-m':
            userandom = 1
        elif option[0] == '-c':
            docreate = 1
            createindex = 1
        elif option[0] == '-q':
            doquery = 1
        elif option[0] == '-i':
            doqueryidx = 1
        elif option[0] == '-z':
            docompress = int(option[1])
        elif option[0] == '-l':
            complib = option[1]
        elif option[0] == '-R':
            rng = [int(i) for i in option[1].split(",")]
        elif option[0] == '-n':
            nrows = int(option[1])

    # Catch the hdf5 file passed as the last argument
    filename = pargs[0]

    # The filters chosen
    filters = tables.Filters(complevel=docompress, complib=complib)

    if verbose:
        print "pytables version:", tables.__version__
        if userandom:
            print "using random values"
        if doqueryidx:
            print "doing indexed queries only"

    if docreate:
        if verbose:
            print "writing %s krows" % nrows
        if psyco_imported and usepsyco:
            psyco.bind(create_db)
        nrows *= 1000
        create_db(filename, nrows)

    if doquery:
        query_db(filename, rng)