#!/usr/bin/env python

import numpy as NP
from tables import *


# This record description is used only by the examples
class Small(IsDescription):
    var1 = StringCol(itemsize=4, pos=2)
    var2 = Int32Col(pos=1)
    var3 = Float64Col(pos=0)


# Define a user record to characterize some kind of particles
class Medium(IsDescription):
    name = StringCol(itemsize=16, pos=0)    # 16-character string
    float1 = Float64Col(shape=2, dflt=NP.arange(2), pos=1)
    #float1 = Float64Col(dflt=2.3)
    #float2 = Float64Col(dflt=2.3)
    #zADCcount = Int16Col()                 # signed short integer
    ADCcount = Int32Col(pos=6)              # signed 32-bit integer
    grid_i = Int32Col(pos=7)                # integer
    grid_j = Int32Col(pos=8)                # integer
    pressure = Float32Col(pos=9)            # float (single-precision)
    energy = Float64Col(pos=2)              # double (double-precision)
    #unalig = Int8Col()                     # just to unalign data


# Define a user record to characterize some kind of particles
class Big(IsDescription):
    name = StringCol(itemsize=16)           # 16-character string
    float1 = Float64Col(shape=32, dflt=NP.arange(32))
    float2 = Float64Col(shape=32, dflt=2.2)
    TDCcount = Int8Col()                    # signed 8-bit integer
    #ADCcount = Int32Col()
    #ADCcount = Int16Col()                  # signed short integer
    grid_i = Int32Col()                     # integer
    grid_j = Int32Col()                     # integer
    pressure = Float32Col()                 # float (single-precision)
    energy = Float64Col()                   # double (double-precision)


def createFile(filename, totalrows, filters, recsize):
    # Open a file in "w"rite mode
    fileh = open_file(filename, mode="w", title="Table Benchmark",
                      filters=filters)
    # Table title
    title = "This is the table title"
    # Create a Table instance
    group = fileh.root
    rowswritten = 0
    for j in range(3):
        # Create a table
        if recsize == "big":
            table = fileh.create_table(group, 'tuple'+str(j), Big, title,
                                       None, totalrows)
        elif recsize == "medium":
            table = fileh.create_table(group, 'tuple'+str(j), Medium, title,
                                       None, totalrows)
        elif recsize == "small":
            table = fileh.create_table(group, 'tuple'+str(j), Small, title,
                                       None, totalrows)
        else:
            raise RuntimeError("This should never happen")
        table.attrs.test = 2
        rowsize = table.rowsize
        # Get the row object associated with the new table
        d = table.row
        # Fill the table
        if recsize == "big":
            for i in range(totalrows):
                #d['name'] = 'Part: %6d' % (i)
                d['TDCcount'] = i % 256
                #d['float1'] = NP.array([i]*32, NP.float64)
                #d['float2'] = NP.array([i**2]*32, NP.float64)
                #d['float1'][0] = float(i)
                #d['float2'][0] = float(i*2)
                # Common part with medium
                d['grid_i'] = i
                d['grid_j'] = 10 - i
                d['pressure'] = float(i*i)
                #d['energy'] = float(d['pressure'] ** 4)
                d['energy'] = d['pressure']
                #d['idnumber'] = i * (2 ** 34)
                d.append()
        elif recsize == "medium":
            for i in range(totalrows):
                #d['name'] = 'Part: %6d' % (i)
                #d['float1'] = NP.array([i]*2, NP.float64)
                #d['float1'] = arr
                #d['float1'] = i
                #d['float2'] = float(i)
                # Common part with big:
                d['grid_i'] = i
                d['grid_j'] = 10 - i
                d['pressure'] = i*2
                #d['energy'] = float(d['pressure'] ** 4)
                d['energy'] = d['pressure']
                d.append()
        else:  # Small record
            for i in range(totalrows):
                #d['var1'] = str(random.randrange(1000000))
                #d['var3'] = random.randrange(10000000)
                d['var1'] = str(i)
                #d['var2'] = random.randrange(totalrows)
                d['var2'] = i
                #d['var3'] = 12.1e10
                d['var3'] = totalrows - i
                d.append()  # Using the Row accessor is about 10% faster
                            # than table.append()
        rowswritten += totalrows
        if recsize == "small":
            # Testing with indexing
            pass
            #table._createIndex("var3", Filters(1, "zlib", shuffle=1))
        group._v_attrs.test2 = "just a test"
        # Create a new group
        group2 = fileh.create_group(group, 'group'+str(j))
        # Iterate over this new group (group2)
        group = group2
        table.flush()
    # Close the file (eventually destroy the extended type)
    fileh.close()
    return (rowswritten, rowsize)
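
# A minimal, self-contained sketch of the create/append pattern used by
# createFile() above, kept separate from the benchmark itself; the file
# name and the row values below are illustrative only.
def exampleWrite(filename="example-small.h5"):
    """Write three Small records to a fresh file and close it."""
    fileh = open_file(filename, mode="w")
    table = fileh.create_table(fileh.root, 'example', Small, "example table")
    d = table.row
    for i in range(3):
        d['var1'] = str(i)      # 4-character string column
        d['var2'] = i           # Int32 column
        d['var3'] = 3 - i       # Float64 column
        d.append()              # buffered; rows reach disk on flush/close
    table.flush()               # flush the row buffer to the file
    fileh.close()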
def readFile(filename, recsize, verbose):
    # Open the HDF5 file in read-only mode
    fileh = open_file(filename, mode="r")
    rowsread = 0
    for groupobj in fileh.walk_groups(fileh.root):
        #print("Group pathname:", groupobj._v_pathname)
        row = 0
        for table in fileh.list_nodes(groupobj, 'Table'):
            rowsize = table.rowsize
            print("reading", table)
            if verbose:
                print("Max rows in buf:", table.nrowsinbuf)
                print("Rows in", table._v_pathname, ":", table.nrows)
                print("Buffersize:", table.rowsize * table.nrowsinbuf)
                print("MaxTuples:", table.nrowsinbuf)
            if recsize == "big" or recsize == "medium":
                #e = [ p.float1 for p in table.iterrows()
                #      if p.grid_i < 2 ]
                #e = [ str(p) for p in table.iterrows() ]
                #      if p.grid_i < 2 ]
                #e = [ p['grid_i'] for p in table.iterrows()
                #      if p['grid_j'] == 20 and p['grid_i'] < 20 ]
                #e = [ p['grid_i'] for p in table
                #      if p['grid_i'] <= 2 ]
                #e = [ p['grid_i'] for p in table.where("grid_i<=20")]
                #e = [ p['grid_i'] for p in
                #      table.where('grid_i <= 20')]
                e = [ p['grid_i'] for p in
                      table.where('(grid_i <= 20) & (grid_j == 20)')]
                #e = [ p['grid_i'] for p in table.iterrows()
                #      if p.nrow() == 20 ]
                #e = [ table.delrow(p.nrow()) for p in table.iterrows()
                #      if p.nrow() == 20 ]
                # The for-loop version is only about 1% faster than the
                # list comprehension
                #e = []
                #for p in table.iterrows():
                #    if p.grid_i < 20:
                #        e.append(p.grid_j)
            else:  # small record case
                #e = [ p['var3'] for p in table.iterrows()
                #      if p['var2'] < 20 and p['var3'] < 20 ]
                #e = [ p['var3'] for p in table.where("var3 <= 20")
                #      if p['var2'] < 20 ]
                #e = [ p['var3'] for p in table.where("var3 <= 20")]
                # Cuts 1) and 2) yield the same results, but 2) is about
                # 10 times faster
                # ######## Cut 1)
                #e = [ p.nrow() for p in
                #      table.where(table.cols.var2 > 5)
                #      if p["var2"] < 10 ]
                # ######## Cut 2)
                #e = [ p.nrow() for p in
                #      table.where(table.cols.var2 < 10)
                #      if p["var2"] > 5 ]
                #e = [ (p._nrow, p["var3"]) for p in
                #      table.where(table.cols.var3 < 10)]
                #e = [ p["var3"] for p in
                #      table.where(table.cols.var3 < 10)]
                #e = [ p["var3"] for p in table if p["var3"] <= 10 ]
                #e = [ p['var3'] for p in table.where("var3 <= 20")]
                #e = [ p['var3'] for p in
                #      table.where(table.cols.var1 == "10")]
                # The above is more than ten times faster than:
                #e = [ p['var3'] for p in table
                #      if p['var1'] == "10"]
                #e = [ p['var3'] for p in table.where('var2 <= 20')]
                e = [ p['var3'] for p in
                      table.where('(var2 <= 20) & (var2 >= 3)')]
                #e = [ p[0] for p in table.where('var2 <= 20')]
                #e = [ p['var3'] for p in table if p['var2'] <= 20 ]
                #e = [ p[:] for p in table if p[1] <= 20 ]
                #e = [ p['var3'] for p in
                #      table._whereInRange(table.cols.var2 <= 20)]
                #e = [ p['var3'] for p in table.iterrows(0, 21) ]
                #e = [ p['var3'] for p in table.iterrows()
                #      if p.nrow() <= 20 ]
                #e = [ p['var3'] for p in table.iterrows(1, 0, 1000)]
                #e = [ p['var3'] for p in table.iterrows(1, 100)]
                #e = [ p['var3'] for p in table.iterrows(step=2)
                #      if p.nrow() < 20 ]
                #e = [ p['var2'] for p in table.iterrows()
                #      if p['var2'] < 20 ]
                #for p in table.iterrows():
                #    pass
            if verbose:
                #print("Last record read:", p)
                print("resulting selection list ==>", e)
            rowsread += table.nrows
            row += 1
            if verbose:
                print("Total selected records ==> ", len(e))
    # Close the file (eventually destroy the extended type)
    fileh.close()
    return (rowsread, rowsize)
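
# A minimal sketch, separate from the benchmark, of the in-kernel query
# pattern exercised by readFile() above: the condition string is compiled
# and evaluated by the numexpr kernel inside PyTables rather than row by
# row in Python, which is what makes it fast.
def exampleQuery(table):
    """Return the var3 values of small-record rows with 3 <= var2 <= 20."""
    return [ p['var3'] for p in
             table.where('(var2 <= 20) & (var2 >= 3)') ]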
def readField(filename, field, rng, verbose):
    fileh = open_file(filename, mode="r")
    rowsread = 0
    if rng is None:
        rng = [0, -1, 1]
    if field == "all":
        field = None
    for groupobj in fileh.walk_groups(fileh.root):
        for table in fileh.list_nodes(groupobj, 'Table'):
            rowsize = table.rowsize
            #table.nrowsinbuf = 3  # For testing purposes
            if verbose:
                print("Max rows in buf:", table.nrowsinbuf)
                print("Rows in", table._v_pathname, ":", table.nrows)
                print("Buffersize:", table.rowsize * table.nrowsinbuf)
                print("MaxTuples:", table.nrowsinbuf)
                print("(field, start, stop, step) ==>",
                      (field, rng[0], rng[1], rng[2]))
            e = table.read(rng[0], rng[1], rng[2], field)
            rowsread += table.nrows
            if verbose:
                print("Selected rows ==> ", e)
                print("Total selected rows ==> ", len(e))
    # Close the file (eventually destroy the extended type)
    fileh.close()
    return (rowsread, rowsize)
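
# A minimal sketch (not used by the benchmark itself) of how the CLI flags
# parsed below map onto a PyTables Filters instance; the concrete values
# are illustrative, not defaults of this script.
def exampleFilters():
    """Return a Filters instance roughly equivalent to '-c 5 -l blosc -S'."""
    return Filters(complevel=5, complib="blosc", shuffle=True,
                   fletcher32=False)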
if __name__ == "__main__":
    import sys
    import getopt
    try:
        import psyco
        psyco_imported = 1
    except ImportError:
        psyco_imported = 0
    import time

    usage = """usage: %s [-v] [-p] [-P] [-R range] [-r] [-w] [-s recsize] [-f field] [-c level] [-l complib] [-i iterations] [-S] [-F] file
    -v verbose
    -p use "psyco" if available
    -P do profile
    -R select a range in a field in the form "start,stop,step"
    -r only read test
    -w only write test
    -s use [big] record, [medium] or [small]
    -f only read stated field name in tables ("all" means all fields)
    -c sets a compression level (do not set it or 0 for no compression)
    -S activate shuffling filter
    -F activate fletcher32 filter
    -l sets the compression library to be used ("zlib", "lzo", "blosc", "bzip2")
    -i sets the number of rows in each table\n""" % sys.argv[0]

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'vpPSFR:rwf:s:c:l:i:')
    except getopt.GetoptError:
        sys.stderr.write(usage)
        sys.exit(0)

    # If the wrong number of positional parameters is passed, abort
    if len(pargs) != 1:
        sys.stderr.write(usage)
        sys.exit(0)

    # default options
    verbose = 0
    profile = 0
    rng = None
    recsize = "medium"
    fieldName = None
    testread = 1
    testwrite = 1
    usepsyco = 0
    complevel = 0
    shuffle = 0
    fletcher32 = 0
    complib = "zlib"
    iterations = 100

    # Get the options
    for option in opts:
        if option[0] == '-v':
            verbose = 1
        elif option[0] == '-p':
            usepsyco = 1
        elif option[0] == '-P':
            profile = 1
        elif option[0] == '-S':
            shuffle = 1
        elif option[0] == '-F':
            fletcher32 = 1
        elif option[0] == '-R':
            rng = [int(i) for i in option[1].split(",")]
        elif option[0] == '-r':
            testwrite = 0
        elif option[0] == '-w':
            testread = 0
        elif option[0] == '-f':
            fieldName = option[1]
        elif option[0] == '-s':
            recsize = option[1]
            if recsize not in ["big", "medium", "small"]:
                sys.stderr.write(usage)
                sys.exit(0)
        elif option[0] == '-c':
            complevel = int(option[1])
        elif option[0] == '-l':
            complib = option[1]
        elif option[0] == '-i':
            iterations = int(option[1])

    # Build the Filters instance
    filters = Filters(complevel=complevel, complib=complib,
                      shuffle=shuffle, fletcher32=fletcher32)

    # Get the HDF5 file passed as the last argument
    filename = pargs[0]

    if verbose:
        print("numpy version:", NP.__version__)
        if psyco_imported and usepsyco:
            print("Using psyco version:", psyco.version_info)

    if testwrite:
        print("Compression level:", complevel)
        if complevel > 0:
            print("Compression library:", complib)
            if shuffle:
                print("Shuffling...")
        t1 = time.time()
        cpu1 = time.process_time()
        if psyco_imported and usepsyco:
            psyco.bind(createFile)
        if profile:
            import profile as prof
            import pstats
            prof.run('(rowsw, rowsz) = createFile(filename, iterations, '
                     'filters, recsize)', 'table-bench.prof')
            stats = pstats.Stats('table-bench.prof')
            stats.strip_dirs()
            stats.sort_stats('time', 'calls')
            stats.print_stats(20)
        else:
            (rowsw, rowsz) = createFile(filename, iterations, filters,
                                        recsize)
        t2 = time.time()
        cpu2 = time.process_time()
        tapprows = round(t2 - t1, 3)
        cpuapprows = round(cpu2 - cpu1, 3)
        tpercent = int(round(cpuapprows / tapprows, 2) * 100)
        print("Rows written:", rowsw, " Row size:", rowsz)
        print("Time writing rows: %s s (real) %s s (cpu) %s%%" %
              (tapprows, cpuapprows, tpercent))
        print("Write rows/sec: ", int(rowsw / float(tapprows)))
        print("Write KB/s :", int(rowsw * rowsz / (tapprows * 1024)))

    if testread:
        t1 = time.time()
        cpu1 = time.process_time()
        if psyco_imported and usepsyco:
            psyco.bind(readFile)
            #psyco.bind(readField)
        if rng or fieldName:
            (rowsr, rowsz) = readField(filename, fieldName, rng, verbose)
        else:
            for i in range(1):
                (rowsr, rowsz) = readFile(filename, recsize, verbose)
        t2 = time.time()
        cpu2 = time.process_time()
        treadrows = round(t2 - t1, 3)
        cpureadrows = round(cpu2 - cpu1, 3)
        tpercent = int(round(cpureadrows / treadrows, 2) * 100)
        print("Rows read:", rowsr, " Row size:", rowsz)
        print("Time reading rows: %s s (real) %s s (cpu) %s%%" %
              (treadrows, cpureadrows, tpercent))
        print("Read rows/sec: ", int(rowsr / float(treadrows)))
        print("Read KB/s :", int(rowsr * rowsz / (treadrows * 1024)))
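
# Example invocations, assuming this script is saved as table-bench.py
# (the file name and row counts below are illustrative):
#
#   python table-bench.py -w -s small -i 10000 bench.h5        # write-only run
#   python table-bench.py -r -v bench.h5                       # read-only, verbose
#   python table-bench.py -c 5 -l blosc -S -i 50000 bench.h5   # blosc level 5 + shuffle
#   python table-bench.py -f energy -R 0,1000,2 bench.h5       # read one field over a range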