import os
import os.path
from time import time

import tables

from indexed_search import DB


class PyTables_DB(DB):
    """PyTables backend for the indexed-search benchmark.

    The generic benchmark state (``nrows``, ``rng``, ``userandom``) is handled
    by ``DB.__init__``.  This class also relies on the base class providing
    ``self.filename`` (used here as a stem), ``self.step`` and
    ``self.fill_arrays()`` -- none of them are defined in this module
    (NOTE(review): confirm against ``indexed_search.DB``).
    """

    # Preferred scratch directory for temporaries created while indexing.
    # It is only used when it actually exists on the current machine;
    # otherwise PyTables is left to choose its default location.
    index_tmp_dir = "/scratch2/faltet"

    def __init__(self, nrows, rng, userandom, datadir,
                 docompress=0, complib='zlib', kind="medium", optlevel=6):
        """Set up the HDF5 file name and the compression filters.

        Parameters
        ----------
        nrows, rng, userandom
            Forwarded untouched to ``DB.__init__``.
        datadir
            Directory in which the ``.h5`` file lives.
        docompress
            Compression level; a falsy value also keeps the compression
            suffix out of the file name.
        complib
            Name of the compression library passed to ``tables.Filters``.
        kind, optlevel
            Index kind and optimization level; here they are only encoded
            in the file name.
        """
        DB.__init__(self, nrows, rng, userandom)
        self.tprof = []
        # Specific part for pytables
        self.docompress = docompress
        self.complib = complib
        # Complete the filename:
        #   pro-<base stem>-O<optlevel>-<kind>[-<complib><level>].h5
        stem = "pro-" + self.filename
        stem += '-' + 'O%s' % optlevel
        stem += '-' + kind
        if docompress:
            stem += '-' + complib + str(docompress)
        self.filename = os.path.join(datadir, stem + '.h5')
        # The chosen filters
        self.filters = tables.Filters(complevel=self.docompress,
                                      complib=self.complib,
                                      shuffle=1)
        # Single pre-formatted argument: prints the same text whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print("Processing database: %s" % self.filename)

    def open_db(self, remove=0):
        """Open the HDF5 file in append mode and return the file handle.

        When ``remove`` is true, an existing file is deleted first so the
        returned handle refers to a fresh, empty file.
        """
        if remove and os.path.exists(self.filename):
            os.remove(self.filename)
        return tables.open_file(self.filename, 'a')

    def close_db(self, con):
        """Drop the cached table handle (if any) and close ``con``."""
        # Remove first the table_cache attribute if it exists, so that the
        # next do_query() call re-reads the handles from its connection.
        if hasattr(self, "table_cache"):
            del self.table_cache
        con.close()

    def create_table(self, con):
        """Create the four-column benchmark table under the file root."""
        class Record(tables.IsDescription):
            col1 = tables.Int32Col()
            col2 = tables.Int32Col()
            col3 = tables.Float64Col()
            col4 = tables.Float64Col()

        con.create_table(con.root, 'table', Record,
                         filters=self.filters, expectedrows=self.nrows)

    def fill_table(self, con):
        """Fill the table in chunks of ``self.step`` rows, then flush it."""
        table = con.root.table
        for start in range(0, self.nrows, self.step):
            # The last chunk may be shorter than a full step.
            stop = min(start + self.step, self.nrows)
            arr_i4, arr_f8 = self.fill_arrays(start, stop)
            # The integer array feeds col1 and col2, the float array feeds
            # col3 and col4.
            table.append([arr_i4, arr_i4, arr_f8, arr_f8])
        table.flush()

    def index_col(self, con, column, kind, optlevel, verbose):
        """Create an index on ``column`` of the benchmark table.

        ``kind``, ``optlevel`` and ``verbose`` are forwarded to
        ``Column.create_index``; the index uses the same filters as the
        table itself.
        """
        col = getattr(con.root.table.cols, column)
        # Fall back to the PyTables default when the preferred scratch
        # directory is not available on this machine.
        tmp_dir = self.index_tmp_dir
        if not os.path.isdir(tmp_dir):
            tmp_dir = None
        col.create_index(kind=kind, optlevel=optlevel, filters=self.filters,
                         tmp_dir=tmp_dir,
                         _verbose=verbose, _blocksizes=None)

    def do_query(self, con, column, base, inkernel):
        """Run the range query on ``column`` and return the number of hits.

        The query limits are derived from ``self.rng`` and ``base`` and are
        published in ``self.condvars``.  An indexed column is always queried
        through ``table.where``; otherwise ``inkernel`` selects between
        ``table.where`` and a plain Python scan over the rows.

        NOTE(review): the condition text and the regular-query predicate
        were truncated in the revision this was written from.  The predicate
        below is the minimal completion of the surviving prefix
        ``((inf2<=col) & (col``; confirm it against the intended benchmark
        query.
        """
        # Table/column handles are cached between calls.  They are refreshed
        # on first use, after close_db(), and when a different column is
        # requested (otherwise the condition would keep filtering on the
        # previously cached column).
        if (not hasattr(self, "table_cache")
                or getattr(self, "_cached_column", None) != column):
            self.table_cache = table = con.root.table
            self.colobj = getattr(table.cols, column)
            self._cached_column = column
            self.condvars = {"col": self.colobj,
                             "col1": table.cols.col1,
                             "col2": table.cols.col2,
                             "col3": table.cols.col3,
                             "col4": table.cols.col4,
                             }
        table = self.table_cache
        colobj = self.colobj

        self.condvars['inf'] = self.rng[0] + base
        self.condvars['sup'] = self.rng[1] + base
        # For queries that can use two indexes instead of just one
        d = (self.rng[1] - self.rng[0]) / 2.
        inf1 = int(self.rng[0] + base)
        sup1 = int(self.rng[0] + d + base)
        inf2 = self.rng[0] + base * 2
        sup2 = self.rng[0] + d + base * 2
        self.condvars['inf1'] = inf1
        self.condvars['sup1'] = sup1
        self.condvars['inf2'] = inf2
        self.condvars['sup2'] = sup2

        # "col" is bound to the requested column in self.condvars.
        condition = "((inf2<=col) & (col<sup2))"
        c = self.condvars
        print("condvars--> %s %s %s %s"
              % (c['inf'], c['sup'], c['inf2'], c['sup2']))

        if colobj.is_indexed:
            results = [r[column]
                       for r in table.where(condition, self.condvars)]
        elif inkernel:
            print("Performing in-kernel query")
            results = [r[column]
                       for r in table.where(condition, self.condvars)]
        else:
            print("Performing regular query")
            # Same predicate as ``condition``, evaluated row by row in
            # Python so that every path counts the same rows.
            results = [r[column] for r in table
                       if inf2 <= r[column] < sup2]
        return len(results)