#!/usr/bin/env python2.7
"""Validate database tables and record table statistics.

Runs the validate() and statistics() methods of the table QA object built
for each requested table, writing a log to <outFile>.log and a
tab-separated summary to <outFile>.summary.
"""
import sys
import argparse
import time

from ucscGb.qa import qaUtils
from ucscGb.qa.tables import factory
from ucscGb.qa.tables import reporter
from ucscGb.qa.tables import summary
from ucscGb.qa.encode import tableCheck


def parseCommandLine():
    """Parses the command line and returns the argparse namespace.

    Table names read from the optional -f file are appended to args.table,
    so callers only need to consult args.table for the full table list.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Validates tables and records table statistics',
        epilog="""
The following tests are run:
  checks for underscores in table names
  checks for the existence of table descriptions
  checks shortLabel and longLabel length
  positionalTblCheck
  checkTblCoords
  genePredCheck
  pslCheck

The following statistics are recorded:
  featureBits
  item count per chromosome stats

creates 2 files: outFile.summary, outFile.log
""")
    parser.add_argument('db', help='the database to check')
    parser.add_argument('-f', dest='tableFile', nargs=1,
                        type=argparse.FileType('r'),
                        help='a file listing tables to check '
                             '(separated by spaces or newlines)')
    parser.add_argument('table', help='the name of a table to check',
                        nargs='*')
    parser.add_argument('outFile', help='base name to give results files')

    args = parser.parse_args()
    if args.tableFile:
        # nargs=1 makes argparse hand back a one-element list of open files
        tableFile = args.tableFile[0]
        try:
            # add any additional tables in the table file to the table list
            args.table.extend(tableFile.read().split())
        finally:
            # close the file even if reading it fails
            tableFile.close()
    return args


def checkTablesReceived(tableList, outFile):
    """Raises an exception if the tableList is empty.

    (Necessary because both ways of specifying tables are optional.)"""
    if not tableList:
        raise Exception("Received outFile name '" + outFile +
                        "', but no tables were specified.")


def tableExists(db, table):
    """True if the table exists in the given database."""
    sqlResult = qaUtils.callHgsql(db, "show tables like '" + table + "'")
    # LIKE treats '_' and '%' in the name as wildcards, so the query may
    # return other tables in addition to the requested one. Look for an
    # exact match among the returned names rather than requiring the whole
    # result to equal the name.
    # NOTE(review): assumes callHgsql returns the query output as a string
    # with whitespace-separated table names -- confirm against qaUtils.
    return table in sqlResult.split()


def checkTablesExist(db, tableList):
    """Raises an exception if any table in the tables list does not exist
    in the database."""
    for name in tableList:
        if not tableExists(db, name):
            raise Exception("Table " + name + " in " + db + " does not exist")


def runTests(db, tableList, reporter, sumTable):
    """Runs validate() and statistics() methods on each table.
    Writes log output.

    Note that the 'reporter' parameter (a Reporter object) hides the
    imported 'reporter' module inside this function.
    """
    delimiterLine = "==============================================="
    reporter.writeTimestamp()
    reporter.writeBlankLine()
    for table in tableList:
        reporter.writeLine(delimiterLine)
        reporter.writeLine(db + "." + table + ":\n")
        # keep the table name and its QA object under separate names
        tableQa = factory.tableQaFactory(db, table, reporter, sumTable)
        tableQa.validate()
        reporter.writeBlankLine()
        tableQa.statistics()
    reporter.writeLine("Tests complete:")
    reporter.writeTimestamp()


def writeSummaryFile(sumTable, sumFile):
    """Writes the tab-separated form of sumTable to the open file sumFile."""
    sumFile.write(sumTable.tabSeparated())


def main():
    """Checks the requested tables, then writes the log and summary files."""
    args = parseCommandLine()
    tableList = args.table
    db = args.db
    outFile = args.outFile

    # fail before creating any output files if the request is unusable
    checkTablesReceived(tableList, outFile)
    checkTablesExist(db, tableList)

    # 'with' guarantees both files are closed even when a test raises
    with open(outFile + ".log", "w") as logFile, \
            open(outFile + ".summary", "w") as sumFile:
        # named logReporter so the imported 'reporter' module is not rebound
        logReporter = reporter.Reporter(logFile)
        sumTable = summary.SumTable()
        runTests(db, tableList, logReporter, sumTable)
        writeSummaryFile(sumTable, sumFile)


if __name__ == "__main__":
    main()