#!/bin/bash
set -beEu -o pipefail
###############################################################################
#  doGenbankTests
#
#  June 2008 -- Brooke Rhead
#
#  Wrapper script for automated checking of GenBank tables.
#  Note: this script assumes the kent source tree is checked out in ~/kent/src
#
#  Usage: doGenbankTests db [outFile]
#    db       database (assembly) to check
#    outFile  optional file that receives all output (stdout and stderr)
#
#  Exit status:
#    0  every check that was run succeeded
#    1  bad usage, wrong host, or unknown database
#    2  the script ran to completion but at least one check reported an error
###############################################################################

# tableExists db table
# Succeeds when "show tables like 'table'" returns at least one row in db.
tableExists() {
  local db=$1 table=$2 res
  res=$(hgsql -Ne "show tables like '$table'" "$db") || return 1
  [[ -n "$res" ]]
}

# databaseExists db
# Succeeds when "show databases like 'db'" returns at least one row.
databaseExists() {
  local db=$1 res
  res=$(hgsql -Ne "show databases like '$db'") || return 1
  [[ -n "$res" ]]
}

# selectShortLabel db table
# Prints the trackDb shortLabel stored for tableName = table (may print nothing).
selectShortLabel() {
  local db=$1 table=$2
  hgsql -NBe "select shortLabel from trackDb where tableName='$table'" "$db"
}

# getShortLabel db table
# Prints the short label for table.  When nothing is found under all_mrna or
# all_est, the lookup is retried under mrna / est respectively.
getShortLabel() {
  local db=$1 table=$2 res
  # The first lookup is best-effort: a failed query is treated like "no label".
  res=$(selectShortLabel "$db" "$table") || res=""
  if [[ -z "$res" && "$table" == "all_mrna" ]]; then
    res=$(selectShortLabel "$db" mrna)
  elif [[ -z "$res" && "$table" == "all_est" ]]; then
    res=$(selectShortLabel "$db" est)
  fi
  printf '%s\n' "$res"
}

# runCheck command [args...]
# Echoes the command line, runs it, and records a failure in the global allOk
# instead of aborting the script.  Always returns 0.
runCheck() {
  echo "$*"
  "$@" || allOk=no
}

# keep track of errors vs. no errors
allOk=yes

# check host, set path to genbank sanity location
# bash itself only sets HOSTNAME; HOST is exported by some login shells.  Fall
# back to the short host name so that "set -u" does not abort when HOST is unset.
host=${HOST:-${HOSTNAME%%.*}}
case "$host" in
  hgwdev)
    sanityPath="/cluster/data/genbank"
    ;;
  hgwbeta)
    sanityPath="/genbank"
    ;;
  *)
    echo -e "\nThis script must be run from hgwdev or hgwbeta.\n" >&2
    exit 1
    ;;
esac

# get arguments / give usage statement
if (( $# < 1 || $# > 2 )); then
  echo -e "\n usage: $(basename "$0") db [outFile] where outFile is the name of a file in which to record results\n" >&2
  exit 1
fi

db="$1"
if ! databaseExists "$db"; then
  echo "ERROR: $db does not seem to be a valid database name" >&2
  exit 1
fi

if (( $# == 2 )); then
  # from here on, everything written to stdout or stderr goes to outFile
  exec > "$2" 2>&1
fi

# specify tables that should get each test
# intronEst is a special case; it may exist only in split form
intronEstTables=$(hgsql -Ne "show tables like '%intronEst'" "$db")

trackTables=(refGene xenoRefGene mgcFullMrna orfeomeMrna all_mrna intronEst
  all_est xenoMrna xenoEst)
genePredTables=(refGene xenoRefGene mgcGenes orfeomeGenes)
pslTables=(mgcFullMrna orfeomeMrna all_mrna xenoMrna all_est refSeqAli
  xenoRefSeqAli xenoEst)
featureTables=(refGene xenoRefGene mgcGenes mgcFullMrna orfeomeGenes
  orfeomeMrna all_mrna intronEst all_est xenoMrna xenoEst)

# add every (possibly split) intronEst table to the psl checks, one per line
if [[ -n "$intronEstTables" ]]; then
  while IFS= read -r splitTable; do
    if [[ -n "$splitTable" ]]; then
      pslTables+=("$splitTable")
    fi
  done <<<"$intronEstTables"
fi

# print information about the script at the top of the output
echo "$(basename "$0") output on $db"
date
echo

# get a list of all genbank tracks present in this assembly
echo -e "The following GenBank tracks are present in this assembly:\n"
for table in "${trackTables[@]}"; do
  if tableExists "$db" "$table"; then
    getShortLabel "$db" "$table"
  fi
done

# intronEst is a special case; it may exist only in split form
if ! tableExists "$db" intronEst; then
  # table is split, but not in trackDb (or not there)
  if [[ -n "$intronEstTables" ]]; then
    # split tables exist
    getShortLabel "$db" intronEst
  fi
fi
echo

# run genePredChecks and pslChecks
echo -e "--> Running genePredCheck and pslCheck:\n"
for table in "${genePredTables[@]}"; do
  if tableExists "$db" "$table"; then
    runCheck nice genePredCheck -db="$db" "$table"
  fi
done
echo

for table in "${pslTables[@]}"; do
  if tableExists "$db" "$table"; then
    runCheck nice pslCheck -db="$db" "$table"
  fi
done
echo

# run joinerCheck
joinerLoc=~/kent/src/hg/makeDb/schema/all.joiner
echo -e "--> Checking appropriate keys in all.joiner:\n"

# doJoinerCheck table identifier
# Runs joinerCheck -keys for identifier when table exists in $db (global).
# Always returns 0 so that a missing table does not trip "set -e".
doJoinerCheck() {
  local table=$1 ident=$2
  if tableExists "$db" "$table"; then
    runCheck nice joinerCheck -keys -database="$db" -identifier="$ident" "$joinerLoc"
    echo
  fi
  return 0
}

doJoinerCheck gbCdnaInfo gbCdnas
doJoinerCheck refGene refSeqId
doJoinerCheck xenoRefGene xenoRefSeqId
doJoinerCheck mgcGenes mgcAccession
doJoinerCheck orfeomeGenes orfeomeAccession
doJoinerCheck all_mrna nativeMrnaAccession
doJoinerCheck all_est nativeEstAccession # takes care of intronEst, too
doJoinerCheck xenoMrna xenoMrnaAccession
doJoinerCheck xenoEst xenoEstAccession
echo

# run featureBits
echo -e "--> Running featureBits.  Remember to paste this into the pushqueue:\n"
for table in "${featureTables[@]}"; do
  if tableExists "$db" "$table"; then
    runCheck nice featureBits -countGaps "$db" "$table"
    runCheck nice featureBits -countGaps "$db" "$table" gap
    echo
  fi
done
echo

# run gbSanity
filename="${db}.sanity.$(date +%m-%d-%Y.%R%p)"
echo -e "--> Running gbSanity, and putting output in:"
echo -e "${host}:$sanityPath/misc/$filename"
# need to run in a subshell, since gbSanity wants to be run from this dir
(cd "$sanityPath" && bin/x86_64/gbSanity "$db" > "misc/$filename" 2>&1) || allOk=no
echo -e "The last line of the gbSanity output gives a summary of errors. Here is the last line of the file. If there are errors, consult Mark Diekhans and Brian Raney:\n"
# a missing output file is recorded as an error rather than aborting the
# script before the final report
tail -n 1 "$sanityPath/misc/$filename" || allOk=no
echo

# run countPerChrom??

# give overall report of errors
if [[ "$allOk" == yes ]]; then
  echo "No errors were encountered during $(basename "$0"). Yay!"
  date
else
  echo "There was an ERROR in at least one test, but $(basename "$0") ran successfully."
  date
  exit 2
fi
exit 0