#!/usr/bin/perl # # dump-test-tables db outdir # # Dump tables from a test database in a form that can be diffed. This removes # variant fields, such as ids and timestamps and sorts the results. This can # be used to compare the results of loading in different ways, such as loading # all available updates, with incrementally obtained updates. # use strict; use warnings; use File::Basename; use FindBin; use lib "$FindBin::Bin/../../lib"; use gbCommon; setTaskName("dump-test-tables"); # command to run mysql my $MYSQL = "hgu-sql -N"; # run mysql and output results to a file sub runMySql($$) { my($outFile, $sqlCmd) = @_; runProg("$MYSQL -e '$sqlCmd' $main::db > $outFile"); } # run mysql and return results as a list of lines sub callMySql($) { my($sqlCmd) = @_; my $out = callProg("$MYSQL -e '$sqlCmd' $main::db"); chomp($out); return split('\n', $out); } if ($#ARGV != 1) { die("wrong # args: dump-test-tables db outdir"); } $main::db = $ARGV[0]; $main::outdir = $ARGV[1]; my $tbldir = "$main::outdir/tbls"; makeDir($tbldir); # save load directory if (-e "load") { removeDir("$main::outdir/load"); renameFile("load", "$main::outdir/load"); } my @chroms = callMySql("select chrom from chromInfo"); push(@chroms, "all"); my %tables; foreach my $table (callMySql("show tables")) { $tables{$table} = 1; } # Used to generate psl chromosome (and all) table names. Tables existance is # check, so this can be a super-set. my @pslTables; foreach my $type ("mrna", "est", "intronEst") { foreach my $chr (@chroms) { push(@pslTables, $chr . "_" . $type); } } push(@pslTables, "xenoMrna"); push(@pslTables, "xenoEst"); foreach my $chr (@chroms) { push(@pslTables, $chr . "_" . "intronEst"); } foreach my $tbl (@pslTables) { if (defined($tables{$tbl})) { runMySql("$tbldir/$tbl.tbl", "SELECT * FROM $tbl ORDER BY " . "tName,tStart,tEnd,qName,qStart,qEnd"); } } # *OrientInfo tables foreach my $type ("mrna", "est") { my $tbl = "${type}OrientInfo"; if (defined($tables{$tbl})) { runMySql("$tbldir/$tbl.tbl", "SELECT * FROM $tbl ORDER BY chrom,chromStart,chromEnd,name"); } } # get contents of mrna tables, joining the values in the unique name table my $mrnaSel = "SELECT gbCdnaInfo.acc, gbCdnaInfo.version, gbCdnaInfo.moddate, gbCdnaInfo.type, gbCdnaInfo.direction," . " author.name AS author, cds.name AS cds, cell.name AS cell," . " description.name AS description, " . " development.name AS development, geneName.name AS geneName," . " keyword.name AS keyword, library.name AS library, " . " mrnaClone.name as mrnaClone, organism.name AS organism," . " productName.name AS productName, sex.name AS sex, " . " source.name AS source, tissue.name AS tissue " . " FROM gbCdnaInfo,author,cds,cell,description,development,geneName,keyword," . " library,mrnaClone,organism,productName,sex,source,tissue" . " WHERE gbCdnaInfo.author=author.id AND gbCdnaInfo.cds=cds.id" . " AND gbCdnaInfo.cell=cell.id AND gbCdnaInfo.description=description.id" . " AND gbCdnaInfo.development=development.id AND gbCdnaInfo.geneName=geneName.id" . " AND gbCdnaInfo.keyword=keyword.id AND gbCdnaInfo.library=library.id" . " AND gbCdnaInfo.mrnaClone=mrnaClone.id AND gbCdnaInfo.organism=organism.id" . " AND gbCdnaInfo.productName=productName.id AND gbCdnaInfo.sex=sex.id" . " AND gbCdnaInfo.source=source.id AND gbCdnaInfo.tissue=tissue.id" . " ORDER BY acc"; runMySql("$tbldir/gbCdnaInfo.tbl", $mrnaSel); # seq dropping any file specific info (which might be differed) runMySql("$tbldir/gbSeq.tbl", "SELECT acc,version,size,file_size,type,srcDb FROM gbSeq ORDER BY acc"); # this always differs, save with no compare esxtension runMySql("$tbldir/gbSeq.nctbl", "SELECT * FROM gbSeq ORDER BY acc"); runMySql("$tbldir/gbExtFile.nctbl", "SELECT * FROM gbExtFile ORDER BY id"); runMySql("$tbldir/gbSeqExtFile.nctbl", "SELECT acc,gbSeq.id,path,gbExtFile.id FROM gbSeq,gbExtFile WHERE gbSeq.gbExtFile = gbExtFile.id ORDER BY acc"); # gbStatus, save select data for diff, plus full table runMySql("$tbldir/gbStatus.tbl", "SELECT acc,version,modDate,type,srcDb,numAligns FROM gbStatus ORDER BY acc"); runMySql("$tbldir/gbStatus.nctbl", "SELECT * FROM gbStatus ORDER BY acc"); # refseq-specific tables my $refLinkSel = "SELECT refLink.name,refLink.product,refLink.mrnaAcc,refLink.protAcc," . " geneName.name,productName.name,refLink.locusLinkId,refLink.omimId " . "FROM refLink,geneName,productName " . "WHERE refLink.geneName=geneName.id and refLink.prodName=productName.id " . "ORDER BY refLink.mrnaAcc"; runMySql("$tbldir/refLink.tbl", $refLinkSel); runMySql("$tbldir/refSeqStatus.tbl", "SELECT mrnaAcc,status FROM refSeqStatus ORDER BY mrnaAcc"); runMySql("$tbldir/refSeqSummary.tbl", "SELECT mrnaAcc,completeness,summary FROM refSeqSummary ORDER BY mrnaAcc"); runMySql("$tbldir/refSeqAli.tbl", "SELECT * FROM refSeqAli ORDER BY tName,tStart,tEnd,qName,qStart,qEnd"); runMySql("$tbldir/refGene.tbl", "SELECT * FROM refGene ORDER BY name,chrom,txStart,txEnd"); runMySql("$tbldir/refFlat.tbl", "SELECT * FROM refFlat ORDER BY geneName,name,chrom,txStart,txEnd"); if (defined($tables{xenoRefSeqAli})) { runMySql("$tbldir/xenoRefSeqAli.tbl", "SELECT * FROM xenoRefSeqAli ORDER BY tName,tStart,tEnd,qName,qStart,qEnd"); runMySql("$tbldir/xenoRefGene.tbl", "SELECT * FROM xenoRefGene ORDER BY name,chrom,txStart,txEnd"); runMySql("$tbldir/xenoRefFlat.tbl", "SELECT * FROM xenoRefFlat ORDER BY geneName,name,chrom,txStart,txEnd"); } runMySql("$tbldir/gbLoaded.tbl", "SELECT srcDb,loadRelease,loadUpdate,type,accPrefix FROM gbLoaded ORDER BY srcDb,loadRelease,loadUpdate,type,accPrefix"); runMySql("$tbldir/imageClone.tbl", "SELECT imageId,acc,type FROM imageClone ORDER BY imageId,acc");