# for emacs: -*- mode: sh; -*- # Caenorhabditis briggsae # Washington University School of Medicine GSC and Sanger Institute # # $Id: cb3.txt,v 1.15 2008/06/12 18:41:14 hiram Exp $ ########################################################################### ## Download sequence (DONE - 2007-01-22 - Hiram) ssh kkstore02 mkdir /cluster/store5/worm/cb3 ln -s /cluster/store5/worm/cb3 /cluster/data/cb3 mkdir /cluster/data/cb3/downloads cd /cluster/data/cb3/downloads wget --timestamping \ "http://genome.wustl.edu/pub/organism/Invertebrates/Caenorhabditis_briggsae/assembly/draft/Caenorhabditis_briggsae-1.0/ASSEMBLY" wget --timestamping \ "http://genome.wustl.edu/pub/organism/Invertebrates/Caenorhabditis_briggsae/assembly/draft/Caenorhabditis_briggsae-1.0/README" \ -O README.cb3 wget --timestamping --cut-dirs=10 -m -np -nd \ "ftp://genome.wustl.edu/pub/organism/Invertebrates/Caenorhabditis_briggsae/assembly/draft/Caenorhabditis_briggsae-1.0/output/chromosomes/" ########################################################################### ## Initial sequence (DONE - 2007-02-23 - Hiram) ssh kkstore02 cd /cluster/data/cb3 cat << '_EOF_' > cb3.config.ra # Config parameters for makeGenomeDb.pl: db cb3 clade worm genomeCladePriority 10 scientificName Caenorhabditis briggsae commonName C. briggsae assemblyDate Jan. 
2007 assemblyLabel Washington University School of Medicine GSC and Sanger Institute cb3 orderKey 868 # AC186293 mitoAcc 95102164 fastaFiles /cluster/data/cb3/wormbase/chr*.fa.gz agpFiles /cluster/data/cb3/wormbase/chr*.agp.gz # qualFiles /dev/null dbDbSpeciesDir worm '_EOF_' # << happy emacs makeGenomeDb.pl cb3.config.ra > makeGenomeDb.out 2>&1 ########################################################################### ## RepeatMasker (DONE - 2007-02-23 - Hiram) ssh kkstore02 cd /cluster/data/cb3 time nice -n +19 doRepeatMasker.pl cb3 > doRepeatMasker.out 2>&1 & mv doRepeatMasker.out bed/RepeatMasker.2007-02-23 twoBitToFa cb3.rmskTrf.2bit stdout | faSize stdin > faSize.cb3.rmskTrf.txt # real 83m42.529s # 108492946 bases (3041279 N's 105451667 real # 84250398 upper 21201269 lower) in 13 sequences in 1 files # %19.54 masked total, %20.11 masked real ########################################################################### ## Simple Repeats (DONE - 2007-02-24 - Hiram) ssh kolossus mkdir /cluster/data/cb3/bed/simpleRepeat cd /cluster/data/cb3/bed/simpleRepeat twoBitToFa ../../cb3.unmasked.2bit stdout \ | trfBig -trf=/cluster/bin/i386/trf stdin /dev/null \ -bedAt=simpleRepeat.bed -tempDir=/scratch/tmp # real 41m11.623s awk '{if ($5 <= 12) print;}' simpleRepeat.bed > trfMask.bed ssh hgwdev cd /cluster/data/cb3/bed/simpleRepeat nice -n +19 hgLoadBed cb3 simpleRepeat \ /cluster/data/cb3/bed/simpleRepeat/simpleRepeat.bed \ -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql # Loaded 32725 elements of size 16 featureBits cb3 simpleRepeat > fb.cb3.simpleRepeat.txt 2>&1 # 3984931 bases of 108433446 (3.675%) in intersection ########################################################################### ## WindowMasker (DONE - 2007-02-25 - Hiram) ssh kolossus mkdir /cluster/data/cb3/bed/WindowMasker cd /cluster/data/cb3/bed/WindowMasker time nice -n +19 ~/kent/src/hg/utils/automation/doWindowMasker.pl \ cb3 -buildDir=/cluster/data/cb3/bed/WindowMasker \ -workhorse=kolossus > 
doWM.out 2>&1 & cat /cluster/data/cb3/bed/simpleRepeat/trfMask.bed \ | twoBitMask -add -type=.bed cb3.wmsk.sdust.2bit stdin cb3.sdTrf.n.2bit twoBitToFa cb3.sdTrf.n.2bit stdout | sed -e "s/n/N/g" \ | sed -e "s/chrUN/chrUn/; s/raNdom/random/" \ | faToTwoBit stdin cb3.sdTrf.2bit twoBitToFa cb3.sdTrf.2bit stdout | faSize stdin >> faSize.cb3.sdTrf.txt # 108492946 bases (3041279 N's 105451667 real # 66936237 upper 38515430 lower) in 13 sequences in 1 files # %35.50 masked total, %36.52 masked real ssh hgwdev cd /cluster/data/cb3/bed/WindowMasker hgLoadBed -strict cb3 windowmaskerSdust windowmasker.sdust.bed.gz ######################################################################### # MAKE 11.OOC FILE FOR BLAT (DONE - 2007-04-04 - Hiram) # Use -repMatch=36 (based on size -- for human we use 1024, and # C. briggsae size is ~3.4% of human judging by gapless cb3 vs. hg18 # genome sizes from featureBits). ssh kolossus cd /cluster/data/cb3 blat cb3.2bit /dev/null /dev/null -tileSize=11 \ -makeOoc=jkStuff/11.ooc -repMatch=36 # Wrote 53058 overused 11-mers to jkStuff/11.ooc cp -p jkStuff/11.ooc /san/sanvol1/scratch/worms/cb3 ######################################################################### ## Create a lift file for genbank work ssh kkstore02 cd /cluster/data cp -p caePb1/jkStuff/agpToLift.pl cb3/jkStuff cd cb3 for C in downloads/*.agp.gz do zcat "${C}" | ./jkStuff/agpToLift.pl /dev/stdin done > jkStuff/liftAll.lft cp -p jkStuff/liftAll.lft /san/sanvol1/scratch/worms/cb3 ######################################################################### # GENBANK AUTO UPDATE (DONE - 2007-04-04-05 - Hiram) ## re-run with refseq.mrna.xeno added - 2007-04-13 - Hiram # align with latest genbank process. ssh hgwdev cd ~/kent/src/hg/makeDb/genbank cvsup # edit etc/genbank.conf to add cb3 just before cb1 # cb3 (C. 
briggsae) cb3.serverGenome = /cluster/data/cb3/cb3.2bit cb3.clusterGenome = /san/sanvol1/scratch/worms/cb3/cb3.2bit cb3.ooc = /san/sanvol1/scratch/worms/cb3/11.ooc cb3.lift = /san/sanvol1/scratch/worms/cb3/liftAll.lft cb3.refseq.mrna.native.pslCDnaFilter = ${lowCover.refseq.mrna.native.pslCDnaFilter} cb3.refseq.mrna.xeno.pslCDnaFilter = ${lowCover.refseq.mrna.xeno.pslCDnaFilter} cb3.genbank.mrna.native.pslCDnaFilter = ${lowCover.genbank.mrna.native.pslCDnaFilter} cb3.genbank.mrna.xeno.pslCDnaFilter = ${lowCover.genbank.mrna.xeno.pslCDnaFilter} cb3.genbank.est.native.pslCDnaFilter = ${lowCover.genbank.est.native.pslCDnaFilter} cb3.refseq.mrna.native.load = yes cb3.refseq.mrna.xeno.load = yes cb3.refseq.mrna.xeno.loadDesc = yes cb3.genbank.mrna.xeno.load = no cb3.downloadDir = cb3 cvs ci -m "Added cb3." etc/genbank.conf # update /cluster/data/genbank/: make etc-update ssh kkstore02 cd /cluster/data/genbank time nice -n +19 bin/gbAlignStep -initial cb3 & # logFile: var/build/logs/2007.07.11-14:56:44.cb3.initalign.log # 3:19:43 # logFile: var/build/logs/2007.07.10-16:59:38.cb3.initalign.log # real 134m52.159s # load database when finished ssh hgwdev cd /cluster/data/genbank time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad cb3 # logFile: var/dbload/hgwdev/logs/2007.07.11-18:54:00.dbload.log # 15:47.79 - try 3 # logFile: var/dbload/hgwdev/logs/2007.04.13-16:52:24.dbload.log # real 19m32.844s - second time # logFile: var/dbload/hgwdev/logs/2007.04.05-11:06:36.dbload.log # real 9m34.634s - first time # enable daily alignment and update of hgwdev cd ~/kent/src/hg/makeDb/genbank cvsup # add cb3 to: etc/align.dbs etc/hgwdev.dbs cvs ci -m "Added cb3 - C. 
briggsae" etc/align.dbs etc/hgwdev.dbs make etc-update ############################################################################ # BLATSERVERS ENTRY (DONE - 2007-04-05 - Hiram) # After getting a blat server assigned by the Blat Server Gods, ssh hgwdev hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \ VALUES ("cb3", "blat14", "17788", "1", "0"); \ INSERT INTO blatServers (db, host, port, isTrans, canPcr) \ VALUES ("cb3", "blat14", "17789", "0", "1");' \ hgcentraltest # test it with some sequence ############################################################################ ## Default position (DONE - 2007-04-09 - Hiram) ssh hgwdev hgsql -e 'update dbDb set defaultPos="chrII:7677406-7695567" where name="cb3";' hgcentraltest ############################################################################ ## Adding a Photograph, from Eric Hagg in email 2007-04-07 12:04 # -rw-rw-r-- 1 2082920 Apr 9 16:13 Cb-fem-3(nm63)-XX-#2-3.jpg # Cb-fem-3(nm63)-XX-#2-3.jpg JPEG 2600x2060 mkdir /cluster/data/cb3/photograph cd /cluster/data/cb3/photograph convert -sharpen 0 -normalize -gamma 1.7 \ -geometry "300x300" "Cb-fem-3(nm63)-XX-#2-3.jpg" \ Caenorhabditis_briggsae.jpg # check this .jpg file into the browser doc source tree cvs add -kb Caenorhabditis_briggsae.jpg cvs commit Caenorhabditis_briggsae.jpg cp -p Caenorhabditis_briggsae.jpg /usr/local/apache/htdocs/images ############################################################################ ## BlastZ caeRem2 - (DONE - 2007-04-13 - Hiram) ssh kkstore02 mkdir /cluster/data/cb3/bed/blastz.caeRem2.2007-04-13 cd /cluster/data/cb3/bed/blastz.caeRem2.2007-04-13 cat << '_EOF_' > DEF # cb3 vs caeRem2 BLASTZ_H=2000 BLASTZ_M=50 # TARGET: briggsae Cb3 SEQ1_DIR=/san/sanvol1/scratch/worms/cb3/cb3.rmskTrf.2bit SEQ1_LEN=/san/sanvol1/scratch/worms/cb3/chrom.sizes SEQ1_CHUNK=1000000 SEQ1_LAP=10000 # QUERY: remanei caeRem2, 9,660 contigs, longest 5,925,111 SEQ2_DIR=/san/sanvol1/scratch/worms/caeRem2/caeRem2.2bit 
SEQ2_LEN=/san/sanvol1/scratch/worms/caeRem2/chrom.sizes SEQ2_CTGDIR=/san/sanvol1/scratch/worms/caeRem2/caeRem2.contigs.2bit SEQ2_CTGLEN=/san/sanvol1/scratch/worms/caeRem2/caeRem2.contigs.sizes SEQ2_LIFT=/san/sanvol1/scratch/worms/caeRem2/caeRem2.chrUn.lift SEQ2_CHUNK=1000000 SEQ2_LAP=10000 SEQ2_LIMIT=50 BASE=/cluster/data/cb3/bed/blastz.caeRem2.2007-04-13 TMPDIR=/scratch/tmp '_EOF_' # << happy emacs time nice -n +19 doBlastzChainNet.pl DEF -verbose=2 -bigClusterHub=kk \ -blastzOutRoot /cluster/bluearc/cb3CaeRem2 > do.log 2>&1 & # A single job thought it failed, it had not time nice -n +19 doBlastzChainNet.pl DEF -verbose=2 -bigClusterHub=kk \ -continue=cat \ -blastzOutRoot /cluster/bluearc/cb3CaeRem2 > cat.log 2>&1 & cat fb.cb3.chainCaeRem2Link.txt # 53199792 bases of 108433446 (49.062%) in intersection # The following is also in caeRem2.txt mkdir /cluster/data/caeRem2/bed/blastz.cb3.swap cd /cluster/data/caeRem2/bed/blastz.cb3.swap time nice -n +19 doBlastzChainNet.pl -verbose=2 -bigClusterHub=kk \ -swap \ /cluster/data/cb3/bed/blastz.caeRem2.2007-04-13/DEF > swap.log 2>&1 & # The typical failure: # netChains: looks like previous stage was not successful (can't find # [caeRem2.cb3.]all.chain[.gz]). 
time nice -n +19 doBlastzChainNet.pl -verbose=2 -bigClusterHub=kk \ -continue=net -swap \ /cluster/data/cb3/bed/blastz.caeRem2.2007-04-13/DEF > net.log 2>&1 & # real 9m6.800s cat fb.caeRem2.chainCb3Link.txt # 63292118 bases of 146898439 (43.086%) in intersection ######################################################################### ## Create goldenPath files for hgdownload (DONE - 2007-04-13 - Hiram) ssh kkstore02 mkdir /cluster/data/cb3/goldenPath cd /cluster/data/cb3/goldenPath mkdir bigZips chromosomes database cd chromosomes for C in `awk '{print $1}' ../../chrom.sizes` do twoBitToFa -seq=${C} ../../cb3.2bit stdout | gzip -c > ${C}.fa.gz echo ${C} done done # check the sequence faSize *.fa.gz # 108492946 bases (3041279 N's 105451667 real # 84250398 upper 21201269 lower) in 13 sequences in 13 files # %19.54 masked total, %20.11 masked real twoBitToFa ../../cb3.2bit stdout | faSize stdin # 108492946 bases (3041279 N's 105451667 real # 84250398 upper 21201269 lower) in 13 sequences in 1 files # %19.54 masked total, %20.11 masked real # the chrM definition is missing from cb3.agp # edit that file to add the line: # chrM 1 14420 1 F AC186293.1 1 14420 + cd ../bigZips splitFileByColumn -ending=agp ../../cb3.agp . tar cvzf ./chromAgp.tar.gz ./chr*.agp rm chr*.agp for C in `awk '{print $1}' ../../chrom.sizes` do zcat ../chromosomes/${C}.fa.gz > ${C}.fa echo chr${C} done done # Verify sequence faSize ch*.fa # 108492946 bases (3041279 N's 105451667 real # 84250398 upper 21201269 lower) in 13 sequences in 13 files # %19.54 masked total, %20.11 masked real tar cvzf ./chromFa.tar.gz chr*.fa for F in chr*.fa do maskOutFa $F hard $F.masked done tar cvzf chromFaMasked.tar.gz ./chr*.fa.masked rm chr*.fa chr*.fa.masked cp -p ../../*/chr*.fa.out . tar cvzf ./chromOut.tar.gz chr*.fa.out rm chr*.fa.out splitFileByColumn -ending=bed ../../bed/simpleRepeat/trfMask.bed . 
tar cvzf ./chromTrf.tar.gz chr*.bed rm chr*.bed ssh hgwdev # get GenBank native mRNAs cd /cluster/data/genbank ./bin/x86_64/gbGetSeqs -db=cb3 -native GenBank mrna \ /cluster/data/cb3/goldenPath/bigZips/mrna.fa.gz # get GenBank xeno mRNAs ./bin/x86_64/gbGetSeqs -db=cb3 -xeno GenBank mrna \ /cluster/data/cb3/goldenPath/bigZips/xenoMrna.fa.gz # get native GenBank ESTs ./bin/x86_64/gbGetSeqs -db=cb3 -native GenBank est \ /cluster/data/cb3/goldenPath/bigZips/est.fa.gz # There are no native RefSeq mRNAs # get native RefSeq mRNAs # ./bin/x86_64/gbGetSeqs -db=cb3 -native RefSeq mrna \ # /cluster/data/cb3/goldenPath/bigZips/refMrna.fa ssh kkstore02 cd /cluster/data/cb3/goldenPath/bigZips md5sum *.gz > md5sum.txt # Fetch a README.txt file from something related to this and # ensure it is correct ########################################################################## ## Fixup chrM_gold missing table entry (DONE - 2007-04-13 - Hiram) ssh hgwdev mkdir /cluster/data/cb3/bed/gold cd /cluster/data/cb3/bed/gold hgsql -N -e "select * from chrM_gold;" hg18 > chrM_gold.tab # Edit that to make it read: # 585 chrM 0 14420 1 F AC186293.1 0 14420 + sed -e "s/agpFrag/chrM_gold/" $HOME/kent/src/hg/lib/agpFrag.sql \ > chrM_gold.sql # edit that to add the bin column, then # bin smallint(6) NOT NULL default '0', hgLoadSqlTab cb3 chrM_gold chrM_gold.sql chrM_gold.tab ############################################################################ ## BlastZ caePb1 - (DONE - 2007-04-18 - Hiram) ssh kkstore01 mkdir /cluster/data/cb3/bed/blastz.caePb1.2007-04-18 cd /cluster/data/cb3/bed/blastz.caePb1.2007-04-18 cat << '_EOF_' > DEF # cb3 vs caePb1 BLASTZ_H=2000 BLASTZ_M=50 # TARGET: briggsae Cb3 SEQ1_DIR=/san/sanvol1/scratch/worms/cb3/cb3.rmskTrf.2bit SEQ1_LEN=/san/sanvol1/scratch/worms/cb3/chrom.sizes SEQ1_CHUNK=1000000 SEQ1_LAP=10000 # QUERY: C. 
PB2801 caePb1 SEQ2_DIR=/san/sanvol1/scratch/worms/caePb1/caePb1.2bit SEQ2_LEN=/san/sanvol1/scratch/worms/caePb1/chrom.sizes SEQ2_CTGDIR=/san/sanvol1/scratch/worms/caePb1/caePb1.contigs.sdTrf.2bit SEQ2_CTGLEN=/san/sanvol1/scratch/worms/caePb1/caePb1.contigs.sizes SEQ2_LIFT=/san/sanvol1/scratch/worms/caePb1/caePb1.contigs.lift SEQ2_CHUNK=1000000 SEQ2_LAP=10000 SEQ2_LIMIT=50 BASE=/cluster/data/cb3/bed/blastz.caePb1.2007-04-18 TMPDIR=/scratch/tmp '_EOF_' # << happy emacs time nice -n +19 doBlastzChainNet.pl DEF -verbose=2 \ -qRepeats=windowmaskerSdust -bigClusterHub=kk \ -blastzOutRoot /cluster/bluearc/cb3CaePb1 > do.log 2>&1 & # real 68m42.057s cat fb.cb3.chainCaePb1Link.txt # 42772225 bases of 108433446 (39.446%) in intersection # The following is also in caePb1.txt mkdir /cluster/data/caePb1/bed/blastz.cb3.swap cd /cluster/data/caePb1/bed/blastz.cb3.swap time nice -n +19 doBlastzChainNet.pl -verbose=2 \ /cluster/data/cb3/bed/blastz.caePb1.2007-04-18/DEF \ -qRepeats=windowmaskerSdust -bigClusterHub=kk \ -swap > swap.log 2>&1 & # real 3m57.854s cat fb.caePb1.chainCb3Link.txt # 59366018 bases of 175247318 (33.876%) in intersection ########################################################################## ## BLASTZ priPac1 (DONE - 2007-02-25 - Hiram) ## early experiment, re-done 2007-04-21 ssh kkstore02 mkdir /cluster/data/cb3/bed/blastz.priPac1.2007-02-25 cd /cluster/data/cb3/bed/blastz.priPac1.2007-02-25 cat << '_EOF_' > DEF # cb3 vs priPac1 BLASTZ_H=2000 BLASTZ_M=50 # TARGET: briggsae Cb3 SEQ1_DIR=/san/sanvol1/scratch/worms/cb3/cb3.rmskTrf.2bit SEQ1_LEN=/san/sanvol1/scratch/worms/cb3/chrom.sizes SEQ1_CHUNK=1000000 SEQ1_LAP=10000 # QUERY: Pristionchus pacificus priPac1 SEQ2_DIR=/san/sanvol1/scratch/worms/priPac1/priPac1.rmskTrf.2bit SEQ2_LEN=/san/sanvol1/scratch/worms/priPac1/chrom.sizes SEQ2_CHUNK=1000000 SEQ2_LAP=10000 SEQ2_LIMIT=50 BASE=/cluster/data/cb3/bed/blastz.priPac1.2007-02-25 TMPDIR=/scratch/tmp '_EOF_' # << happy emacs time doBlastzChainNet.pl DEF 
-verbose=2 -bigClusterHub=pk \ -blastzOutRoot /cluster/bluearc/cb3PriPac1 > do.log 2>&1 & cat fb.cb3.chainPriPac1Link.txt # 6925266 bases of 108433446 (6.387%) in intersection ########################################################################## ## BLASTZ priPac1 (DONE - 2007-04-21 - Hiram) ssh kkstore02 mkdir /cluster/data/cb3/bed/blastz.priPac1.um.2007-04-21 cd /cluster/data/cb3/bed/blastz.priPac1.um.2007-04-21 cat << '_EOF_' > DEF # cb3 vs priPac1 BLASTZ_H=2000 BLASTZ_M=50 # TARGET: briggsae Cb3 SEQ1_DIR=/san/sanvol1/scratch/worms/cb3/cb3.rmskTrf.2bit SEQ1_LEN=/san/sanvol1/scratch/worms/cb3/chrom.sizes SEQ1_CHUNK=1000000 SEQ1_LAP=10000 # QUERY: Pristionchus pacificus priPac1 SEQ2_DIR=/san/sanvol1/scratch/worms/priPac1/priPac1.unmasked.2bit SEQ2_LEN=/san/sanvol1/scratch/worms/priPac1/chrom.sizes SEQ2_CHUNK=1000000 SEQ2_LAP=10000 SEQ2_LIMIT=50 BASE=/cluster/data/cb3/bed/blastz.priPac1.um.2007-04-21 TMPDIR=/scratch/tmp '_EOF_' # << happy emacs time nice -n +19 doBlastzChainNet.pl DEF -verbose=2 -bigClusterHub=pk \ -blastzOutRoot /cluster/bluearc/cb3PriPac1 > do.log 2>&1 & # real 40m45.455s cat fb.cb3.chainPriPac1Link.txt # 7075879 bases of 108433446 (6.526%) in intersection ## swap to priPac1 - also in priPac1.txt mkdir /cluster/data/priPac1/bed/blastz.cb3.swap cd /cluster/data/priPac1/bed/blastz.cb3.swap time nice -n +19 doBlastzChainNet.pl -verbose=2 -bigClusterHub=pk \ /cluster/data/cb3/bed/blastz.priPac1.um.2007-04-21/DEF \ -swap > swap.log 2>&1 & # real 0m48.872s cat fb.priPac1.chainCb3Link.txt # 7800313 bases of 145948246 (5.345%) in intersection ########################################################################## ## SWAP BLASTZ ce4 (DONE - 2007-02-25 - Hiram) ssh kkstore02 cd /cluster/data/ce4/bed/blastz.cb3.2007-02-25 cat fb.ce4.chainCb3Link.txt # 42491022 bases of 100281244 (42.372%) in intersection mkdir /cluster/data/cb3/bed/blastz.ce4.swap cd /cluster/data/cb3/bed/blastz.ce4.swap time ~/kent/src/hg/utils/automation/doBlastzChainNet.pl \ 
/cluster/data/ce4/bed/blastz.cb3.2007-02-25/DEF \ -verbose=2 -bigClusterHub=pk -swap > swap.log 2>&1 & # real 3m18.164s cat fb.cb3.chainCe4Link.txt # 43181535 bases of 108433446 (39.823%) in intersection ########################################################################## ## summarize chainLink measurements (2007-04-25 - Hiram) # org on cb3 on other # caeRem2 49.062 43.086 # ce4 39.823 42.372 # caePb1 39.446 33.876 # priPac1 6.526 5.345 ########################################################################### # ELEGANS (ce4) PROTEINS TRACK (DONE - Hiram - 2007-04-27,30) ssh kkstore02 # breaking up this target genome into manageable pieces mkdir /cluster/data/cb3/blastDb cd /cluster/data/cb3 twoBitToFa cb3.unmasked.2bit temp.fa faSplit gap temp.fa 1000000 blastDb/x -lift=blastDb.lft # 139 pieces of 139 written rm temp.fa cd blastDb for i in *.fa do /cluster/bluearc/blast229/formatdb -i $i -p F done rm *.fa ## copy to san for kluster access mkdir -p /san/sanvol1/scratch/worms/cb3/blastDb cd /san/sanvol1/scratch/worms/cb3/blastDb rsync -a --progress --stats /cluster/data/cb3/blastDb/ . ## create the query protein set mkdir -p /cluster/data/cb3/bed/tblastn.ce4SG cd /cluster/data/cb3/bed/tblastn.ce4SG echo /san/sanvol1/scratch/worms/cb3/blastDb/*.nsq | xargs ls -S \ | sed "s/\.nsq//" > query.lst wc -l query.lst # 139 query.lst # we want around 50000 jobs calc `wc /cluster/data/ce4/bed/blat.ce4SG/ce4SG.psl | awk "{print \\\$1}"`/\(50000/`wc query.lst | awk "{print \\\$1}"`\) # 23192/(50000/139) = 64.473760 mkdir -p /cluster/bluearc/worms/cb3/bed/tblastn.ce4SG/sgfa split -l 65 /cluster/data/ce4/bed/blat.ce4SG/ce4SG.psl \ /cluster/bluearc/worms/cb3/bed/tblastn.ce4SG/sgfa/sg ln -s /cluster/bluearc/worms/cb3/bed/tblastn.ce4SG/sgfa sgfa cd sgfa for i in *; do nice pslxToFa $i $i.fa; rm $i; done cd .. 
ls -1S sgfa/*.fa > sg.lst mkdir -p /cluster/bluearc/worms/cb3/bed/tblastn.ce4SG/blastOut ln -s /cluster/bluearc/worms/cb3/bed/tblastn.ce4SG/blastOut for i in `cat sg.lst`; do mkdir blastOut/`basename $i .fa`; done cd /cluster/data/cb3/bed/tblastn.ce4SG cat << '_EOF_' > template #LOOP blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } #ENDLOOP '_EOF_' # << happy emacs cat << '_EOF_' > blastSome #!/bin/sh BLASTMAT=/cluster/bluearc/blast229/data export BLASTMAT g=`basename $2` f=/tmp/`basename $3`.$g for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11 do if /cluster/bluearc/blast229/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8 then mv $f.8 $f.1 break; fi done if test -f $f.1 then if /cluster/bin/i386/blastToPsl $f.1 $f.2 then liftUp -nosort -type=".psl" -nohead $f.3 /cluster/data/cb3/blastDb.lft carry $f.2 liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/ce4/bed/blat.ce4SG/protein.lft warn $f.3 if pslCheck -prot $3.tmp then mv $3.tmp $3 rm -f $f.1 $f.2 $f.3 $f.4 fi exit 0 fi fi rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4 exit 1 '_EOF_' # << happy emacs chmod +x blastSome ssh pk cd /cluster/data/cb3/bed/tblastn.ce4SG gensub2 query.lst sg.lst template jobList para create jobList # para try, check, push, check etc. 
# Completed: 49623 of 49623 jobs # CPU time in finished jobs: 517498s 8624.97m 143.75h 5.99d 0.016 y # IO & Wait Time: 160940s 2682.33m 44.71h 1.86d 0.005 y # Average job time: 14s 0.23m 0.00h 0.00d # Longest finished job: 39s 0.65m 0.01h 0.00d # Submission to last job: 3858s 64.30m 1.07h 0.04d # do the cluster run for chaining ssh pk mkdir /cluster/data/cb3/bed/tblastn.ce4SG/chainRun cd /cluster/data/cb3/bed/tblastn.ce4SG/chainRun cat << '_EOF_' > template #LOOP chainOne $(path1) #ENDLOOP '_EOF_' # << happy emacs cat << '_EOF_' > chainOne (cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=50000 stdin /cluster/bluearc/worms/cb3/bed/tblastn.ce4SG/blastOut/c.`basename $1`.psl) '_EOF_' # << happy emacs chmod +x chainOne ls -1dS /cluster/bluearc/worms/cb3/bed/tblastn.ce4SG/blastOut/sg?? \ > chain.lst gensub2 chain.lst single template jobList cd /cluster/data/cb3/bed/tblastn.ce4SG/chainRun para create jobList para maxNode 30 para try, check, push, check etc. # Completed: 357 of 357 jobs # CPU time in finished jobs: 84s 1.39m 0.02h 0.00d 0.000 y # IO & Wait Time: 25172s 419.54m 6.99h 0.29d 0.001 y # Average job time: 71s 1.18m 0.02h 0.00d # Longest finished job: 95s 1.58m 0.03h 0.00d # Submission to last job: 124s 2.07m 0.03h 0.00d ssh kkstore02 cd /cluster/data/cb3/bed/tblastn.ce4SG/blastOut for i in sg?? do cat c.$i.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl sort -rn c60.$i.psl | pslUniq stdin u.$i.psl awk "((\$1 / \$11) ) > 0.60 { print }" c60.$i.psl > m60.$i.psl echo $i done sort -T /scratch/tmp -k 14,14 -k 16,16n -k 17,17n u.*.psl m60* | uniq \ > /cluster/data/cb3/bed/tblastn.ce4SG/blastCe4SG.psl cd .. 
pslCheck blastCe4SG.psl # load table ssh hgwdev cd /cluster/data/cb3/bed/tblastn.ce4SG hgLoadPsl cb3 blastCe4SG.psl # check coverage featureBits cb3 blastCe4SG # 18218293 bases of 108433446 (16.801%) in intersection featureBits caeRem2 blastCe4SG # 19763359 bases of 146898439 (13.454%) in intersection featureBits caePb1 blastCe4SG # 22988044 bases of 175247318 (13.117%) in intersection featureBits priPac1 blastCe4SG # 5617285 bases of 145948246 (3.849%) in intersection featureBits ce4 sangerGene # 27906202 bases of 100281244 (27.828%) in intersection ssh kkstore02 rm -rf /cluster/data/cb3/bed/tblastn.ce4SG/blastOut rm -rf /cluster/bluearc/worms/cb3/bed/tblastn.ce4SG #end tblastn ############################################################################ ## Reset defaultDb genome (DONE - 2007-04-30 - Hiram) ssh hgwdev hgsql -e 'update defaultDb set name="cb3" where name="cb2";' hgcentraltest ######################################################################### # ELEGANS (ce6) PROTEINS TRACK (DONE - Hiram - 2008-06-11) ssh kkstore02 # this breaking up was already done when ce4 genes were done mkdir /cluster/data/cb3/blastDb cd /cluster/data/cb3 twoBitToFa cb3.unmasked.2bit temp.fa faSplit gap temp.fa 1000000 blastDb/x -lift=blastDb.lft # 177 pieces of 177 written rm temp.fa cd blastDb for i in *.fa do /cluster/bluearc/blast229/formatdb -i $i -p F done rm *.fa ## copy to san for kluster access mkdir -p /san/sanvol1/scratch/worms/cb3/blastDb cd /san/sanvol1/scratch/worms/cb3/blastDb rsync -a --progress --stats /cluster/data/cb3/blastDb/ . 
## create the query protein set mkdir -p /cluster/data/cb3/bed/tblastn.ce6SG cd /cluster/data/cb3/bed/tblastn.ce6SG echo /san/sanvol1/scratch/worms/cb3/blastDb/*.nsq | xargs ls -S \ | sed "s/\.nsq//" > query.lst wc -l query.lst # 139 query.lst # we want around 50000 jobs calc `wc /cluster/data/ce6/bed/blat.ce6SG/ce6SG.psl | awk "{print \\\$1}"`/\(50000/`wc query.lst | awk "{print \\\$1}"`\) # 23741/(50000/139) = 65.999980 mkdir -p /cluster/bluearc/worms/cb3/bed/tblastn.ce6SG/sgfa split -l 66 /cluster/data/ce6/bed/blat.ce6SG/ce6SG.psl \ /cluster/bluearc/worms/cb3/bed/tblastn.ce6SG/sgfa/sg ln -s /cluster/bluearc/worms/cb3/bed/tblastn.ce6SG/sgfa sgfa cd sgfa for i in *; do nice pslxToFa $i $i.fa; rm $i; done cd .. ls -1S sgfa/*.fa > sg.lst mkdir -p /cluster/bluearc/worms/cb3/bed/tblastn.ce6SG/blastOut ln -s /cluster/bluearc/worms/cb3/bed/tblastn.ce6SG/blastOut for i in `cat sg.lst`; do mkdir blastOut/`basename $i .fa`; done cd /cluster/data/cb3/bed/tblastn.ce6SG cat << '_EOF_' > template #LOOP blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } #ENDLOOP '_EOF_' # << happy emacs cat << '_EOF_' > blastSome #!/bin/sh DB=cb3 BLASTMAT=/cluster/bluearc/blast229/data SCR="/scratch/tmp/${DB}" g=`basename $2` D=`basename $1` export BLASTMAT DB SCR g D mkdir -p ${SCR} cp -p $1.* ${SCR} f=${SCR}/`basename $3`.$g for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11 do if /cluster/bluearc/blast229/blastall -M BLOSUM80 -m 0 -F no \ -e $eVal -p tblastn -d ${SCR}/$D -i $2 -o $f.8 then mv $f.8 $f.1 break; fi done if test -f $f.1 then if /cluster/bin/i386/blastToPsl $f.1 $f.2 then liftUp -nosort -type=".psl" -nohead $f.3 \ /cluster/data/${DB}/blastDb.lft carry $f.2 > /dev/null liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp \ /cluster/data/ce6/bed/blat.ce6SG/protein.lft warn $f.3 > /dev/null if pslCheck -prot $3.tmp then mv $3.tmp $3 rm -f $f.1 $f.2 $f.3 $f.4 ${SCR}/$D.* rmdir --ignore-fail-on-non-empty ${SCR} fi exit 0 fi fi rm -f 
$f.1 $f.2 $3.tmp $f.8 $f.3 $f.4 ${SCR}/$D.* rmdir --ignore-fail-on-non-empty ${SCR} exit 1 '_EOF_' # << happy emacs chmod +x blastSome ssh kk cd /cluster/data/cb3/bed/tblastn.ce6SG gensub2 query.lst sg.lst template jobList para create jobList # para try, check, push, check etc. # Completed: 50040 of 50040 jobs # CPU time in finished jobs: 1544379s 25739.65m 428.99h 17.87d 0.049 y # IO & Wait Time: 1377110s 22951.84m 382.53h 15.94d 0.044 y # Average job time: 58s 0.97m 0.02h 0.00d # Longest finished job: 1849s 30.82m 0.51h 0.02d # Submission to last job: 6427s 107.12m 1.79h 0.07d # do the cluster run for chaining ssh kk mkdir /cluster/data/cb3/bed/tblastn.ce6SG/chainRun cd /cluster/data/cb3/bed/tblastn.ce6SG/chainRun cat << '_EOF_' > template #LOOP chainOne $(path1) #ENDLOOP '_EOF_' # << happy emacs cat << '_EOF_' > chainOne (cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=50000 stdin /cluster/bluearc/worms/cb3/bed/tblastn.ce6SG/blastOut/c.`basename $1`.psl) '_EOF_' # << happy emacs chmod +x chainOne ls -1dS /cluster/bluearc/worms/cb3/bed/tblastn.ce6SG/blastOut/sg?? \ > chain.lst gensub2 chain.lst single template jobList cd /cluster/data/cb3/bed/tblastn.ce6SG/chainRun para create jobList para -maxJob=30 push para try, check, push, check etc. # Completed: 360 of 360 jobs # CPU time in finished jobs: 294s 4.89m 0.08h 0.00d 0.000 y # IO & Wait Time: 1231s 20.52m 0.34h 0.01d 0.000 y # Average job time: 4s 0.07m 0.00h 0.00d # Longest finished job: 13s 0.22m 0.00h 0.00d # Submission to last job: 193s 3.22m 0.05h 0.00d ssh kkstore02 cd /cluster/data/cb3/bed/tblastn.ce6SG/blastOut for i in sg?? do cat c.$i.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl sort -rn c60.$i.psl | pslUniq stdin u.$i.psl awk "((\$1 / \$11) ) > 0.60 { print }" c60.$i.psl > m60.$i.psl echo $i done sort -T /scratch/tmp -k 14,14 -k 16,16n -k 17,17n u.*.psl m60* | uniq \ > /cluster/data/cb3/bed/tblastn.ce6SG/blastCe6SG.psl cd .. 
pslCheck blastCe6SG.psl # checked: 25405 failed: 0 errors: 0 # load table ssh hgwdev cd /cluster/data/cb3/bed/tblastn.ce6SG hgLoadPsl cb3 blastCe6SG.psl # check coverage featureBits cb3 blastCe6SG # 18431207 bases of 108433446 (16.998%) in intersection featureBits cb3 blastCe4SG # 18218293 bases of 108433446 (16.801%) in intersection featureBits ce6 sangerGene # 28134889 bases of 100281426 (28.056%) in intersection ssh kkstore02 rm -rf /cluster/data/cb3/bed/tblastn.ce6SG/blastOut rm -rf /cluster/bluearc/worms/cb3/bed/tblastn.ce6SG ######################################################################### # LASTZ SWAP ce9 (DONE - 2010-09-20 - Hiram) # original alignment cd /hive/data/genomes/ce9/bed/blastzCb3.2010-09-20 cat fb.ce9.chainCb3Link.txt # 42421395 bases of 100286004 (42.300%) in intersection # and for the swap mkdir /hive/data/genomes/cb3/bed/blastz.ce9.swap cd /hive/data/genomes/cb3/bed/blastz.ce9.swap time nice -n +19 doBlastzChainNet.pl -verbose=2 \ /hive/data/genomes/ce9/bed/blastzCb3.2010-09-20/DEF \ -workhorse=hgwdev -bigClusterHub=pk -smallClusterHub=memk \ -swap > swap.log 2>&1 & # real 2m48.539s cat fb.cb3.chainCe9Link.txt # 43115973 bases of 108433446 (39.763%) in intersection time nice -n +19 doBlastzChainNet.pl -verbose=2 \ /hive/data/genomes/ce9/bed/blastzCb3.2010-09-20/DEF \ -workhorse=hgwdev -bigClusterHub=pk -smallClusterHub=memk \ -continue=syntenicNet -syntenicNet -swap > synNet.log 2>&1 & # real 0m39.439s ######################################################################## # ELEGANS (ce9) PROTEINS TRACK (DONE - 2010-10-07 - Hiram) # this breaking up was already done when ce4 genes were done # reworking this with new blast binaries cd /hive/data/genomes/cb3 mv blastDb blastDb.2007-04-27 mv blastDb.lft blastDb.lft.2007-04-27 mkdir blastDb twoBitToFa cb3.unmasked.2bit temp.fa faSplit gap temp.fa 1000000 blastDb/x -lift=blastDb.lft # 139 pieces of 139 written rm temp.fa cd blastDb for i in *.fa do 
/scratch/data/blast-2.2.11/bin/formatdb -i $i -p F done rm *.fa ## create the query protein set mkdir -p /hive/data/genomes/cb3/bed/tblastn.ce9SG cd /hive/data/genomes/cb3/bed/tblastn.ce9SG echo /hive/data/genomes/cb3/blastDb/*.nsq | xargs ls -S \ | sed "s/\.nsq//" > query.lst wc -l query.lst # 139 query.lst # we want around 50000 jobs calc `wc /hive/data/genomes/ce9/bed/blat.ce9SG/ce9SG.psl | awk "{print \\\$1}"`/\(50000/`wc query.lst | awk "{print \\\$1}"`\) # 28103/(50000/139) = 78.126340 mkdir -p sgfa split -l 79 /hive/data/genomes/ce9/bed/blat.ce9SG/ce9SG.psl sgfa/sg cd sgfa for i in *; do nice pslxToFa $i $i.fa; rm $i; done cd .. ls -1S sgfa/*.fa > sg.lst mkdir -p blastOut for i in `cat sg.lst`; do mkdir blastOut/`basename $i .fa`; done cat << '_EOF_' > template #LOOP blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } #ENDLOOP '_EOF_' # << happy emacs cat << '_EOF_' > blastSome #!/bin/sh DB=cb3 BLASTMAT=/scratch/data/blast-2.2.11/data SCR="/scratch/tmp/${DB}" g=`basename $2` D=`basename $1` export BLASTMAT DB SCR g D mkdir -p ${SCR} cp -p $1.* ${SCR} f=${SCR}/`basename $3`.$g for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11 do if /scratch/data/blast-2.2.11/bin/blastall -M BLOSUM80 -m 0 -F no \ -e $eVal -p tblastn -d ${SCR}/$D -i $2 -o $f.8 then mv $f.8 $f.1 break; fi done if test -f $f.1 then if /cluster/bin/x86_64/blastToPsl $f.1 $f.2 then liftUp -nosort -type=".psl" -nohead $f.3 \ /hive/data/genomes/${DB}/blastDb.lft carry $f.2 > /dev/null liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp \ /hive/data/genomes/ce9/bed/blat.ce9SG/protein.lft warn $f.3 > /dev/null if pslCheck -prot $3.tmp then mv $3.tmp $3 rm -f $f.1 $f.2 $f.3 $f.4 ${SCR}/$D.* rmdir --ignore-fail-on-non-empty ${SCR} fi exit 0 fi fi rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4 ${SCR}/$D.* rmdir --ignore-fail-on-non-empty ${SCR} exit 1 '_EOF_' # << happy emacs chmod +x blastSome ssh swarm cd /hive/data/genomes/cb3/bed/tblastn.ce9SG gensub2 
query.lst sg.lst template jobList para create jobList para try, check, push, check etc. # Completed: 49484 of 49484 jobs # CPU time in finished jobs: 352680s 5878.00m 97.97h 4.08d 0.011 y # IO & Wait Time: 151932s 2532.20m 42.20h 1.76d 0.005 y # Average job time: 10s 0.17m 0.00h 0.00d # Longest finished job: 26s 0.43m 0.01h 0.00d # Submission to last job: 1665s 27.75m 0.46h 0.02d # do the cluster run for chaining ssh swarm mkdir /hive/data/genomes/cb3/bed/tblastn.ce9SG/chainRun cd /hive/data/genomes/cb3/bed/tblastn.ce9SG/chainRun cat << '_EOF_' > template #LOOP chainOne $(path1) #ENDLOOP '_EOF_' # << happy emacs cat << '_EOF_' > chainOne (cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=50000 stdin /hive/data/genomes/cb3/bed/tblastn.ce9SG/blastOut/c.`basename $1`.psl) '_EOF_' # << happy emacs chmod +x chainOne ls -1dS /hive/data/genomes/cb3/bed/tblastn.ce9SG/blastOut/sg?? \ > chain.lst gensub2 chain.lst single template jobList cd /hive/data/genomes/cb3/bed/tblastn.ce9SG/chainRun para create jobList para -maxJob=30 push para try, check, push, check etc. # Completed: 356 of 356 jobs # CPU time in finished jobs: 69s 1.14m 0.02h 0.00d 0.000 y # IO & Wait Time: 1629s 27.16m 0.45h 0.02d 0.000 y # Average job time: 5s 0.08m 0.00h 0.00d # Longest finished job: 8s 0.13m 0.00h 0.00d # Submission to last job: 177s 2.95m 0.05h 0.00d cd /hive/data/genomes/cb3/bed/tblastn.ce9SG/blastOut for i in sg?? do cat c.$i.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl sort -rn c60.$i.psl | pslUniq stdin u.$i.psl awk "((\$1 / \$11) ) > 0.60 { print }" c60.$i.psl > m60.$i.psl echo $i done sort -T /scratch/tmp -k 14,14 -k 16,16n -k 17,17n u.*.psl m60* | uniq \ > /hive/data/genomes/cb3/bed/tblastn.ce9SG/blastCe9SG.psl cd .. 
# validate the merged ce9SG alignments; the recorded result follows
pslCheck blastCe9SG.psl
# checked: 30151 failed: 0 errors: 0

# load table
ssh hgwdev
cd /hive/data/genomes/cb3/bed/tblastn.ce9SG
hgLoadPsl cb3 blastCe9SG.psl

# check coverage
# (each featureBits call is followed by the output that was recorded for it)
featureBits cb3 blastCe9SG
# 18490367 bases of 108433446 (17.052%) in intersection

featureBits cb3 blastCe6SG
# 18431207 bases of 108433446 (16.998%) in intersection

featureBits cb3 blastCe4SG
# 18218293 bases of 108433446 (16.801%) in intersection

featureBits caeRem3 blastCe9SG
# 20302540 bases of 138406388 (14.669%) in intersection

featureBits caePb2 blastCe9SG
# 23730009 bases of 170473138 (13.920%) in intersection

featureBits caeJap3 blastCe9SG
# 12894398 bases of 154057934 (8.370%) in intersection

featureBits melHap1 blastCe9SG
# 4376245 bases of 53017507 (8.254%) in intersection

featureBits melInc1 blastCe9SG
# 3882043 bases of 82095019 (4.729%) in intersection

featureBits priPac2 blastCe9SG
# 5436779 bases of 133634773 (4.068%) in intersection

featureBits bruMal1 blastCe9SG
# 4424694 bases of 89235536 (4.958%) in intersection

featureBits haeCon1 blastCe9SG
# 4990746 bases of 278844984 (1.790%) in intersection

featureBits ce9 sangerGene
# 28689552 bases of 100286004 (28.608%) in intersection

# the per-chunk blast output is no longer needed once the table is loaded
rm -rf blastOut

#########################################################################
## LASTZ priPac2 (DONE - 2010-10-19 - Hiram)
# use screen to control the job
screen
mkdir /hive/data/genomes/cb3/bed/blastzPriPac2.2010-10-19
cd /hive/data/genomes/cb3/bed/blastzPriPac2.2010-10-19

# parameter file handed to doBlastzChainNet.pl: cb3 is SEQ1, priPac2 is SEQ2
cat << '_EOF_' > DEF
# cb3 vs priPac2
BLASTZ_H=2000
BLASTZ_M=50

# TARGET: C. briggsae Cb3
SEQ1_DIR=/scratch/data/cb3/cb3.2bit
SEQ1_LEN=/hive/data/genomes/cb3/chrom.sizes
SEQ1_CHUNK=1000000
SEQ1_LAP=10000

# QUERY: P. pacificus priPac2
SEQ2_DIR=/scratch/data/priPac2/priPac2.2bit
SEQ2_LEN=/scratch/data/priPac2/chrom.sizes
SEQ2_CHUNK=1000000
SEQ2_LAP=0
SEQ2_LIMIT=50

BASE=/hive/data/genomes/cb3/bed/blastzPriPac2.2010-10-19
TMPDIR=/scratch/tmp
'_EOF_'
# << happy emacs

# run in the background from the build directory; output goes to do.log
time nice -n +19 doBlastzChainNet.pl -verbose=2 \
    "$(pwd)/DEF" \
    -workhorse=hgwdev -verbose=2 -bigClusterHub=swarm \
    -qRepeats=windowmaskerSdust -smallClusterHub=memk \
    > do.log 2>&1 &
# real 10m1.422s
cat fb.cb3.chainPriPac2Link.txt
# 5930453 bases of 108433446 (5.469%) in intersection

# swap, this is also in priPac2.txt
mkdir /hive/data/genomes/priPac2/bed/blastz.cb3.swap
cd /hive/data/genomes/priPac2/bed/blastz.cb3.swap
# same DEF as above, run with -swap from the priPac2 side
time nice -n +19 doBlastzChainNet.pl -verbose=2 \
    /hive/data/genomes/cb3/bed/blastzPriPac2.2010-10-19/DEF \
    -qRepeats=windowmaskerSdust -bigClusterHub=swarm \
    -smallClusterHub=swarm -swap \
    > swap.log 2>&1 &
# real 1m9.241s
cat fb.priPac2.chainCb3Link.txt
# 6286306 bases of 133634773 (4.704%) in intersection

############################################################################
# LIFTOVER TO cb4 (DONE - 2011-05-24 - Hiram )
mkdir /hive/data/genomes/cb3/bed/blat.cb4.2011-05-24
cd /hive/data/genomes/cb3/bed/blat.cb4.2011-05-24
# -debug run to create run dir, preview scripts...
doSameSpeciesLiftOver.pl \
    -buildDir="$(pwd)" \
    -bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
    -ooc=/hive/data/genomes/cb3/jkStuff/11.ooc -debug cb3 cb4
# Real run:
# identical options minus -debug, output captured in do.log
time nice -n +19 doSameSpeciesLiftOver.pl \
    -buildDir="$(pwd)" \
    -bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
    -ooc=/hive/data/genomes/cb3/jkStuff/11.ooc cb3 cb4 \
    > do.log 2>&1
# real 3m47.271s
# verify it works on genome-test
#############################################################################