# for emacs: -*- mode: sh; -*- # Pika "Ochotona princeps" # http://www.ncbi.nlm.nih.gov/genome/771 # http://www.ncbi.nlm.nih.gov/bioproject/74593 # http://www.ncbi.nlm.nih.gov/bioproject/12210 - chrMt - Italy ######################################################################### # DOWNLOAD SEQUENCE (DONE braney 2008-07-11) ssh kkstore05 mkdir /cluster/store12/ochPri2 ln -s /cluster/store12/ochPri2 /cluster/data mkdir /cluster/data/ochPri2/broad cd /cluster/data/ochPri2/broad wget --timestamping \ ftp://ftp.broad.mit.edu/pub/assemblies/mammals/pika/OchPri2.0/assembly.agp \ ftp://ftp.broad.mit.edu/pub/assemblies/mammals/pika/OchPri2.0/assembly.bases.gz \ ftp://ftp.broad.mit.edu/pub/assemblies/mammals/pika/OchPri2.0/assembly.quals.gz md5sum ass* > assembly.md5sum qaToQac assembly.quals.gz stdout | qacAgpLift assembly.agp stdin ochPri2.qual.qac # wget --timestamping \ ftp://ftp.broad.mit.edu/pub/assemblies/mammals/pika/OchPri2.0/BasicStats.out #### No BasicStats.out cut -f 1 assembly.agp | uniq -c | wc -l # Number of scaffolds: 187558 ######################################################################### # Create .ra file and run makeGenomeDb.pl ssh kkstore05 cd /cluster/data/ochPri2 cat << _EOF_ >ochPri2.config.ra # Config parameters for makeGenomeDb.pl: db ochPri2 clade mammal genomeCladePriority 35 scientificName Ochotona princeps commonName Pika assemblyDate Jul. 2008 assemblyLabel Broad Institute ochPri2 orderKey 236.5 #mitoAcc AJ222767 mitoAcc none fastaFiles /cluster/data/ochPri2/broad/assembly.bases.gz agpFiles /cluster/data/ochPri2/broad/assembly.agp qualFiles /cluster/data/ochPri2/broad/ochPri2.qual.qac dbDbSpeciesDir pika _EOF_ # use 'screen' make sure on kkstore05 makeGenomeDb.pl -verbose=2 ochPri2.config.ra > makeGenomeDb.out 2>&1 & # 'ctl-a ctl -d' returns to previous shell cut -f 2 chrom.sizes | ave stdin # Q1 1249.000000 # median 4932.000000 # Q3 9905.250000 # average 18371.833534 # min 600.000000 # max 1977105.000000 # count 187558 # total 3445784354.000000 # standard deviation 52187.432301 ######################################################################### # REPEATMASKER (DONE braney 2008-07-29) ssh kkstore05 screen # use a screen to manage this job mkdir /cluster/data/ochPri2/bed/repeatMasker cd /cluster/data/ochPri2/bed/repeatMasker doRepeatMasker.pl -buildDir=/cluster/data/ochPri2/bed/repeatMasker \ ochPri2 > do.log 2>&1 & # Note: can run simpleRepeats simultaneously #### When done with RM: ssh pk para time # Completed: 8024 of 8024 jobs # CPU time in finished jobs: 20582841s 343047.36m 5717.46h 238.23d 0.653 y # IO & Wait Time: 172016s 2866.93m 47.78h 1.99d 0.005 y # Average job time: 2587s 43.11m 0.72h 0.03d # Longest finished job: 8109s 135.15m 2.25h 0.09d # Submission to last job: 121861s 2031.02m 33.85h 1.41d time nice -n +19 featureBits ochPri2 rmsk > fb.ochPri2.rmsk.txt 2>&1 & # 219204536 bases of 1923624051 (11.395%) in intersection # RepeatMasker and lib version from do.log: # Jun 13 2008 (open-3-2-5) version of RepeatMasker # CC RELEASE 20080611; ######################################################################### # SIMPLE REPEATS TRF (DONE braney 2008-07-29) ssh kkstore05 screen # use a screen to manage this job mkdir /cluster/data/ochPri2/bed/simpleRepeat cd /cluster/data/ochPri2/bed/simpleRepeat # doSimpleRepeat.pl -buildDir=/cluster/data/ochPri2/bed/simpleRepeat \ ochPri2 > do.log 2>&1 & #### When done ssh pk para time # Completed: 70 of 70 jobs # CPU time in finished jobs: 51330s 855.49m 14.26h 0.59d 0.002 y # IO & Wait Time: 191s 3.19m 0.05h 0.00d 0.000 y # Average job time: 736s 12.27m 0.20h 0.01d # Longest finished job: 7673s 127.88m 2.13h 0.09d # Submission to last job: 7905s 131.75m 2.20h 0.09d featureBits ochPri2 simpleRepeat # 181386495 bases of 1923624051 (9.429%) in intersection # after RM run is done, add this mask: cd /cluster/data/ochPri2 twoBitMask ochPri2.rmsk.2bit -add bed/simpleRepeat/trfMask.bed ochPri2.2bit twoBitToFa ochPri2.2bit stdout | faSize stdin # 3445784354 bases (1522160303 N's 1923624051 real 1703533517 upper 220090534 # lower) in 187558 sequences in 1 files # Total size: mean 18371.8 sd 52187.6 min 600 (scaffold_187557) max 1977105 # (scaffold_0) median 4932 # N count: mean 8115.7 sd 20418.0 # U count: mean 9082.7 sd 30990.6 # L count: mean 1173.5 sd 4397.6 # %6.39 masked total, %11.44 masked real twoBitToFa ochPri2.rmsk.2bit stdout | faSize stdin # 3445784354 bases (1522160303 N's 1923624051 real 1704790340 upper 218833711 # lower) in 187558 sequences in 1 files # Total size: mean 18371.8 sd 52187.6 min 600 (scaffold_187557) max 1977105 # (scaffold_0) median 4932 # N count: mean 8115.7 sd 20418.0 # U count: mean 9089.4 sd 31003.0 # L count: mean 1166.8 sd 4384.0 # %6.35 masked total, %11.38 masked real # Link to it from /gbdb ln -s /cluster/data/ochPri2/ochPri2.2bit /gbdb/ochPri2/ochPri2.2bit # mkdir /san/sanvol1/scratch/ochPri2 cp /cluster/data/ochPri2/ochPri2.2bit /san/sanvol1/scratch/ochPri2 cp /cluster/data/ochPri2/chrom.sizes /san/sanvol1/scratch/ochPri2 ############################################################################ # ochPri2 - Pika - Ensembl Genes version 51 (DONE - 2008-12-03 - hiram) ssh kolossus cd /hive/data/genomes/ochPri2 cat << '_EOF_' > ochPri2.ensGene.ra # required db variable db ochPri2 # do we need to translate geneScaffold coordinates geneScaffolds yes # ignore genes that do not properly convert to a gene pred, and contig # names that are not in the UCSC assembly skipInvalid yes # ignore the single gene that has an invalid structure from Ensembl: # 10995: ENSOPRT00000002716 no exonFrame on CDS exon 2 '_EOF_' # << happy emacs doEnsGeneUpdate.pl -ensVersion=51 ochPri2.ensGene.ra ssh hgwdev cd /hive/data/genomes/ochPri2/bed/ensGene.51 featureBits ochPri2 ensGene # 25200422 bases of 1923624051 (1.310%) in intersection *** All done! (through the 'makeDoc' step) *** Steps were performed in /hive/data/genomes/ochPri2/bed/ensGene.51 ############################################################################ # cpgIslands - (DONE - 2011-04-23 - Hiram) mkdir /hive/data/genomes/ochPri2/bed/cpgIslands cd /hive/data/genomes/ochPri2/bed/cpgIslands time doCpgIslands.pl ochPri2 > do.log 2>&1 # fixing broken command in script: time ./doLoadCpg.csh # real 2m10.760s # continuing: time doCpgIslands.pl -continue=cleanup ochPri2 > cleanup.log 2>&1 # real 41m18.291s cat fb.ochPri2.cpgIslandExt.txt # 22070916 bases of 1923624051 (1.147%) in intersection ######################################################################### # genscan - (DONE - 2011-04-26 - Hiram) mkdir /hive/data/genomes/ochPri2/bed/genscan cd /hive/data/genomes/ochPri2/bed/genscan time doGenscan.pl ochPri2 > do.log 2>&1 # recovering from power failure, kluster run had just finished # and it had just started on makeBed: time ./doMakeBed.csh # real 329m1.050s # continuing: time doGenscan.pl -continue=load ochPri2 > load.log 2>&1 # real 12m8.926s # failed out of inodes on hive: time doGenscan.pl -continue=cleanup ochPri2 > cleanup.log 2>&1 # real 35m52.560s cat fb.ochPri2.genscan.txt # 76450061 bases of 1923624051 (3.974%) in intersection cat fb.ochPri2.genscanSubopt.txt # 102217862 bases of 1923624051 (5.314%) in intersection ######################################################################### # windowMasker - (DONE - 2012-05-02 - Hiram) screen -S ochPri2 mkdir /hive/data/genomes/ochPri2/bed/windowMasker cd /hive/data/genomes/ochPri2/bed/windowMasker # trying out new version of the script that does all the usual steps # that used to be performed manually after it was done time /cluster/home/hiram/kent/src/hg/utils/automation/doWindowMasker.pl \ -workhorse=hgwdev -buildDir=`pwd` -dbHost=hgwdev ochPri2 > do.log 2>&1 # real 904m39.415s # fixing a broken command in the doLoad step: time ./lastLoad.csh # real 7m10.582s time /cluster/home/hiram/kent/src/hg/utils/automation/doWindowMasker.pl \ -continue=cleanup -workhorse=hgwdev -buildDir=`pwd` \ -dbHost=hgwdev ochPri2 > cleanup.log 2>&1 # real 1m45.497s cat fb.ochPri2.windowmaskerSdust.beforeClean.txt # 2224659037 bases of 3445784354 (64.562%) in intersection sed -e 's/^/ #\t/' fb.ochPri2.windowmaskerSdust.clean.txt # 702498734 bases of 3445784354 (20.387%) in intersection sed -e 's/^/ #\t/' fb.ochPri2.rmsk.windowmaskerSdust.txt # 79974550 bases of 3445784354 (2.321%) in intersection ######################################################################### # MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2012-05-03 - Hiram) # Use -repMatch=900, based on size -- for human we use 1024 # use the "real" number from the faSize measurement, # hg19 is 2897316137, calculate the ratio factor for 1024: calc \( 1923624051 / 2897316137 \) \* 1024 # ( 1923624051 / 2897316137 ) * 1024 = 679.867483 # round up to 700 cd /hive/data/genomes/ochPri2 time blat ochPri2.2bit /dev/null /dev/null -tileSize=11 \ -makeOoc=jkStuff/ochPri2.11.ooc -repMatch=700 # Wrote 19533 overused 11-mers to jkStuff/ochPri2.11.ooc # real 0m51.997s # there are no non-bridged gaps, no lift file needed for genbank hgsql -N -e "select bridge from gap;" ochPri2 | sort | uniq -c # 530221 yes # cd /hive/data/genomes/ochPri2/jkStuff # gapToLift ochPri2 ochPri2.nonBridged.lift -bedFile=ochPri2.nonBridged.bed # largest non-bridged contig: # awk '{print $3-$2,$0}' ochPri2.nonBridged.bed | sort -nr | head # 123773608 chrX 95534 123869142 chrX.01 ######################################################################### # AUTO UPDATE GENBANK (DONE - 2012-05-03 - Hiram) # examine the file: /cluster/data/genbank/data/organism.lst # for your species to see what counts it has for: # organism mrnaCnt estCnt refSeqCnt # Ochotona princeps 1 0 0 # to decide which "native" mrna or ests you want to specify in genbank.conf ssh hgwdev cd $HOME/kent/src/hg/makeDb/genbank git pull # edit etc/genbank.conf to add ochPri2 just after ce2 # ochPri2 (Pika) ochPri2.serverGenome = /hive/data/genomes/ochPri2/ochPri2.2bit ochPri2.clusterGenome = /hive/data/genomes/ochPri2/ochPri2.2bit ochPri2.ooc = /hive/data/genomes/ochPri2/jkStuff/ochPri2.11.ooc ochPri2.lift = no ochPri2.refseq.mrna.native.pslCDnaFilter = ${lowCover.refseq.mrna.native.pslCDnaFilter} ochPri2.refseq.mrna.xeno.pslCDnaFilter = ${lowCover.refseq.mrna.xeno.pslCDnaFilter} ochPri2.genbank.mrna.native.pslCDnaFilter = ${lowCover.genbank.mrna.native.pslCDnaFilter} ochPri2.genbank.mrna.xeno.pslCDnaFilter = ${lowCover.genbank.mrna.xeno.pslCDnaFilter} ochPri2.genbank.est.native.pslCDnaFilter = ${lowCover.genbank.est.native.pslCDnaFilter} ochPri2.refseq.mrna.native.load = no ochPri2.refseq.mrna.xeno.load = yes ochPri2.genbank.mrna.xeno.load = yes ochPri2.genbank.est.native.load = no ochPri2.downloadDir = ochPri2 ochPri2.perChromTables = no # end of section added to etc/genbank.conf git commit -m "adding ochPri2 pika" etc/genbank.conf git push make etc-update git pull # Edit src/lib/gbGenome.c to add new species. git commit -m "adding definition for micMurNames" \ src/lib/gbGenome.c git push make install-server ssh hgwdev # used to do this on "genbank" machine screen -S ochPri2 # long running job managed in screen cd /cluster/data/genbank time nice -n +19 ./bin/gbAlignStep -initial ochPri2 & # var/build/logs/2012.06.08-09:50:30.ochPri2.initalign.log # real 2405m45.707s # load database when finished ssh hgwdev cd /cluster/data/genbank time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad ochPri2 & # logFile: var/dbload/hgwdev/logs/2012.06.11-13:42:34.dbload.log # real 10m29.923s # enable daily alignment and update of hgwdev (DONE - 2012-06-11 - Hiram) cd ~/kent/src/hg/makeDb/genbank git pull # add ochPri2 to: vi etc/align.dbs etc/hgwdev.dbs git commit -m "Added ochPri2." etc/align.dbs etc/hgwdev.dbs git push make etc-update ######################################################################### # set default position to RHO gene displays (DONE - 2012-07-24 - Hiram) hgsql -e \ 'update dbDb set defaultPos="scaffold_6491:75719-81052" where name="ochPri2";' \ hgcentraltest ############################################################################ # pushQ entry (DONE - 2012-07-24 - Hiram) mkdir /hive/data/genomes/ochPri2/pushQ cd /hive/data/genomes/ochPri2/pushQ # Mark says don't let the transMap track get there time makePushQSql.pl ochPri2 2> stderr.txt | grep -v transMap > ochPri2.sql # real 3m37.306s scp -p ochPri2.sql hgwbeta:/tmp ssh hgwbeta "hgsql qapushq < /tmp/ochPri2.sql" ############################################################################