# for emacs: -*- mode: sh; -*- # http://www.ncbi.nlm.nih.gov/bioproject/30809 # http://www.ncbi.nlm.nih.gov/genome/907 # http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=ABVD00 # sloth ( Choloepus hoffmanni ) ######################################################################### # DOWNLOAD SEQUENCE (DONE braney 2008-10-07) ssh kkstore05 mkdir /cluster/store12/choHof1 ln -s /cluster/store12/choHof1 /cluster/data mkdir /cluster/data/choHof1/broad cd /cluster/data/choHof1/broad wget --timestamping \ ftp://ftp.broad.mit.edu/pub/assemblies/mammals/sloth/ChoHof1.0/assembly.agp \ ftp://ftp.broad.mit.edu/pub/assemblies/mammals/sloth/ChoHof1.0/assembly.bases.gz \ ftp://ftp.broad.mit.edu/pub/assemblies/mammals/sloth/ChoHof1.0/assembly.quals.gz md5sum ass* > assembly.md5sum qaToQac assembly.quals.gz stdout | qacAgpLift assembly.agp stdin choHof1.qual.qac cut -f 1 assembly.agp | uniq -c | wc -l # Number of scaffolds: 481259 ######################################################################### # Create .ra file and run makeGenomeDb.pl (DONE braney 2008-10-07) ssh kolossus cd /cluster/data/choHof1 cat << _EOF_ >choHof1.config.ra # Config parameters for makeGenomeDb.pl: db choHof1 clade mammal genomeCladePriority 35 scientificName Choloepus hoffmanni commonName Sloth assemblyDate Jul. 
2008 assemblyLabel Broad Institute choHof1 orderKey 236.5 #mitoAcc AJ222767 mitoAcc none fastaFiles /cluster/data/choHof1/broad/assembly.bases.gz agpFiles /cluster/data/choHof1/broad/assembly.agp qualFiles /cluster/data/choHof1/broad/choHof1.qual.qac dbDbSpeciesDir sloth _EOF_ makeGenomeDb.pl -workhorse kolossus choHof1.config.ra > makeGenomeDb.out 2>&1 & cut -f 2 chrom.sizes | ave stdin # Q1 1235.000000 # median 2269.000000 # Q3 6207.000000 # average 5109.364438 # min 250.000000 # max 197267.000000 # count 481259 # total 2458927620.000000 # standard deviation 6671.452585 ######################################################################### ## Repeat Masker (DONE - 2008-10-16 - Hiram) screen # to manage this several day job mkdir /hive/data/genomes/choHof1/bed/repeatMasker cd /hive/data/genomes/choHof1/bed/repeatMasker time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \ -workhorse=hgwdev -bigClusterHub=swarm \ -buildDir=`pwd` choHof1 > do.log 2>&1 & # real 907m8.655s twoBitToFa choHof1.rmsk.2bit stdout | faSize stdin > faSize.rmsk.txt # 2458927620 bases (398507935 N's 2060419685 real 1325807439 upper 734612246 # lower) in 481259 sequences in 1 files # %29.88 masked total, %35.65 masked real ######################################################################### # SIMPLE REPEATS TRF (DONE - 2008-10-15 - Hiram) screen # use a screen to manage this job mkdir /hive/data/genomes/choHof1/bed/simpleRepeat cd /hive/data/genomes/choHof1/bed/simpleRepeat # time $HOME/kent/src/hg/utils/automation/doSimpleRepeat.pl \ -buildDir=/hive/data/genomes/choHof1/bed/simpleRepeat choHof1 \ > do.log 2>&1 & # real 276m12.206s cat fb.simpleRepeat # 32246798 bases of 2060419685 (1.565%) in intersection # after RM run is done, add this mask: cd /hive/data/genomes/choHof1 rm choHof1.2bit twoBitMask choHof1.rmsk.2bit -add bed/simpleRepeat/trfMask.bed choHof1.2bit # can safely ignore warning about >=13 fields in bed file twoBitToFa choHof1.2bit stdout | faSize stdin > 
choHof1.2bit.faSize.txt # 2458927620 bases (398507935 N's 2060419685 real 1325416974 upper 735002711 # lower) in 481259 sequences in 1 files # %29.89 masked total, %35.67 masked real # link to gbdb ln -s `pwd`/choHof1.2bit /gbdb/choHof1 ########################################################################### # prepare for kluster runs (DONE - 2008-10-16 - Hiram) # compare to size of real bases to adjust the repMatch # hg18: 2881421696 # choHof1: 2060419685 # thus: 1024 * 2060419685/2881421696 = 732 # rounding up to 800 for a bit more conservative masking cd /hive/data/genomes/choHof1 time blat choHof1.2bit \ /dev/null /dev/null -tileSize=11 -makeOoc=choHof1.11.ooc -repMatch=800 # Wrote 34983 overused 11-mers to choHof1.11.ooc # real 2m5.136s # and staging data for push to kluster nodes mkdir /hive/data/staging/data/choHof1 cp -p choHof1.2bit chrom.sizes choHof1.11.ooc \ /hive/data/staging/data/choHof1 # request to cluster admin to push this to the kluster nodes # /scratch/data/ ########################################################################### # cpgIsland - (DONE - 2012-04-23 - Hiram) mkdir /hive/data/genomes/choHof1/bed/cpgIslands cd /hive/data/genomes/choHof1/bed/cpgIslands time doCpgIslands.pl choHof1 > do.log 2>&1 # fixing broken command in script: time ./doLoadCpg.csh # real 4m0.503s # continuing: time doCpgIslands.pl -continue=cleanup choHof1 > cleanup.log 2>&1 # real 135m42.244s cat fb.choHof1.cpgIslandExt.txt # 9771165 bases of 2060419685 (0.474%) in intersection ############################################################################## # genscan - (DONE - 2011-04-25 - Hiram) mkdir /hive/data/genomes/choHof1/bed/genscan cd /hive/data/genomes/choHof1/bed/genscan time doGenscan.pl choHof1 > do.log 2>&1 # real 1004m5.449s cat fb.choHof1.genscan.txt # 41313862 bases of 2060419685 (2.005%) in intersection cat fb.choHof1.genscanSubopt.txt # 37047077 bases of 2060419685 (1.798%) in intersection 
######################################################################### # windowMasker - (DONE - 2012-05-02 - Hiram) mkdir /hive/data/genomes/choHof1/bed/windowMasker cd /hive/data/genomes/choHof1/bed/windowMasker # trying out new version of the script that does all the usual steps # that used to be performed manually after it was done time /cluster/home/hiram/kent/src/hg/utils/automation/doWindowMasker.pl \ -workhorse=hgwdev -buildDir=`pwd` -dbHost=hgwdev choHof1 > do.log 2>&1 # real 219m35.396s # fixing some broken commands: # time ./doLoadClean.csh > loadClean.log 2>&1 # real 1460m21.553s # fixing some broken commands: time ./lastClean.sh # real 13m14.689s cat fb.choHof1.windowmaskerSdust.beforeClean.txt # 1188087555 bases of 2458927620 (48.317%) in intersection cat fb.choHof1.windowmaskerSdust.clean.txt # 789579620 bases of 2458927620 (32.111%) in intersection cat fb.choHof1.rmsk.windowmaskerSdust.txt # 377356167 bases of 2458927620 (15.346%) in intersection ######################################################################### # AUTO UPDATE GENBANK (DONE - 2012-05-03 - Hiram) # examine the file: /cluster/data/genbank/data/organism.lst # for your species to see what counts it has for: # organism mrnaCnt estCnt refSeqCnt # Choloepus hoffmanni 7 0 0 # to decide which "native" mrna or ests you want to specify in genbank.conf ssh hgwdev cd $HOME/kent/src/hg/makeDb/genbank git pull # edit etc/genbank.conf to add choHof1 just after ce2 # choHof1 (sloth) choHof1.serverGenome = /hive/data/genomes/choHof1/choHof1.2bit choHof1.clusterGenome = /scratch/data/choHof1/choHof1.2bit choHof1.ooc = /scratch/data/choHof1//choHof1.11.ooc choHof1.lift = no choHof1.refseq.mrna.native.pslCDnaFilter = ${lowCover.refseq.mrna.native.pslCDnaFilter} choHof1.refseq.mrna.xeno.pslCDnaFilter = ${lowCover.refseq.mrna.xeno.pslCDnaFilter} choHof1.genbank.mrna.native.pslCDnaFilter = ${lowCover.genbank.mrna.native.pslCDnaFilter} choHof1.genbank.mrna.xeno.pslCDnaFilter = 
${lowCover.genbank.mrna.xeno.pslCDnaFilter} choHof1.genbank.est.native.pslCDnaFilter = ${lowCover.genbank.est.native.pslCDnaFilter} choHof1.refseq.mrna.native.load = no choHof1.refseq.mrna.xeno.load = yes choHof1.genbank.mrna.xeno.load = yes choHof1.genbank.est.native.load = no choHof1.downloadDir = choHof1 choHof1.perChromTables = no # end of section added to etc/genbank.conf git commit -m "adding choHof1 and removing blanks at end of line" \ etc/genbank.conf git push make etc-update git pull # Edit src/lib/gbGenome.c to add new species. git commit -m "adding definition for choHofNames Choloepus hoffmanni" \ src/lib/gbGenome.c git push make install-server ssh hgwdev # used to do this on "genbank" machine screen -S choHof1 # long running job managed in screen cd /cluster/data/genbank time nice -n +19 ./bin/gbAlignStep -initial choHof1 & # after setting genbank.mrna.xeno.load = no # var/build/logs/2012.05.31-10:16:29.choHof1.initalign.log # real 1472m49.406s # 2012-10-10 - see if genbank.mrna.xeno.load = yes is now viable # real 1098m41.122s # load database when finished ssh hgwdev cd /cluster/data/genbank time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad choHof1 & # logFile: var/dbload/hgwdev/logs/2012.06.10-16:52:16.dbload.log # real 10m9.448s # Reloaded with genbank.mrna.xeno.load on Thu Oct 11 08:29:06 PDT 2012 # real 48m53.920s # enable daily alignment and update of hgwdev (DONE - 2012-06-11 - Hiram) cd ~/kent/src/hg/makeDb/genbank git pull # add choHof1 to: vi etc/align.dbs etc/hgwdev.dbs git commit -m "Added choHof1." 
etc/align.dbs etc/hgwdev.dbs git push make etc-update ######################################################################### # set default position to CHM/CHML/OPN3 gene displays: # (DONE - 2012-07-20 - Hiram) hgsql -e \ 'update dbDb set defaultPos="scaffold_5701:20408-28976" where name="choHof1";' \ hgcentraltest ############################################################################ # pushQ entry (DONE - 2012-07-20 - Hiram) mkdir /hive/data/genomes/choHof1/pushQ cd /hive/data/genomes/choHof1/pushQ # Mark says don't let the transMap track get there makePushQSql.pl choHof1 2> stderr.txt | grep -v transMap > choHof1.sql scp -p choHof1.sql hgwbeta:/tmp ssh hgwbeta cd /tmp hgsql qapushq < choHof1.sql ############################################################################