# for emacs: -*- mode: sh; -*-

# Mouse lemur (Microcebus murinus)
#
# Build log for the micMur1 assembly.  The commands below were run step by
# step on the hosts named by the "ssh" lines of each section, interleaved
# with manual edits (e.g. etc/genbank.conf); this file is a record and is
# not written to be executed top-to-bottom as one script.  Comment lines
# directly after a command record the output that command produced.

#    http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=ABDC00
#    http://www.ncbi.nlm.nih.gov/bioproject/19967
#    http://www.ncbi.nlm.nih.gov/genome/777
#    http://www.ncbi.nlm.nih.gov/genome/assembly/200518/

#########################################################################
# DOWNLOAD SEQUENCE (DONE braney 2008-07-28 )
    ssh kkstore05
    mkdir /cluster/store12/micMur1
    ln -s /cluster/store12/micMur1 /cluster/data
    mkdir /cluster/data/micMur1/broad
    cd /cluster/data/micMur1/broad

    wget --timestamping \
ftp://ftp.broad.mit.edu/pub/assemblies/mammals/mouseLemur/MicMur1.0/assembly.agp \
ftp://ftp.broad.mit.edu/pub/assemblies/mammals/mouseLemur/MicMur1.0/assembly.bases.gz \
ftp://ftp.broad.mit.edu/pub/assemblies/mammals/mouseLemur/MicMur1.0/assembly.quals.gz
    md5sum ass* > assembly.md5sum

    # convert the quality scores to .qac and lift them through the AGP
    qaToQac assembly.quals.gz stdout | qacAgpLift assembly.agp stdin micMur1.qual.qac

    cut -f 1 assembly.agp | uniq -c | wc -l
    # Number of scaffolds: 185042

#########################################################################
# Create .ra file and run makeGenomeDb.pl
    ssh kkstore05
    cd /cluster/data/micMur1
cat << _EOF_ >micMur1.config.ra
# Config parameters for makeGenomeDb.pl:
db micMur1
clade mammal
genomeCladePriority 35
scientificName Microcebus murinus
commonName Mouse lemur
assemblyDate Jun. 2003
assemblyLabel Broad Institute micMur1
orderKey 236.5
#mitoAcc AJ222767
mitoAcc none
fastaFiles /cluster/data/micMur1/broad/assembly.bases.gz
agpFiles /cluster/data/micMur1/broad/assembly.agp
qualFiles /cluster/data/micMur1/broad/micMur1.qual.qac
dbDbSpeciesDir mouseLemur
_EOF_

    # use 'screen' make sure on kkstore05
    makeGenomeDb.pl -verbose=2 micMur1.config.ra > makeGenomeDb.out 2>&1 &
    # 'ctl-a ctl-d' returns to previous shell
    cut -f 2 chrom.sizes | ave stdin
    # Q1 1105.000000
    # median 2118.000000
    # Q3 6087.000000
    # average 15684.389144
    # min 600.000000
    # max 1949650.000000
    # count 185042
    # total 2902270736.000000
    # standard deviation 61772.166578

#########################################################################
# REPEATMASKER (DONE braney 2008-08-03)
    ssh kkstore05
    screen # use a screen to manage this job
    mkdir /cluster/data/micMur1/bed/repeatMasker
    cd /cluster/data/micMur1/bed/repeatMasker
    doRepeatMasker.pl -buildDir=/cluster/data/micMur1/bed/repeatMasker \
        micMur1 > do.log 2>&1 &

    # Note: can run simpleRepeats simultaneously
    #### When done with RM:
    ssh pk
    para time
    # Completed: 6975 of 6975 jobs
    # CPU time in finished jobs: 25455563s 424259.38m 7070.99h 294.62d 0.807 y
    # IO & Wait Time: 255530s 4258.84m 70.98h 2.96d 0.008 y
    # Average job time: 3686s 61.44m 1.02h 0.04d
    # Longest finished job: 8546s 142.43m 2.37h 0.10d
    # Submission to last job: 439829s 7330.48m 122.17h 5.09d

    doRepeatMasker.pl -continue=cat -buildDir=/cluster/data/micMur1/bed/repeatMasker \
        micMur1 > do2.log 2>&1 &

    time nice -n +19 featureBits micMur1 rmsk > fb.micMur1.rmsk.txt 2>&1 &
    # 692542939 bases of 1852394361 (37.386%) in intersection

    # RepeatMasker and lib version from do.log:
    # Jun 13 2008 (open-3-2-5) version of RepeatMasker
    # CC RELEASE 20080611;

#########################################################################
# SIMPLE REPEATS TRF (DONE braney 2008-08-03)
    ssh kkstore05
    screen # use a screen to manage this job
    mkdir /cluster/data/micMur1/bed/simpleRepeat
    cd /cluster/data/micMur1/bed/simpleRepeat
    #
    doSimpleRepeat.pl -buildDir=/cluster/data/micMur1/bed/simpleRepeat \
        micMur1 > do.log 2>&1 &

    #### When done
    ssh pk
    para time
    # Completed: 51 of 51 jobs
    # CPU time in finished jobs: 24985s 416.41m 6.94h 0.29d
    # 0.001 y
    # IO & Wait Time: 101s 1.69m 0.03h 0.00d
    # 0.000 y
    # Average job time: 492s 8.20m 0.14h 0.01d
    # Longest finished job: 3887s 64.78m 1.08h 0.04d
    # Submission to last job: 3911s 65.18m 1.09h 0.05d

    featureBits micMur1 simpleRepeat
    # 24006609 bases of 1852394361 (1.296%) in intersection

    # after RM run is done, add this mask:
    cd /cluster/data/micMur1
    twoBitMask micMur1.rmsk.2bit -add bed/simpleRepeat/trfMask.bed micMur1.2bit

    twoBitToFa micMur1.2bit stdout | faSize stdin
    # 2902270736 bases (1049876375 N's 1852394361 real 1161230687 upper 691163674
    # lower) in 185042 sequences in 1 files
    # Total size: mean 15684.4 sd 61772.3 min 600 (scaffold_185041) max 1949650
    # (scaffold_0) median 2118
    # N count: mean 5673.7 sd 19304.2
    # U count: mean 6275.5 sd 29644.7
    # L count: mean 3735.2 sd 15428.8
    # %23.81 masked total, %37.31 masked real

    twoBitToFa micMur1.rmsk.2bit stdout | faSize stdin
    # 2902270736 bases (1049876375 N's 1852394361 real 1161791947 upper 690602414
    # lower) in 185042 sequences in 1 files
    # Total size: mean 15684.4 sd 61772.3 min 600 (scaffold_185041) max 1949650
    # (scaffold_0) median 2118
    # N count: mean 5673.7 sd 19304.2
    # U count: mean 6278.5 sd 29656.4
    # L count: mean 3732.1 sd 15417.4
    # %23.80 masked total, %37.28 masked real

    # Link to it from /gbdb
    ssh hgwdev
    ln -s /cluster/data/micMur1/micMur1.2bit /gbdb/micMur1/micMur1.2bit

    # mkdir /san/sanvol1/scratch/micMur1
    cp /cluster/data/micMur1/micMur1.2bit /san/sanvol1/scratch/micMur1
    cp /cluster/data/micMur1/chrom.sizes /san/sanvol1/scratch/micMur1

############################################################################
# add NCBI identifiers to the dbDb (DONE - 2008-10-21 - Hiram)
    hgsql -e \
'update dbDb set sourceName="Broad Institute micMur1 (NCBI project 19967, ABDC00000000)" where name="micMur1";' \
        hgcentraltest

###########################################################################
# cpgIslands - (DONE - 2011-04-24 - Hiram)
    mkdir /hive/data/genomes/micMur1/bed/cpgIslands
    cd /hive/data/genomes/micMur1/bed/cpgIslands
    time doCpgIslands.pl micMur1 > do.log 2>&1
    # real 314m39.323s

    cat fb.micMur1.cpgIslandExt.txt
    # 33763514 bases of 1852394361 (1.823%) in intersection

#########################################################################
# genscan - (DONE - 2011-04-26 - Hiram)
    mkdir /hive/data/genomes/micMur1/bed/genscan
    cd /hive/data/genomes/micMur1/bed/genscan
    time doGenscan.pl micMur1 > do.log 2>&1
    # recovering from power failure, kluster run had just finished
    # and it had just started on makeBed:
    time ./doMakeBed.csh
    # real 256m38.291s
    # continuing:
    time doGenscan.pl -continue=load micMur1 > load.log 2>&1
    # real 9m19.938s
    # failed out of inodes on hive:
    time doGenscan.pl -continue=cleanup micMur1 > cleanup.log 2>&1
    # real 25m41.674s

    cat fb.micMur1.genscan.txt
    # 50561491 bases of 1852394361 (2.730%) in intersection
    cat fb.micMur1.genscanSubopt.txt
    # 55072205 bases of 1852394361 (2.973%) in intersection

#########################################################################
# windowMasker - (DONE - 2012-05-02 - Hiram)
    screen -S micMur1
    mkdir /hive/data/genomes/micMur1/bed/windowMasker
    cd /hive/data/genomes/micMur1/bed/windowMasker
    # trying out new version of the script that does all the usual steps
    # that used to be performed manually after it was done
    time /cluster/home/hiram/kent/src/hg/utils/automation/doWindowMasker.pl \
        -workhorse=hgwdev -buildDir=`pwd` -dbHost=hgwdev micMur1 > do.log 2>&1
    # Elapsed time: 769m16s

    sed -e 's/^/ #\t/' fb.micMur1.windowmaskerSdust.beforeClean.txt
    # 1606982741 bases of 2902270736 (55.370%) in intersection
    sed -e 's/^/ #\t/' fb.micMur1.windowmaskerSdust.clean.txt
    # 557106366 bases of 2902270736 (19.196%) in intersection
    sed -e 's/^/ #\t/' fb.micMur1.rmsk.windowmaskerSdust.txt
    # 333189865 bases of 2902270736 (11.480%) in intersection

#########################################################################
# MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2012-05-03 - Hiram)
    # Use -repMatch=700, based on size -- for human we use 1024
    # use the "real" number from the faSize measurement,
    # hg19 is 2897316137, calculate the ratio factor for 1024:
    calc \( 1852394361 / 2897316137 \) \* 1024
    # ( 1852394361 / 2897316137 ) * 1024 = 654.692735

    # round up to 700
    cd /hive/data/genomes/micMur1
    time blat micMur1.2bit /dev/null /dev/null -tileSize=11 \
        -makeOoc=jkStuff/micMur1.11.ooc -repMatch=700
    # Wrote 19902 overused 11-mers to jkStuff/micMur1.11.ooc
    # real 0m47.067s

    # there are no non-bridged gaps, no lift file needed for genbank
    hgsql -N -e "select bridge from gap;" micMur1 | sort | uniq -c
    # 484693 yes
    # NOTE(review): the commented-out steps below were not needed here (all
    # gaps are bridged); the chrX figures look carried over from another
    # assembly's doc, since every sequence named above is scaffold_N -- confirm.
    # cd /hive/data/genomes/micMur1/jkStuff
    # gapToLift micMur1 micMur1.nonBridged.lift -bedFile=micMur1.nonBridged.bed
    # largest non-bridged contig:
    # awk '{print $3-$2,$0}' micMur1.nonBridged.bed | sort -nr | head
    # 123773608 chrX 95534 123869142 chrX.01

#########################################################################
# AUTO UPDATE GENBANK (DONE - 2012-05-03 - Hiram)
    # NOTE(review): the log file names recorded below are stamped 2012.06.07
    # and 2012.06.11 while the DONE stamps in this section say 2012-05 --
    # confirm which month is correct.
    # examine the file: /cluster/data/genbank/data/organism.lst
    # for your species to see what counts it has for:
    # organism mrnaCnt estCnt refSeqCnt
    # Melopsittacus undulatus 25 1 0
    # NOTE(review): the example row above names a different species than this
    # assembly (Microcebus murinus); presumably left over from a template.
    # to decide which "native" mrna or ests you want to specify in genbank.conf

    ssh hgwdev
    cd $HOME/kent/src/hg/makeDb/genbank
    git pull
    # edit etc/genbank.conf to add micMur1 just after ce2
    # (the lines from here to "end of section" are the stanza added to
    # etc/genbank.conf, not shell commands)
# micMur1 (Mouse lemur)
micMur1.serverGenome = /hive/data/genomes/micMur1/micMur1.2bit
micMur1.clusterGenome = /hive/data/genomes/micMur1/micMur1.2bit
micMur1.ooc = /hive/data/genomes/micMur1/jkStuff/micMur1.11.ooc
micMur1.lift = no
micMur1.refseq.mrna.native.pslCDnaFilter = ${lowCover.refseq.mrna.native.pslCDnaFilter}
micMur1.refseq.mrna.xeno.pslCDnaFilter = ${lowCover.refseq.mrna.xeno.pslCDnaFilter}
micMur1.genbank.mrna.native.pslCDnaFilter = ${lowCover.genbank.mrna.native.pslCDnaFilter}
micMur1.genbank.mrna.xeno.pslCDnaFilter = ${lowCover.genbank.mrna.xeno.pslCDnaFilter}
micMur1.genbank.est.native.pslCDnaFilter = ${lowCover.genbank.est.native.pslCDnaFilter}
micMur1.refseq.mrna.native.load = no
micMur1.refseq.mrna.xeno.load = yes
micMur1.genbank.mrna.xeno.load = yes
micMur1.genbank.est.native.load = no
micMur1.downloadDir = micMur1
micMur1.perChromTables = no
    # end of section added to etc/genbank.conf
    git commit -m "adding micMur1 mouse lemur" etc/genbank.conf
    git push
    make etc-update

    git pull
    # Edit src/lib/gbGenome.c to add new species.
    git commit -m "adding definition for micMurNames" \
        src/lib/gbGenome.c
    git push
    make install-server

    ssh hgwdev # used to do this on "genbank" machine
    screen -S micMur1 # long running job managed in screen
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbAlignStep -initial micMur1 &
    # var/build/logs/2012.06.07-16:10:31.micMur1.initalign.log
    # real 2142m37.090s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad micMur1 &
    # logFile: var/dbload/hgwdev/logs/2012.06.11-11:02:03.dbload.log
    # real 40m11.656s

    # enable daily alignment and update of hgwdev (DONE - 2012-05-11 - Hiram)
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # add micMur1 to:
    vi etc/align.dbs etc/hgwdev.dbs
    git commit -m "Added micMur1." etc/align.dbs etc/hgwdev.dbs
    git push
    make etc-update

#########################################################################
# set default position to RHO gene displays (DONE - 2012-07-24 - Hiram)
    hgsql -e \
'update dbDb set defaultPos="scaffold_31:985166-987669" where name="micMur1";' \
        hgcentraltest

############################################################################
# pushQ entry (DONE - 2012-07-24 - Hiram)
    mkdir /hive/data/genomes/micMur1/pushQ
    cd /hive/data/genomes/micMur1/pushQ
    # Mark says don't let the transMap track get there
    time makePushQSql.pl micMur1 2> stderr.txt | grep -v transMap > micMur1.sql
    # real 3m52.913s

    scp -p micMur1.sql hgwbeta:/tmp
    ssh hgwbeta "hgsql qapushq < /tmp/micMur1.sql"

############################################################################