# for emacs: -*- mode: sh; -*-

# This file describes browser build for the Orangutan
# genome, July 2007
#
#       "$Id: ponAbe1.txt,v 1.4 2007/09/08 00:02:47 hiram Exp $"
#
# NOTE: this is a step-by-step command log, not a script to run unattended.
# The bare "ssh <host>" lines name the machine on which the commands that
# follow them were issued; paste each section by hand on that host.

######################################################################
## DOWNLOAD SEQUENCE (DONE - 2007-08-17 - Hiram)
    ssh kkstore01
    mkdir /cluster/store2/ponAbe1
    ln -s /cluster/store2/ponAbe1 /cluster/data/ponAbe1
    mkdir /cluster/data/ponAbe1/wustl
    cd /cluster/data/ponAbe1/wustl

    # No -O here: wget's timestamping is ineffective when -O is given, and
    # the URL basename is already the wanted local file name.
    for F in supercontigs.agp.gz supercontigs.fa.gz contigs.fa.gz \
            contigs.fa.qual.gz
    do
        wget --timestamping \
"ftp://genome.wustl.edu/pub/organism/Primates/Pongo_pygmaeus_abelii/assembly/Pongo_pygmaeus_abelii-2.0.2/output/${F}"
    done

    ls -ogrt
# -rw-rw-r--  1 824149580 Jul 10 17:22 contigs.fa.gz
# -rw-rw-r--  1   9613417 Jul 10 17:23 supercontigs.agp.gz
# -rw-rw-r--  1 724013642 Jul 10 17:23 contigs.fa.qual.gz
# -rw-rw-r--  1 900117536 Jul 10 17:24 supercontigs.fa.gz

#######################################################################
## create config.ra and run makeGenomeDb.pl (DONE - 2007-08-21 - Hiram)
    ssh kkstore01
    cd /cluster/data/ponAbe1
    # The here-document delimiter is quoted so nothing in the body is
    # expanded; the terminating line must be the bare word _EOF_.
    cat << '_EOF_' > ponAbe1.config.ra
# Config parameters for makeGenomeDb.pl:
db ponAbe1
scientificName Pongo pygmaeus abelii
commonName Orangutan
assemblyDate Jul. 2007
assemblyLabel WUSTL 2.0.2
orderKey 31
# mitoAcc gi:1743294
mitoAcc X97707.1
fastaFiles /cluster/data/ponAbe1/wustl/supercontigs.fa.gz
agpFiles /cluster/data/ponAbe1/wustl/supercontigs.agp.gz
# qualFiles /dev/null
dbDbSpeciesDir orangutan
_EOF_
    # << happy emacs

    time nice -n +19 ~/kent/src/hg/utils/automation/makeGenomeDb.pl \
        -stop=agp ponAbe1.config.ra > makeGenomeDb.out 2>&1 &
    # real    24m24.468s
    time nice -n +19 ~/kent/src/hg/utils/automation/makeGenomeDb.pl \
        -continue=db ponAbe1.config.ra > db.continue.out 2>&1 &
    ## says it needs a clade
    # add to the .ra file (these two lines are config text, not commands):
    #   clade mammal
    #   genomeCladePriority 11
    ## and continue
    time nice -n +19 ~/kent/src/hg/utils/automation/makeGenomeDb.pl \
        -continue=dbDb ponAbe1.config.ra > dbDb.continue.out 2>&1 &

    # add the trackDb files to the source tree and to the trackDb/makefile

##########################################################################
# fetch photograph (DONE - 2007-08-21 - Hiram)
    mkdir /cluster/data/ponAbe1/photo
    cd /cluster/data/ponAbe1/photo
    # -O renames the download, so --timestamping (which has no effect
    # together with -O) is not used here.
    wget \
        http://www.genome.gov/Images/press_photos/highres/84-300.jpg \
        -O nhgri.original.84-300.jpg

    convert -geometry 300x200 -quality 80 nhgri.original.84-300.jpg \
        Pongo_pygmaeus_abelii.jpg

    # check this .jpg image into the source tree browser/images/ directory

##########################################################################
## Repeat masker (DONE - 2007-08-22 - Hiram)
    ssh kkstore01
    ## use screen for this
    mkdir /cluster/data/ponAbe1/bed/RepeatMasker
    cd /cluster/data/ponAbe1/bed/RepeatMasker
    time nice -n +19 ~/kent/src/hg/utils/automation/doRepeatMasker.pl \
        -bigClusterHub=pk \
        -buildDir=/cluster/data/ponAbe1/bed/RepeatMasker ponAbe1 > do.out 2>&1 &
    # real    1831m46.301s

##############################################################################
## simpleRepeat masking (DONE - 2007-09-05 - Hiram)
    ## create a kki kluster run
    ssh kkr1u00
    mkdir /iscratch/i/ponAbe1
    cd /iscratch/i/ponAbe1
    cp -p /cluster/data/ponAbe1/ponAbe1.unmasked.2bit .
    cp -p /cluster/data/ponAbe1/chrom.sizes .
    twoBitToFa ponAbe1.unmasked.2bit ponAbe1.unmasked.fa
    mkdir split
    # split sequence into about 1000 files, each about 3,000,000 bases
    faSplit about ponAbe1.unmasked.fa 3000000 split/s_

    # copy the split sequence to the other kkr*u00 hosts
    for R in 2 3 4 5 6 7 8
    do
        rsync -a --progress /iscratch/i/ponAbe1/ "kkr${R}u00:/iscratch/i/ponAbe1/"
    done

    ssh kki
    mkdir -p /cluster/data/ponAbe1/bed/simpleRepeat/trf
    cd /cluster/data/ponAbe1/bed/simpleRepeat/trf

    # runTrf is a csh job script: it copies one split .fa file to
    # /scratch/tmp, runs trfBig on it there, copies the resulting .bed
    # back to the current directory and removes the scratch directory.
    cat << '_EOF_' > runTrf
#!/bin/csh -fe
#
set C = $1:r
set SRC = /iscratch/i/ponAbe1/split/$C.fa
mkdir -p /scratch/tmp/$C
cp -p $SRC /scratch/tmp/$C/$C.fa
pushd /scratch/tmp/$C
/cluster/bin/i386/trfBig -trf=/cluster/bin/i386/trf $C.fa \
        /dev/null -bedAt=$C.bed -tempDir=/scratch/tmp/$C
popd
rm -f $C.bed
cp -p /scratch/tmp/$C/$C.bed .
rm -fr /scratch/tmp/$C
_EOF_
    # << happy emacs
    chmod +x runTrf

    cat << '_EOF_' > template
#LOOP
./runTrf $(path1) {check out line $(root1).bed}
#ENDLOOP
_EOF_
    # << happy emacs

    ls /iscratch/i/ponAbe1/split > part.list
    gensub2 part.list single template jobList
    para create jobList
    # then the usual para cycle, by hand:
    #   para try ... check ... push ... etc ...
# Completed: 894 of 894 jobs
# CPU time in finished jobs:      45858s     764.31m    12.74h    0.53d  0.001 y
# IO & Wait Time:                 10652s     177.53m     2.96h    0.12d  0.000 y
# Average job time:                  63s       1.05m     0.02h    0.00d
# Longest finished job:             855s      14.25m     0.24h    0.01d
# Submission to last job:          3742s      62.37m     1.04h    0.04d

    cat s_*.bed > ../simpleRepeat.bed
    cd ..
    # trfMask.bed keeps only the rows whose 5th column is <= 12
    awk '{if ($5 <= 12) print;}' simpleRepeat.bed > trfMask.bed

    ssh hgwdev
    cd /cluster/data/ponAbe1/bed/simpleRepeat
    time nice -n +19 hgLoadBed ponAbe1 simpleRepeat \
        simpleRepeat.bed -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql
    # Loaded 1039128 elements of size 16

    # Run in the foreground: the result file is read by the next command.
    nice -n +19 featureBits ponAbe1 simpleRepeat \
        > fb.simpleRepeat.ponAbe1.txt 2>&1
    cat fb.simpleRepeat.ponAbe1.txt
    # 119951362 bases of 3093799891 (3.877%) in intersection

    # add the trfMask to the rmsk masked sequence to get our final
    # masked sequence
    ssh kkstore01
    cd /cluster/data/ponAbe1
    time nice -n +19 cat bed/simpleRepeat/trfMask.bed \
        | twoBitMask -add -type=.bed ponAbe1.rmsk.2bit stdin ponAbe1.2bit
    # measure it
    time nice -n +19 twoBitToFa ponAbe1.2bit stdout \
        | faSize stdin > faSize.ponAbe1.2bit.txt 2>&1
    grep masked faSize.ponAbe1.2bit.txt
    # %48.60 masked total, %50.91 masked real

    ## clean up the /iscratch/i/ponAbe1/ directory
    ssh kkr1u00
    # Only remove files if the cd succeeded, and never expand a bare "*"
    # in whatever directory the shell happens to be in.
    cd /iscratch/i/ponAbe1 && rm -fr ./*
    # --delete propagates the now-empty directory to the other hosts
    for R in 2 3 4 5 6 7 8
    do
        rsync -a --progress --delete --stats /iscratch/i/ponAbe1/ \
            "kkr${R}u00:/iscratch/i/ponAbe1/"
    done
    cd ..
    rmdir ponAbe1
    for R in 2 3 4 5 6 7 8
    do
        ssh "kkr${R}u00" rmdir /iscratch/i/ponAbe1
    done

############################################################################
#  BLATSERVERS ENTRY (DONE - 2007-09-06 - Hiram)
#       After getting a blat server assigned by the Blat Server Gods,
    ssh hgwdev

    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
        VALUES ("ponAbe1", "blat13", "17784", "1", "0"); \
        INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
        VALUES ("ponAbe1", "blat13", "17785", "0", "1");' \
            hgcentraltest
    #   test it with some sequence