# for emacs: -*- mode: sh; -*- # Drosophila mojavensis -- # # Agencourt's 1 August 2005 assembly # # NOTE: this doc may have genePred loads that fail to include # the bin column. Please correct that for the next build by adding # a bin column when you make any of these tables: # # mysql> SELECT tableName, type FROM trackDb WHERE type LIKE "%Pred%"; # +-------------+---------------------------------+ # | tableName | type | # +-------------+---------------------------------+ # | xenoRefGene | genePred xenoRefPep xenoRefMrna | # | geneMapper | genePred | # | genscan | genePred genscanPep | # +-------------+---------------------------------+ # DOWNLOAD SEQUENCE (DONE 8/5/05 angie) ssh kkstore02 mkdir /cluster/store11/droMoj2 cd /cluster/data ln -s /cluster/store11/droMoj2 droMoj2 cd /cluster/data/droMoj2 mkdir jkStuff bed mkdir downloads cd downloads wget http://rana.lbl.gov/drosophila/assemblies/dmoj_agencourt_arachne_01aug05.tar.gz tar xvzf dmoj_agencourt_arachne_01aug05.tar.gz cd agencourt_arachne_01aug2005 faSize scaffolds.fa #194270144 bases (13750513 N's 180519631 real 180519631 upper 0 lower) in 6843 sequences in 1 files #Total size: mean 28389.6 sd 757113.0 min 101 (scaffold_5710) max 34172700 (scaffold_6540) median 1671 #N count: mean 2009.4 sd 16940.3 #U count: mean 26380.2 sd 746670.3 #L count: mean 0.0 sd 0.0 # PARTITION SCAFFOLDS FOR REPEATMASKER RUN (DONE 8/5/05 angie) # Chop up large scaffolds into ~500kb chunks for RepeatMasker, # then glom the tiny scaffolds up into ~500k collections (looks like # some almost-500k pieces are glommed together --> a few almost-1M chunks, # but that's OK). ssh kkstore02 cd /cluster/data/droMoj2 mkdir scaffoldsSplit mv downloads/agencourt_arachne_01aug2005/scaffolds.fa . faSplit size scaffolds.fa 500000 -oneFile \ scaffoldsSplit -lift=jkStuff/scaffoldsSplit.lft mkdir chunks500k faSplit about scaffoldsSplit.fa 500000 chunks500k/chunk_ # CREATING DATABASE (DONE 8/5/05 angie) # Create the database. 
ssh hgwdev # Make sure there is at least 5 gig free for the database df -h /var/lib/mysql #/dev/sdc1 /dev/sdc1 1.8T 970G 690G 59% /var/lib/mysql hgsql '' -e 'create database droMoj2' # Copy the table "grp" from an existing database to the new database hgsql droMoj2 -e 'create table grp (PRIMARY KEY(NAME)) select * from dm2.grp' # RUN REPEAT MASKER (DONE 8/5/05 angie) ssh kkstore02 cat /cluster/bluearc/RepeatMasker/Libraries/version #RepBase Update 9.11, RM database version 20050112 # make the run directory, output directory, and job list cd /cluster/data/droMoj2 cat << '_EOF_' > jkStuff/RMDrosophila #!/bin/csh -fe cd $1 /bin/mkdir -p /tmp/droMoj2/$2 /bin/cp ../chunks500k/$2 /tmp/droMoj2/$2/ pushd /tmp/droMoj2/$2 /cluster/bluearc/RepeatMasker/RepeatMasker -s -spec drosophila $2 popd /bin/cp /tmp/droMoj2/$2/$2.out ./ /bin/rm -fr /tmp/droMoj2/$2/* /bin/rmdir --ignore-fail-on-non-empty /tmp/droMoj2/$2 /bin/rmdir --ignore-fail-on-non-empty /tmp/droMoj2 '_EOF_' # << this line makes emacs coloring happy chmod +x jkStuff/RMDrosophila mkdir RMRun RMOut cp /dev/null RMRun/RMJobs foreach f ( chunks500k/*.fa ) set chunk = $f:t echo ../jkStuff/RMDrosophila \ /cluster/data/droMoj2/RMOut $chunk \ '{'check in line+ /cluster/data/droMoj2/$f'}' \ '{'check out line+ /cluster/data/droMoj2/RMOut/$chunk.out'}' \ >> RMRun/RMJobs end # do the run ssh kk9 cd /cluster/data/droMoj2/RMRun para make RMJobs para time #Completed: 379 of 379 jobs #Average job time: 3453s 57.55m 0.96h 0.04d #Longest finished job: 5584s 93.07m 1.55h 0.06d #Submission to last job: 16519s 275.32m 4.59h 0.19d # Lift up the split-scaffold .out's to scaffold .out's ssh kkstore02 cd /cluster/data/droMoj2 foreach f (RMOut/*.fa.out) liftUp $f:r:r.scaf.out jkStuff/scaffoldsSplit.lft warn $f > /dev/null end # Make a consolidated scaffold .out file too: head -3 RMOut/chunk_00.fa.out > RMOut/scaffolds.fa.out foreach f (RMOut/*.scaf.out) tail +4 $f >> RMOut/scaffolds.fa.out end # Load the .out files into the database with: ssh 
hgwdev hgLoadOut droMoj2 /cluster/data/droMoj2/RMOut/scaffolds.fa.out # hgLoadOut made a "scaffolds_rmsk" table even with -table=rmsk, # but we want a non-split table with no prefix: hgsql droMoj2 -e 'rename table scaffolds_rmsk to rmsk' # Fix up the indices too: hgsql droMoj2 -e 'drop index bin on rmsk; \ drop index genoStart on rmsk; \ drop index genoEnd on rmsk; \ create index bin on rmsk (genoName(7), bin); \ create index genoStart on rmsk (genoName(7), genoStart);' # EXTRACT AGP FROM ASSEMBLY.LINKS FILE (DONE 1/23/06 angie) ssh hgwdev cd /cluster/data/droMoj2 # assembly.links includes negative gap values which are supposed to # indicate overlap between contigs, but when scaffold sequences were # generated, Mike Eisen chose to use +25 instead of the negative values. # So replace negative gap values with 25 for consistency with the # scaffold sequences, then translate to AGP: perl -wpe 'next if (/^#/); @w = split; \ $w[6] = 25 if ($w[6] < 0); $w[7] = 25 if ($w[7] < 0); \ $_ = join("\t", @w) . "\n";' \ downloads/agencourt_arachne_01aug2005/assembly.links \ | ~/kent/src/utils/arachneLinksToAgp.pl \ > scaffolds.agp nice checkAgpAndFa scaffolds.agp scaffolds.fa | tail hgGoldGapGl -noGl droMoj2 scaffolds.agp # EXTRACTING GAP INFO FROM BLOCKS OF NS (DONE 8/5/05 angie) ssh kkstore02 mkdir /cluster/data/droMoj2/bed/fakeAgp cd /cluster/data/droMoj2/bed/fakeAgp faGapSizes ../../scaffolds.fa \ -niceSizes=5,10,20,25,30,40,50,100,250,500,1000,10000,100000 # A disproportionately large number of gaps are exactly 25bp long, so # hgFakeAgp's default -minContigGap of 25 will be fine. 
hgFakeAgp ../../scaffolds.fa fake.agp ssh hgwdev hgLoadGap -unsplit droMoj2 /cluster/data/droMoj2/bed/fakeAgp/fake.agp # SIMPLE REPEATS (TRF) (DONE 8/5/05 angie) ssh kolossus mkdir /cluster/data/droMoj2/bed/simpleRepeat cd /cluster/data/droMoj2/bed/simpleRepeat nice trfBig -trf=/cluster/bin/i386/trf ../../scaffolds.fa \ /dev/null -bedAt=simpleRepeat.bed -tempDir=/tmp \ |& egrep -v '^(Removed|Tandem|Copyright|Loading|Allocating|Initializing|Computing|Scanning|Freeing)' \ > trf.log & # check on this with tail -f trf.log # Load this into the database as so ssh hgwdev hgLoadBed droMoj2 simpleRepeat \ /cluster/data/droMoj2/bed/simpleRepeat/simpleRepeat.bed \ -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql # FILTER SIMPLE REPEATS (TRF) INTO MASK (DONE 8/5/05 angie) # make a filtered version of the trf output: # keep trf's with period <= 12: ssh kkstore02 cd /cluster/data/droMoj2/bed/simpleRepeat awk '{if ($5 <= 12) print;}' simpleRepeat.bed > trfMask.bed # MASK FA USING REPEATMASKER AND FILTERED TRF FILES (DONE 8/5/05 angie) ssh kkstore02 cd /cluster/data/droMoj2 maskOutFa -soft scaffolds.fa bed/simpleRepeat/trfMask.bed \ scaffolds.fa maskOutFa -softAdd scaffolds.fa RMOut/scaffolds.fa.out scaffolds.fa # Now clean up the unmasked split scaffolds to avoid confusion later. rm -r chunks500k scaffoldsSplit.fa jkStuff/scaffoldsSplit.lft # STORE SEQUENCE AND ASSEMBLY INFORMATION (DONE 8/5/05 angie) # Translate to 2bit ssh kkstore02 cd /cluster/data/droMoj2 faToTwoBit scaffolds.fa droMoj2.2bit # Make chromInfo.tab. mkdir bed/chromInfo twoBitInfo droMoj2.2bit stdout \ | awk '{printf "%s\t%s\t/gbdb/droMoj2/droMoj2.2bit\n", $1, $2;}' \ > bed/chromInfo/chromInfo.tab # Make a symbolic link from /gbdb/droMoj2 to the 2bit. ssh hgwdev mkdir -p /gbdb/droMoj2 ln -s /cluster/data/droMoj2/droMoj2.2bit /gbdb/droMoj2/ # Load chromInfo table. 
hgsql droMoj2 < $HOME/kent/src/hg/lib/chromInfo.sql hgsql droMoj2 -e 'load data local infile \ "/cluster/data/droMoj2/bed/chromInfo/chromInfo.tab" into table chromInfo' # Make chrom.sizes from chromInfo contents and check scaffold count. hgsql droMoj2 -N -e 'select chrom,size from chromInfo' \ > /cluster/data/droMoj2/chrom.sizes wc -l /cluster/data/droMoj2/chrom.sizes # 6843 /cluster/data/droMoj2/chrom.sizes # MAKE HGCENTRALTEST ENTRY AND TRACKDB TABLE (DONE 8/5/05 angie) # Warning: genome and organism fields must correspond # with defaultDb values hgsql -h genome-testdb hgcentraltest -e \ 'INSERT INTO dbDb \ (name, description, nibPath, organism, \ defaultPos, active, orderKey, genome, scientificName, \ htmlPath, hgNearOk, hgPbOk, sourceName) values \ ("droMoj2", "Aug. 2005", "/gbdb/droMoj2", "D. mojavensis", \ "scaffold_6500:23838548-23875233", 1, 57, \ "D. mojavensis", \ "Drosophila mojavensis", "/gbdb/droMoj2/html/description.html", \ 0, 0, "Agencourt 1 August 2005");' # This is not the first droMoj, so defaultDb and genomeClade already # have entries for D. mojavensis. # Make trackDb table so browser knows what tracks to expect: ssh hgwdev cd ~/kent/src/hg/makeDb/trackDb cvs up -d -P # Edit trackDb/makefile to add droMoj2 to the DBS variable. mkdir drosophila/droMoj2 # Create a simple drosophila/droMoj2/description.html file. 
cvs add drosophila/droMoj2 cvs add drosophila/droMoj2/description.html make update DBS=droMoj2 ZOO_DBS= # go public on genome-test cvs ci makefile cvs ci drosophila/droMoj2 mkdir /gbdb/droMoj2/html # in a clean, updated tree's kent/src/hg/makeDb/trackDb: make alpha # PUT SEQUENCE ON /ISCRATCH FOR BLASTZ (DONE 8/5/05 angie) # First, agglomerate small scaffolds into chunks of ~100k median # (many scaffolds are larger than that) so we don't have too many # files for one dir, but keep a reasonably low job run time: ssh kkstore02 cd /cluster/data/droMoj2 mkdir chunksUnsplit faSplit about scaffolds.fa 100000 chunksUnsplit/chunk_ ssh kkr1u00 mkdir /iscratch/i/droMoj2 rsync -av /cluster/data/droMoj2/chunksUnsplit /iscratch/i/droMoj2/ rsync -av /cluster/data/droMoj2/droMoj2.2bit /iscratch/i/droMoj2/ iSync # PRODUCING GENSCAN PREDICTIONS (DONE 8/5/05 angie) ssh kkstore02 # Make hard-masked scaffolds and split up for processing: cd /cluster/data/droMoj2 maskOutFa scaffolds.fa hard scaffolds.fa.masked mkdir chunksUnsplitMasked faSplit about scaffolds.fa.masked 100000 chunksUnsplitMasked/chunk_ mkdir /cluster/data/droMoj2/bed/genscan cd /cluster/data/droMoj2/bed/genscan # Check out hg3rdParty/genscanlinux to get latest genscan: cvs co hg3rdParty/genscanlinux # Make 3 subdirectories for genscan to put their output files in mkdir gtf pep subopt ls -1S ../../chunksUnsplitMasked/chunk*.fa > chunks.list cat << '_EOF_' > gsub #LOOP gsBig {check in line+ $(path1)} {check out line gtf/$(root1).gtf} -trans={check out line pep/$(root1).pep} -subopt={check out line subopt/$(root1).bed} -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=2400000 #ENDLOOP '_EOF_' # << this line keeps emacs coloring happy ssh kki cd /cluster/data/droMoj2/bed/genscan gensub2 chunks.list single gsub jobList para make jobList para time #Completed: 304 of 304 jobs #Average job time: 24s 0.40m 0.01h 0.00d #Longest finished job: 1097s 18.28m 0.30h 0.01d #Submission to 
last job: 1097s 18.28m 0.30h 0.01d # If there are crashes, diagnose with "para problems". # If a job crashes due to genscan running out of memory, re-run it # manually with "-window=1200000" instead of "-window=2400000". # Concatenate scaffold-level results: ssh kkstore02 cd /cluster/data/droMoj2/bed/genscan cat gtf/*.gtf > genscan.gtf cat subopt/*.bed > genscanSubopt.bed cat pep/*.pep > genscan.pep # Clean up rm -r /cluster/data/droMoj2/chunksUnsplitMasked # Load into the database as so: ssh hgwdev cd /cluster/data/droMoj2/bed/genscan ldHgGene -gtf droMoj2 genscan genscan.gtf hgPepPred droMoj2 generic genscanPep genscan.pep hgLoadBed droMoj2 genscanSubopt genscanSubopt.bed # MAKE DOWNLOADABLE FILES (DONE 8/5/05 angie) ssh kkstore02 mkdir /cluster/data/droMoj2/zips cd /cluster/data/droMoj2 gzip -c RMOut/scaffolds.fa.out > zips/scaffoldOut.gz gzip -c scaffolds.fa > zips/scaffoldFa.gz gzip -c scaffolds.fa.masked > zips/scaffoldFaMasked.gz gzip -c bed/simpleRepeat/trfMask.bed > zips/scaffoldTrf.gz ssh hgwdev mkdir /usr/local/apache/htdocs/goldenPath/droMoj2 cd /usr/local/apache/htdocs/goldenPath/droMoj2 mkdir bigZips database # Create README.txt files in bigZips/ and database/ to explain the files. cd bigZips ln -s /cluster/data/droMoj2/zips/*.gz . nice md5sum *.gz > md5sum.txt # MAKE 11.OOC FILE FOR BLAT (DONE 8/5/05 angie) # Use -repMatch=100 (based on size -- for human we use 1024, and # fly size is ~4.4% of human judging by gapless dm1 genome size from # featureBits -- we would use 45, but bump that up a bit to be more # conservative). 
ssh kkr1u00 mkdir /cluster/bluearc/droMoj2 blat /cluster/data/droMoj2/droMoj2.2bit /dev/null /dev/null -tileSize=11 \ -makeOoc=/cluster/bluearc/droMoj2/11.ooc -repMatch=100 #Wrote 17296 overused 11-mers to /cluster/bluearc/droMoj2/11.ooc cp -p /cluster/bluearc/droMoj2/*.ooc /iscratch/i/droMoj2/ iSync # AUTO UPDATE GENBANK MRNA RUN (TODO 8/5/05 angie) ssh hgwdev # Update genbank config and source in CVS: cd ~/kent/src/hg/makeDb/genbank cvsup . # Edit etc/genbank.conf and add these lines (note scaffold-browser settings): # droMoj2 (D. mojavensis) droMoj2.genome = /iscratch/i/droMoj2/droMoj2.2bit droMoj2.mondoTwoBitParts = 1000 droMoj2.lift = no droMoj2.refseq.mrna.native.load = no droMoj2.refseq.mrna.xeno.load = yes droMoj2.refseq.mrna.xeno.pslReps = -minCover=0.15 -minAli=0.75 -nearTop=0.005 droMoj2.genbank.mrna.xeno.load = yes # GenBank has no D. mojavensis ESTs at this point... that may change. droMoj2.genbank.est.native.load = no droMoj2.genbank.est.xeno.load = no droMoj2.downloadDir = droMoj2 droMoj2.perChromTables = no cvs ci etc/genbank.conf # Update src/align/gbBlat to use /iscratch/i/droMoj2/11.ooc cvs diff src/align/gbBlat make cvs ci src/align/gbBlat # Install to /cluster/data/genbank: make install-server ssh `fileServer /cluster/data/genbank/` cd /cluster/data/genbank # This is an -initial run, (xeno) RefSeq only: nice bin/gbAlignStep -srcDb=refseq -type=mrna -initial droMoj2 & tail -f [its logfile] # Load results: ssh hgwdev cd /cluster/data/genbank nice bin/gbDbLoadStep -verbose=1 -drop -initialLoad droMoj2 featureBits droMoj2 xenoRefGene #16520385 bases of 165766797 (9.966%) in intersection # Clean up: rm -rf work/initial.droMoj2 # This is an -initial run, mRNA only: nice bin/gbAlignStep -srcDb=genbank -type=mrna -initial droMoj2 & tail -f [its logfile] # Load results: ssh hgwdev cd /cluster/data/genbank nice bin/gbDbLoadStep -verbose=1 -drop -initialLoad droMoj2 featureBits droMoj2 all_mrna #19602 bases of 165766797 (0.012%) in intersection 
featureBits droMoj2 xenoMrna #17295487 bases of 165766797 (10.434%) in intersection # Clean up: rm -rf work/initial.droMoj2 # SWAP CHAINS FROM DM2, BUILD NETS ETC. (IN PROGRESS 8/9/05 angie) mkdir /cluster/data/droMoj2/bed/blastz.dm2.swap cd /cluster/data/droMoj2/bed/blastz.dm2.swap doBlastzChainNet.pl -swap /cluster/data/dm2/bed/blastz.droMoj2/DEF \ >& do.log echo "check /cluster/data/droMoj2/bed/blastz.dm2.swap/do.log" \ | mail -s "check do.log" $USER # Add {chain,net}Dm2 to trackDb.ra if necessary. # MAKE GCPERCENT (DONE 8/9/05 angie) ssh hgwdev mkdir /cluster/data/droMoj2/bed/gc5Base cd /cluster/data/droMoj2/bed/gc5Base hgGcPercent -wigOut -doGaps -file=stdout -win=5 -verbose=2 droMoj2 \ /cluster/data/droMoj2 | wigEncode stdin gc5Base.wig gc5Base.wib mkdir /gbdb/droMoj2/wib ln -s `pwd`/gc5Base.wib /gbdb/droMoj2/wib hgLoadWiggle -pathPrefix=/gbdb/droMoj2/wib droMoj2 gc5Base gc5Base.wig # MAKE THIS THE DEFAULT ASSEMBLY WHEN THERE ARE ENOUGH TRACKS (TODO 8/?/05 angie) hgsql -h genome-testdb hgcentraltest -e \ 'UPDATE defaultDb set name = "droMoj2" where genome = "D. mojavensis";' # MAKE Drosophila Proteins track (DONE braney 2005-08-20) ssh kkstore02 mkdir -p /cluster/data/droMoj2/blastDb cd /cluster/data/droMoj2/blastDb faSplit sequence ../scaffolds.fa 400 x for i in *.fa; do formatdb -i $i -p F 2> /dev/null; done rm *.fa *.log ssh kkr1u00 mkdir -p /iscratch/i/droMoj2/blastDb cp /cluster/data/droMoj2/blastDb/* /iscratch/i/droMoj2/blastDb iSync > sync.out ssh kk mkdir -p /cluster/data/droMoj2/bed/tblastn.dm2FB cd /cluster/data/droMoj2/bed/tblastn.dm2FB ls -1S /iscratch/i/droMoj2/blastDb/*.nsq | sed "s/\.nsq//" > target.lst mkdir fbfa # calculate a reasonable number of jobs calc `wc /cluster/data/dm2/bed/blat.dm2FB/dm2FB.psl|awk "{print \\\$1}"`/\(80000/`wc target.lst | awk "{print \\\$1}"`\) # 18929/(80000/396) = 93.698550 split -l 94 /cluster/data/dm2/bed/blat.dm2FB/dm2FB.psl fbfa/fb cd fbfa for i in *; do pslxToFa $i $i.fa; rm $i; done cd .. 
ls -1S fbfa/*.fa > fb.lst mkdir -p /cluster/bluearc/droMoj2/bed/tblastn.dm2FB/blastOut ln -s /cluster/bluearc/droMoj2/bed/tblastn.dm2FB/blastOut for i in `cat fb.lst`; do mkdir blastOut/`basename $i .fa`; done tcsh cat << '_EOF_' > blastGsub #LOOP blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } #ENDLOOP '_EOF_' cat << '_EOF_' > blastSome #!/bin/sh BLASTMAT=/iscratch/i/blast/data export BLASTMAT g=`basename $2` f=/tmp/`basename $3`.$g for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11 do if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8 then mv $f.8 $f.1 break; fi done if test -f $f.1 then if /cluster/bin/i386/blastToPsl $f.1 $f.2 then liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/dm2/bed/blat.dm2FB/protein.lft warn $f.2 mv $3.tmp $3 rm -f $f.1 $f.2 $f.3 $f.4 exit 0 fi fi rm -f $f.1 $f.2 $3.tmp $f.3 $f.8 $f.4 exit 1 '_EOF_' chmod +x blastSome gensub2 target.lst fb.lst blastGsub blastSpec para create blastSpec para push # Completed: 79991 of 79992 jobs # para.results: file not found. paraHub can't write to this dir? # CPU time in finished jobs: 5722807s 95380.12m 1589.67h 66.24d 0.181 y # IO & Wait Time: 517066s 8617.76m 143.63h 5.98d 0.016 y # Average job time: 78s 1.30m 0.02h 0.00d # Longest finished job: 44845s 747.42m 12.46h 0.52d # Submission to last job: 78192s 1303.20m 21.72h 0.91d ssh pk cd /cluster/data/droMoj2/bed/tblastn.dm2FB tcsh cat << '_EOF_' > chainGsub #LOOP chainSome $(path1) #ENDLOOP '_EOF_' cat << '_EOF_' > chainSome (cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=25000 stdin ../c.`basename $1`.psl) '_EOF_' chmod +x chainSome ls -1dS `pwd`/blastOut/fb?? 
> chain.lst gensub2 chain.lst single chainGsub chainSpec para create chainSpec para maxNode 20 para push # Completed: 202 of 202 jobs # CPU time in finished jobs: 27335s 455.59m 7.59h 0.32d 0.001 y # IO & Wait Time: 3390s 56.50m 0.94h 0.04d 0.000 y # Average job time: 152s 2.54m 0.04h 0.00d # Longest finished job: 4228s 70.47m 1.17h 0.05d # Submission to last job: 4228s 70.47m 1.17h 0.05d ssh kkstore02 cd /cluster/data/droMoj2/bed/tblastn.dm2FB/blastOut for i in fb?? do awk "(\$13 - \$12)/\$11 > 0.6 {print}" c.$i.psl > c60.$i.psl sort -rn c60.$i.psl | pslUniq stdin u.$i.psl awk "((\$1 / \$11) ) > 0.60 { print }" c60.$i.psl > m60.$i.psl echo $i done sort -T /tmp -k 14,14 -k 16,16n -k 17,17n u.*.psl m60* | uniq > /cluster/data/droMoj2/bed/tblastn.dm2FB/blastDm2FB.psl cd .. wc blastDm2FB.psl # 19158 402318 5264815 blastDm2FB.psl pslUniq blastDm2FB.psl stdout | wc # 17958 377118 5085091 cat fbfa/*fa | grep ">" | wc # 82338 82338 1300520 ssh hgwdev cd /cluster/data/droMoj2/bed/tblastn.dm2FB hgLoadPsl droMoj2 blastDm2FB.psl featureBits droMoj2 blastDm2FB # 18239436 bases of 180520992 (10.104%) in intersection exit # back to kkstore02 rm -rf blastOut # End tblastn # AUTO UPDATE GENBANK MRNA AND EST GENES RUN (DONE, 2005-08-22, markd) # align with revised genbank process cd ~kent/src/makeDb/genbank cvs update -d etc # edit etc/genbank.conf to add droMoj2, had to run on pk, due to kk # being down. Set temporary locations for server files # droMoj2 (D. 
mojavensis) # genbank has 1 mRNA and 0 ESTs at time of initial build droMoj2.serverGenome = /cluster/data/droMoj2/droMoj2.2bit ##droMoj2.clusterGenome = /iscratch/i/droMoj2/droMoj2.2bit ##droMoj2.ooc = /iscratch/i/droMoj2/11.ooc droMoj2.clusterGenome = /san/sanvol1/scratch/droMoj2/droMoj2.2bit droMoj2.ooc = /san/sanvol1/scratch/droMoj2/11.ooc droMoj2.lift = no droMoj2.refseq.mrna.native.load = no droMoj2.refseq.mrna.xeno.load = yes droMoj2.refseq.mrna.xeno.pslCDnaFilter = -minCover=0.15 -coverNearTop=0.005 -minId=0.75 -idNearTop=0.005 -maxRepMatch=0.4 -bestOverlap droMoj2.genbank.mrna.xeno.load = yes droMoj2.genbank.est.native.load = no droMoj2.genbank.est.xeno.load = no droMoj2.downloadDir = droMoj2 droMoj2.perChromTables = no # update /cluster/data/genbank/ make etc-update ssh kkstore02 cd /cluster/data/genbank nice bin/gbAlignStep -initial droMoj2 & # when finished ssh hgwdev cd /cluster/data/genbank nice ./bin/gbDbLoadStep -drop -initialLoad droMoj2& # AUTO UPDATE GENBANK MRNA AND EST GENES RUN (DONE, 2005-08-31, markd) # redo genbank revised alignment procedure once again to # pickup local near best pslCDnaFilter # align with revised genbank process cd ~kent/src/makeDb/genbank cvs update -d etc # edit etc/genbank.conf to add droMoj2 # droMoj2 (D. 
mojavensis) # genbank has 1 mRNA and 0 ESTs at time of initial build droMoj2.serverGenome = /cluster/data/droMoj2/droMoj2.2bit droMoj2.clusterGenome = /iscratch/i/droMoj2/droMoj2.2bit droMoj2.ooc = /iscratch/i/droMoj2/11.ooc droMoj2.refseq.mrna.native.pslCDnaFilter = ${lowCover.refseq.mrna.native.pslCDnaFilter} droMoj2.refseq.mrna.xeno.pslCDnaFilter = ${lowCover.refseq.mrna.xeno.pslCDnaFilter} droMoj2.genbank.mrna.native.pslCDnaFilter = ${lowCover.genbank.mrna.native.pslCDnaFilter} droMoj2.genbank.mrna.xeno.pslCDnaFilter = ${lowCover.genbank.mrna.xeno.pslCDnaFilter} droMoj2.genbank.est.native.pslCDnaFilter = ${lowCover.genbank.est.native.pslCDnaFilter} droMoj2.genbank.est.xeno.pslCDnaFilter = ${lowCover.genbank.est.xeno.pslCDnaFilter} droMoj2.lift = no droMoj2.refseq.mrna.native.load = no droMoj2.refseq.mrna.xeno.load = yes droMoj2.genbank.mrna.xeno.load = yes droMoj2.genbank.est.native.load = no droMoj2.genbank.est.xeno.load = no droMoj2.downloadDir = droMoj2 droMoj2.perChromTables = no # update /cluster/data/genbank/ make etc-update ssh kkstore02 cd /cluster/data/genbank nice bin/gbAlignStep -initial droMoj2 & # when finished ssh hgwdev cd /cluster/data/genbank nice ./bin/gbDbLoadStep -drop -initialLoad droMoj2& # GENEMAPPER PREDICTIONS FROM UCB (DONE 1/24/06 angie) ssh hgwdev mkdir /cluster/data/droMoj2/bed/geneMapper cd /cluster/data/droMoj2/bed/geneMapper wget http://bio.math.berkeley.edu/genemapper/GFF/rel0.2/DroMoj_20050801.gff # Don't use -genePredExt... there are no start/stop_codon items, so # all get marked "incmpl", and name2 always gets the same value as name. 
# Get rid of custom track header lines: egrep -v '^(track|browser) ' DroMoj_20050801.gff \ | ldHgGene -gtf droMoj2 geneMapper stdin ########################################################################### # LIFTOVER TO DROMOJ3 (DONE 3/16/09 angie) doSameSpeciesLiftOver.pl droMoj2 droMoj3 -debug \ -ooc=/hive/data/staging/data/droMoj2/11.ooc -workhorse=kolossus # *** Steps were performed in /hive/data/genomes/droMoj2/bed/blat.droMoj3.2009-03-16 cd /hive/data/genomes/droMoj2/bed/blat.droMoj3.2009-03-16/ doSameSpeciesLiftOver.pl droMoj2 droMoj3 \ -ooc=/hive/data/staging/data/droMoj2/11.ooc -workhorse=kolossus ###########################################################################