# for emacs: -*- mode: sh; -*-

# Caenorhabditis remanei --
#
# WashU's genome v2 PCAP assembly.
#

# DOWNLOAD SEQUENCE (DONE 3/9/05 Andy)
# I'm using the assembly as non-singleton supercontigs.  The fasta
# files on the WashU FTP site don't use genbank accessions.  Joanne
# Nelson provided an AGP file describing an assembly using the genbank
# sequence.  So I just went to genbank to get the sequence through their
# Taxonomy browser:
# http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=31234
# In the Entrez records section, click on Nucleotide.  Then on the new web
# page, select FASTA in the "Display" box and "File" in the "Send to" box.
# The FASTA file downloads to /tmp/sequences.fasta
mkdir -p /cluster/store10/caeRem1/downloads
ln -s /cluster/store10/caeRem1 /cluster/data/caeRem1
cd /cluster/store10/caeRem1/downloads
cp /tmp/sequences.fasta .

# Get the file from Joanne Nelson and clean it up (the AGP records in the
# file start with AAGD00 instead of AAGD01 like in genbank):
wget ftp://genome.wustl.edu/private/joanne/remanei/remanei_pcap.contigs.agp_no_singletons.gz
zcat remanei_pcap.contigs.agp_no_singletons.gz | sed 's/AAGD00/AAGD01/' > remanei.agp

# Clean up the genbank fasta and grab the records in the AGP file.
sed -e '/^$/d; s/^>gi.*gb|\(.*\)|.*$/>\1/' sequences.fasta > genbank.fa
rm sequences.fasta remanei_pcap.contigs.agp_no_singletons.gz
grep AAGD remanei.agp | cut -f6 | sort | uniq > accs.txt
faSomeRecords genbank.fa accs.txt remanei.contigs.fa
agpAllToFaFile -sizes=supercontigs remanei.agp remanei.contigs.fa remanei.unmasked.fa
faSize remanei.unmasked.fa
#143407347 bases (5365830 N's 138041517 real 138041517 upper 0 lower) in 3584 sequences in 1 files
#Total size: mean 40013.2 sd 199471.6 min 2021 (SuperCont6529) max 5925111 (SuperCont0) median 5701
#N count: mean 1497.2 sd 2951.0
#U count: mean 38516.0 sd 197042.1
#L count: mean 0.0 sd 0.0


# CREATING DATABASE (DONE 3/10/05 Andy)
# Create the database.
ssh hgwdev
hgsql '' -e 'create database caeRem1'


# LOAD GAP & GOLD TABLES FROM AGP (DONE 3/10/2005 Andy)
ssh hgwdev
cd /cluster/data/caeRem1/downloads
hgGoldGapGl -noGl caeRem1 remanei.agp
# For some reason, the indices did not get built correctly --
# "show index from gap/gold" shows NULL cardinalities for chrom.
# Rebuild indices with "analyze table".
# *** Andy's note: the same thing happened in this assembly too.
hgsql caeRem1 -e 'analyze table gold; analyze table gap;'
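
# (Not a step recorded in the original log.)  A quick way to confirm that
# the "analyze table" fix actually took is to look at the index cardinalities
# again; non-NULL values for the chrom column mean the statistics were
# rebuilt.  Just a suggested sanity check:
#    hgsql caeRem1 -e 'show index from gold; show index from gap;'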

# REPEATMASKER (DONE 3/10/05 Andy)
# Split files for the run:
ssh kksilo
cd /cluster/data/caeRem1/downloads
mkdir ../unmaskedSplits
faSplit -outDirDepth=1 -lift=../unmaskedSplits/splits.lft gap remanei.unmasked.fa 500000 ../unmaskedSplits/

# copy to iscratch
ssh kkr1u00
mkdir /iscratch/i/caeRem1
cd /iscratch/i/caeRem1
cp -r /cluster/data/caeRem1/unmaskedSplits .
iSync

ssh kk
cd /cluster/data/caeRem1
mkdir RMRun RMOut jkStuff
cat << "_EOF_" > jkStuff/RMApis
#!/bin/bash
file=`basename $1`
wholedir=`dirname $1`
part=`basename $wholedir`
cd /cluster/data/caeRem1/RMOut
mkdir -p /tmp/caeRem1/$part
cp /iscratch/i/caeRem1/unmaskedSplits/$part/$file /tmp/caeRem1/$part
pushd /tmp/caeRem1/$part
/cluster/bluearc/RepeatMasker/RepeatMasker -s -spec worm $file
popd
mkdir -p $part
cp /tmp/caeRem1/$part/$file.out ./$part/$file.out
rm -fr /tmp/caeRem1/$part/$file.*
rmdir --ignore-fail-on-non-empty /tmp/caeRem1/$part
rmdir --ignore-fail-on-non-empty /tmp/caeRem1
_EOF_
# << this line makes emacs coloring happy
chmod +x jkStuff/RMApis

cd RMRun
find ../unmaskedSplits/ -name '*.fa' > files.lst
cat << "_EOF_" > gsub
#LOOP
../jkStuff/RMApis $(path1) {check in line+ $(path1)} {check out line+ ../RMOut/$(lastDir1)/$(root1).fa.out}
#ENDLOOP
_EOF_
gensub2 files.lst single gsub RMJobs
para create RMJobs
para try
para push
para time
#3718 jobs in batch
#126046 jobs (including everybody's) in Parasol queue.
#Checking finished jobs
#Completed: 3718 of 3718 jobs
#CPU time in finished jobs: 1183137s 19718.95m 328.65h 13.69d 0.038 y
#IO & Wait Time: 18806s 313.44m 5.22h 0.22d 0.001 y
#Average job time: 323s 5.39m 0.09h 0.00d
#Longest running job: 0s 0.00m 0.00h 0.00d
#Longest finished job: 4496s 74.93m 1.25h 0.05d
#Submission to last job: 10211s 170.18m 2.84h 0.12d

# Lift results.
ssh kksilo
cd /cluster/data/caeRem1/RMOut
for dir in *; do
    cd $dir
    for file in *; do
        tail +5 $file >> ../tmp
    done
    cd ../
done
head -n 4 0/000.fa.out | cat - tmp > rmsk.out
liftUp rmsk.lifted.out ../unmaskedSplits/splits.lft warn rmsk.out
rm tmp rmsk.out

# Load results
ssh hgwdev
cd /cluster/data/caeRem1/RMOut
hgLoadOut caeRem1 rmsk.lifted.out
hgsql caeRem1 -e 'rename table rmsk_rmsk to rmsk'
hgsql caeRem1 -e 'drop index bin on rmsk; \
    drop index genoStart on rmsk; \
    drop index genoEnd on rmsk; \
    create index bin on rmsk (genoName(11), bin); \
    create index genoStart on rmsk (genoName(11), genoStart); \
    create index genoEnd on rmsk (genoName(11), genoEnd);'


# SIMPLE REPEATS (TRF) (DONE 3/10/2005 Andy)
ssh kksilo
mkdir -p /cluster/data/caeRem1/bed/simpleRepeat
cd /cluster/data/caeRem1/bed/simpleRepeat
nice trfBig ../../downloads/remanei.unmasked.fa trf.bed -bed -tempDir=/tmp > trf.log &
# Load this into the database as so
ssh hgwdev
cd /cluster/data/caeRem1/bed/simpleRepeat
hgLoadBed -sqlTable=/cluster/home/aamp/kent/src/hg/lib/simpleRepeat.sql \
    caeRem1 simpleRepeat trf.bed


# MAKE MASKED 2BIT (DONE 3/10/2005 Andy)
# make a filtered version of the trf output:
# keep trf's with period <= 12:
ssh kksilo
cd /cluster/data/caeRem1/bed/simpleRepeat
awk '{if ($5 <= 12) print;}' trf.bed > trfMask.bed
cd ../../
mkdir maskedFa
cd maskedFa/
maskOutFa -soft ../downloads/remanei.unmasked.fa ../bed/simpleRepeat/trfMask.bed remanei.softMasked.fa
maskOutFa -softAdd remanei.softMasked.fa ../RMOut/rmsk.lifted.out remanei.softMasked.fa
maskOutFa remanei.softMasked.fa hard remanei.hardMasked.fa
faToTwoBit remanei.softMasked.fa caeRem1.2bit
# faToTwoBit remanei.hardMasked.fa caeRem1.hardMasked.2bit
mv *.2bit ../
cd ../

# Load into database
hgsql caeRem1 < ~/kent/src/hg/lib/chromInfo.sql
twoBitInfo caeRem1.2bit chrom.sizes.1
awk '{printf("%s\t%s\t/gbdb/caeRem1/caeRem1.2bit\n", $1, $2)}' chrom.sizes.1 > chrom.sizes
rm chrom.sizes.1
echo "load data local infile 'chrom.sizes' into table chromInfo;" | hgsql caeRem1
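
# (Not a step recorded in the original log.)  At this point it can be worth
# eyeballing how much sequence actually ended up soft-masked; faSize reports
# the lower-case (masked) base count, which can be compared against the
# "0 lower" figure from the unmasked faSize run above.  Just a suggested
# check:
#    faSize maskedFa/remanei.softMasked.fa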

# CREATING GRP TABLE FOR TRACK GROUPING (DONE 3/30/2005 Andy)
# Copy all the data from the table "grp"
# in an existing database to the new database
ssh hgwdev
hgsql caeRem1 -e 'create table grp (PRIMARY KEY(NAME)) select * from hg17.grp'


# MAKE HGCENTRALTEST ENTRY AND TRACKDB TABLE (DONE 3/30/2005 Andy)
# Warning: genome and organism fields must correspond
# with defaultDb values
echo 'INSERT INTO dbDb \
    (name, description, nibPath, organism, \
    defaultPos, active, orderKey, genome, scientificName, \
    htmlPath, hgNearOk, hgPbOk, sourceName) values \
    ("caeRem1", "March 2005", "/gbdb/caeRem1", "C. remanei", \
    "SuperCont12:1-12000", 1, 80, \
    "C. remanei", \
    "Caenorhabditis remanei", "/gbdb/caeRem1/html/description.html", \
    0, 0, "WUGSC");' \
    | hgsql -N hgcentraltest
echo 'update defaultDb set name="caeRem1" where genome="C. remanei"' \
    | hgsql -N hgcentraltest
hgsql hgcentraltest -e 'INSERT INTO genomeClade (genome, clade, priority) \
    values ("C. remanei", "worm", 30);'

ssh hgwdev
cd ~/kent/src/hg/makeDb/trackDb
cvs update
# Edit trackDb/makefile to add caeRem1 to the DBS variable.
mkdir worm/caeRem1
# Create a simple worm/caeRem1/description.html file.  In the initial case
# it's just empty.
cd worm
cvs add caeRem1
cvs add caeRem1/description.html
make update DBS=caeRem1 ZOO_DBS=
# go public on genome-test
cvs ci -m "Added caeRem1 (another worm)." makefile
cvs ci -m "Empty initial trackDb.ra and description.html for caeRem1" worm
# in a clean, updated tree's kent/src/hg/makeDb/trackDb:
make alpha


# SOME GBDB STUFF (DONE 3/30/2005 Andy)
mkdir -p /gbdb/caeRem1
mkdir /cluster/data/caeRem1/html
ln -s /cluster/data/caeRem1/html /gbdb/caeRem1/html
ln -s /cluster/data/caeRem1/caeRem1.2bit /gbdb/caeRem1/caeRem1.2bit


# MAKE GC5BASE WIGGLE TRACK (DONE 3/30/2005 Andy)
ssh hgwdev
mkdir /cluster/data/caeRem1/bed/gc5Base
cd /cluster/data/caeRem1/bed/gc5Base
hgGcPercent -wigOut -doGaps -file=stdout -win=5 -verbose=2 caeRem1 \
    /cluster/data/caeRem1 | wigEncode stdin gc5Base.wig gc5Base.wib
mkdir /gbdb/caeRem1/wib
ln -s `pwd`/gc5Base.wib /gbdb/caeRem1/wib
hgLoadWiggle -pathPrefix=/gbdb/caeRem1/wib caeRem1 gc5Base gc5Base.wig


## MAKE DOWNLOADABLE FILES (DONE 3/30/2005 Andy)
ssh hgwdev
cd /cluster/data/caeRem1
mkdir zips
zip -j zips/allOut.zip RMOut/rmsk.lifted.out
zip -j zips/allFa.zip maskedFa/*.fa
zip -j zips/allTrf.zip bed/simpleRepeat/trfMask.bed
zip -j zips/allAgp.zip downloads/remanei.agp
cd maskedFa/
maskOutFa remanei.softMasked.fa hard remanei.hardMasked.fa
cd ../
zip -j zips/allFaMasked.zip maskedFa/remanei.hardMasked.fa
mkdir -p /usr/local/apache/htdocs/goldenPath/caeRem1
cd /usr/local/apache/htdocs/goldenPath/caeRem1
mkdir bigZips database
# Create README.txt files in bigZips/ and database/ to explain the files.
cd bigZips/
cp -p /cluster/data/caeRem1/zips/*.zip .
md5sum *.zip > md5sum.txt


# MAKE 11.OOC FILE FOR BLAT (DONE 3/30/2005 Andy)
# Use -repMatch=100 (based on size -- for human we use 1024, and this
# genome's ~138 Mb of real sequence is roughly 5% of human judging by the
# gapless size from faSize above -- straight scaling would suggest a value
# in the 40s, but bump that up a bit to be more conservative).
ssh hgwdev
mkdir -p /panasas/store/caeRem1
blat /cluster/data/caeRem1/caeRem1.2bit /dev/null /dev/null -tileSize=11 \
    -makeOoc=/panasas/store/caeRem1/11.ooc -repMatch=100
#Wrote 10174 overused 11-mers to /panasas/store/caeRem1/11.ooc
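
# (Not in the original log.)  The repMatch scaling above can be spelled out
# with the kent calc utility; the 2870000000 here is only an approximation
# of hg17's gapless size, not a number taken from this build:
#    calc 138041517 / 2870000000 \* 1024
# which comes out just under 50 -- hence the "bump up" to -repMatch=100.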

# PRODUCING GENSCAN PREDICTIONS (DONE 3/30/2005 Andy)
ssh hgwdev
# Make hard-masked scaffolds and split up for processing:
cd /cluster/data/caeRem1
mkdir hardMaskedFaSplits
cd maskedFa/
faSplit -outDirDepth=1 -lift=../hardMaskedFaSplits/hardMasked.lft gap remanei.hardMasked.fa 1000000 .
mv ? ../hardMaskedFaSplits
mv ../hardMaskedFaSplits /panasas/store/caeRem1

mkdir /cluster/data/caeRem1/bed/genscan
cd /cluster/data/caeRem1/bed/genscan
# Check out hg3rdParty/genscanlinux to get latest genscan:
cvs co hg3rdParty/genscanlinux
# Make 3 subdirectories for genscan to put their output files in
pushd /cluster/bluearc
mkdir -p caeRem1/genscan
cd caeRem1/genscan
mkdir gtf pep subopt
popd
find /panasas/store/caeRem1/hardMaskedFaSplits -name '*.fa' > chunks.lst
cat << _EOF_ > gsub
#LOOP
gsBig {check in line+ \$(path1)} {check out line /cluster/bluearc/caeRem1/genscan/gtf/\$(root1).gtf} -trans={check out line /cluster/bluearc/caeRem1/genscan/pep/\$(root1).pep} -subopt={check out line /cluster/bluearc/caeRem1/genscan/subopt/\$(root1).bed} -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=2400000
#ENDLOOP
_EOF_
# OK if this needs to be redone, then it should be noted that I created 3626
# files in each of the /cluster/bluearc/caeRem1/genscan/* dirs, and I
# should've partitioned.
# << this line keeps emacs coloring happy
gensub2 chunks.lst single gsub jobList
ssh kk
cd /cluster/data/caeRem1/bed/genscan
para create jobList
para try
para check
para push
#3626 jobs in batch
#0 jobs (including everybody's) in Parasol queue.
#Checking finished jobs
#Completed: 3625 of 3626 jobs
#Crashed: 1 jobs
#CPU time in finished jobs: 8081s 134.69m 2.24h 0.09d 0.000 y
#IO & Wait Time: 9851s 164.18m 2.74h 0.11d 0.000 y
#Average job time: 5s 0.08m 0.00h 0.00d
#Longest running job: 0s 0.00m 0.00h 0.00d
#Longest finished job: 190s 3.17m 0.05h 0.00d
#Submission to last job: 690s 11.50m 0.19h 0.01d
# If there are crashes, diagnose with "para problems".
# If a job crashes due to genscan running out of memory, re-run it
# manually with "-window=200000" instead of "-window=2400000".
ssh kolossus
cd /cluster/data/caeRem1/bed/genscan
gsBig /panasas/store/caeRem1/hardMaskedFaSplits/9/2649.fa \
    /cluster/bluearc/caeRem1/genscan/gtf/2649.gtf \
    -trans=/cluster/bluearc/caeRem1/genscan/pep/2649.pep \
    -subopt=/cluster/bluearc/caeRem1/genscan/subopt/2649.bed \
    -exe=hg3rdParty/genscanlinux/genscan \
    -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=200000

# Lift and cat things
ssh hgwdev
cd /cluster/data/caeRem1/bed/genscan
mv /cluster/bluearc/caeRem1/genscan/* .
cat gtf/*.gtf > /tmp/genscan.gtf
liftUp genscan.gtf /panasas/store/caeRem1/hardMaskedFaSplits/hardMasked.lft warn /tmp/genscan.gtf
cat subopt/*.bed > /tmp/genscanSubopt.bed
liftUp genscanSubopt.bed /panasas/store/caeRem1/hardMaskedFaSplits/hardMasked.lft warn /tmp/genscanSubopt.bed
cat pep/*.pep > genscan.pep

# Clean up
rm -rf pep/ gtf/ subopt/
rm -rf /panasas/store/caeRem1/hardMaskedFaSplits

# Load into the database:
cd /cluster/data/caeRem1/bed/genscan
ldHgGene -gtf caeRem1 genscan genscan.gtf
hgPepPred caeRem1 generic genscanPep genscan.pep
hgLoadBed caeRem1 genscanSubopt genscanSubopt.bed
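
# (Not recorded in the original log.)  A typical follow-up sanity check here
# would be to see how much of the genome the new tracks cover, in the same
# way featureBits is used below for the mRNA tracks; no figure was kept for
# this build:
#    featureBits caeRem1 genscan
#    featureBits caeRem1 genscanSubopt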

# GENBANK mRNA AND EST COUNTS (DONE 3/30/2005 Andy)
# Go to the latest GenBank full release dir and get an idea of how
# many mRNAs and ESTs there are to align.
ssh eieio
cd /cluster/data/genbank/data/processed/genbank.146.0/full
grep remanei mrna.gbidx | wc -l
#21
grep remanei est.*gbidx | wc -l
#0
# Hmmmmm... I'm not sure if that's enough sequence to justify an EST track.
# At this point I'll just start with the mRNA and refgene mRNAs


# AUTO UPDATE GENBANK MRNA RUN (IN PROGRESS 3/30/2005 Andy)
ssh hgwdev
# Update genbank config and source in CVS:
cd ~/kent/src/hg/makeDb/genbank
cvsup .

# Add the following stanza to etc/genbank.conf:
# caeRem1 (C. remanei)
caeRem1.genome = /panasas/store/caeRem1/caeRem1.2bit
caeRem1.lift = no
caeRem1.refseq.mrna.native.load = yes
caeRem1.genbank.mrna.xeno.load = yes
caeRem1.genbank.est.xeno.load = no
caeRem1.downloadDir = caeRem1
caeRem1.perChromTables = no

cvs commit -m 'caeRem1 added to genbank update.' etc/genbank.conf
# Edit src/align/gbBlat to add /panasas/store/caeRem1/11.ooc
cvs commit -m '' src/align/gbBlat

# Install to /cluster/data/genbank:
make install-server

ssh `fileServer /cluster/data/genbank/`
cd /cluster/data/genbank
# This is an -initial run, mRNA only:
nice bin/gbAlignStep -srcDb=genbank -type=mrna -initial caeRem1 &
tail -f [its logfile]

# Load results:
#    ssh hgwdev
#    cd /cluster/data/genbank
#    nice bin/gbDbLoadStep -verbose=1 -drop -initialLoad caeRem1 &
#    featureBits caeRem1 all_mrna
#    featureBits caeRem1 xenoMrna
#
# Clean up:
#    rm -rf work/initial.caeRem1
# At this point, I'm not adding ESTs.  But if we do put them in later,
# it'll be better to use the -drop option to gbDbLoadStep and do the
# mRNA all over again too.


# BRIAN-STYLE HUMAN PROTEINS TRACK (HG17) (IN PROGRESS 3/30/2005 Andy)
ssh kolossus
mkdir -p /cluster/data/caeRem1/blastDb
cd /cluster/data/caeRem1/blastDb
pushd ../maskedFa/
mkdir ../faSplits
faSplit -outDirDepth=1 -lift=../faSplits/liftAll.lft gap remanei.softMasked.fa 500000 .
mv ? ../faSplits/
popd
for i in `seq 0 9`; do
    echo $i
    mkdir $i
    pushd $i
    ln -s ../../faSplits/$i/*.fa .
    for j in *.fa; do
        /iscratch/i/blast/formatdb -i $j -p F
    done
    popd
done
find . -name '*.fa' | xargs rm
rm ?/*.log
cd ../
cp -r blastDb/ /panasas/store/caeRem1

mkdir -p /cluster/data/caeRem1/bed/tblastn.hg17KG
cd /cluster/data/caeRem1/bed/tblastn.hg17KG
find /panasas/store/caeRem1/blastDb -name '*.nsq' | sed "s/\.nsq//" > query.lst
calc `wc /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl | awk "{print \\\$1}"`/\(350000/`wc query.lst | awk "{print \\\$1}"`\)
# 42156/(350000/3718) = 447.817166
mkdir -p /cluster/bluearc/caeRem1/bed/tblastn.hg17KG/kgfa
split -l 448 /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl /cluster/bluearc/caeRem1/bed/tblastn.hg17KG/kgfa/kg
ln -s /cluster/bluearc/caeRem1/bed/tblastn.hg17KG/kgfa kgfa
cd kgfa
for i in *; do
    pslxToFa $i $i.fa
    rm $i
done
cd ..
ls -1S kgfa/*.fa > kg.lst
mkdir -p /cluster/bluearc/caeRem1/bed/tblastn.hg17KG/blastOut
ln -s /cluster/bluearc/caeRem1/bed/tblastn.hg17KG/blastOut
for i in `cat kg.lst`; do mkdir blastOut/`basename $i .fa`; done
tcsh
cat << _EOF_ > blastGsub
#LOOP
blastSome \$(path1) {check in line \$(path2)} {check out exists blastOut/\$(root2)/q.\$(root1).psl }
#ENDLOOP
_EOF_
cat << _EOF_ > blastSome
#!/bin/sh
BLASTMAT=/iscratch/i/blast/data
export BLASTMAT
g=\`basename \$2\`
f=/tmp/\`basename \$3\`.\$g
for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e \$eVal -p tblastn -d \$1 -i \$2 -o \$f.8
then
    mv \$f.8 \$f.1
    break;
fi
done
if test -f \$f.1
then
    if /cluster/bin/i386/blastToPsl \$f.1 \$f.2
    then
        liftUp -nosort -type=".psl" -nohead \$f.3 /cluster/data/caeRem1/jkStuff/liftAll.lft warn \$f.2
        liftUp -nosort -type=".psl" -pslQ -nohead \$3.tmp /cluster/data/hg17/bed/blat.hg17KG/protein.lft warn \$f.3
        if pslCheck -prot \$3.tmp
        then
            mv \$3.tmp \$3
            rm -f \$f.1 \$f.2
        fi
        exit 0
    fi
fi
rm -f \$f.1 \$f.2 \$3.tmp \$f.8
exit 1
_EOF_
chmod +x blastSome
gensub2 query.lst kg.lst blastGsub blastSpec
ssh kk
cd /cluster/data/caeRem1/bed/tblastn.hg17KG
para create blastSpec

#######################################################################
# sangerGeneWS140 - mapping Ce3 sangerGene onto this sequence
#	(DONE - 2005-08-17 - Hiram)
ssh pk
mkdir /san/sanvol1/scratch/worms/caeRem1/blastDb
cd /san/sanvol1/scratch/worms/caeRem1/blastDb
faSplit sequence /cluster/data/caeRem1/maskedFa/remanei.softMasked.fa 10 c_
# And construct the blast database
ssh kkr10u01
for i in *.fa
do
    /cluster/bluearc/blast2211x86_64/bin/formatdb -i $i -p F
done
exit

mkdir -p /cluster/data/caeRem1/bed/tblastn.sangerGene
cd /cluster/data/caeRem1/bed/tblastn.sangerGene
ls -1S /san/sanvol1/scratch/worms/caeRem1/blastDb/*.fa > query.lst
ls -1S /san/sanvol1/scratch/worms/ce3/splitPep/*.fa > pep.lst
mkdir blastOut
for i in `cat pep.lst`; do mkdir blastOut/`basename $i .fa`; done

cat << '_EOF_' > gsub
#LOOP
blastSome {check in line+ $(path1)} {check in line+ $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl }
#ENDLOOP
'_EOF_'
# happy emacs

cat << '_EOF_' > blastSome
#!/bin/sh
BLASTMAT=/cluster/bluearc/blast2211x86_64/data
export BLASTMAT
g=`basename $2`
f=/tmp/`basename $3`.$g
for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
if /cluster/bluearc/blast2211x86_64/bin/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
then
    mv $f.8 $f.1
    break;
fi
done
if test -f $f.1
then
    if /cluster/bin/i386/blastToPsl $f.1 $f.2
    then
        if pslCheck -prot $f.2
        then
            cp -p $f.2 $3
            rm -f $f.1 $f.2
        fi
        exit 0
    fi
fi
rm -f $f.1 $f.2 $f.8
exit 1
'_EOF_'
# happy emacs
chmod +x blastSome

gensub2 query.lst pep.lst gsub jobList
para create jobList
para try; push; check ... etc ...
# Completed: 8170 of 8170 jobs
# CPU time in finished jobs: 119476s 1991.26m 33.19h 1.38d 0.004 y
# IO & Wait Time: 21472s 357.87m 5.96h 0.25d 0.001 y
# Average job time: 17s 0.29m 0.00h 0.00d
# Longest running job: 0s 0.00m 0.00h 0.00d
# Longest finished job: 33s 0.55m 0.01h 0.00d
# Submission to last job: 1258s 20.97m 0.35h 0.01d

cat << '_EOF_' > chainGsub
#LOOP
chainSome $(path1)
#ENDLOOP
'_EOF_'
# happy emacs

cat << '_EOF_' > chainSome
(cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=7000 stdin ../c.`basename $1`.psl)
'_EOF_'
# happy emacs
chmod +x chainSome

ls -1dS `pwd`/blastOut/wp???? > chain.lst
gensub2 chain.lst single chainGsub chainJobs
para create chainJobs
para try; push; check ... etc ...
# Completed: 817 of 817 jobs
# CPU time in finished jobs: 40s 0.67m 0.01h 0.00d 0.000 y
# IO & Wait Time: 2092s 34.87m 0.58h 0.02d 0.000 y
# Average job time: 3s 0.04m 0.00h 0.00d
# Longest running job: 0s 0.00m 0.00h 0.00d
# Longest finished job: 6s 0.10m 0.00h 0.00d
# Submission to last job: 97s 1.62m 0.03h 0.00d

ssh kkstore01
cd /cluster/data/caeRem1/bed/tblastn.sangerGene/blastOut
for i in wp????
do
    awk "(\$13 - \$12)/\$11 > 0.6 {print}" c.$i.psl > c60.$i.psl
    sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
    awk "((\$1 / \$11) ) > 0.60 {print}" c60.$i.psl > m60.$i.psl
    echo $i
done
cat u.*.psl m60* | sort -T /tmp -k 14,14 -k 16,16n -k 17,17n \
    | uniq > ../preblastSangerGene.psl
XXXX
cd ..
# haven't tried this yet - 2005-08-12 - should be interesting, need a
# psl file indicating where the sangerGene's are on Ce3 and alias name list
blatDir=/cluster/data/hg16/bed/blat.hg16KG
protDat -kg preblastHg16KG.psl $blatDir/hg16KG.psl $blatDir/kg.mapNames blastHg16KG.psl
XXXX

ssh hgwdev
cd /cluster/data/caeRem1/bed/tblastn.sangerGene
hgLoadPsl -table=blastSangerGene caeRem1 preblastSangerGene.psl

# clean up
ssh kkstore01
cd /cluster/data/caeRem1/bed/tblastn.sangerGene
rm -rf blastOut

############################################################################
# BLATSERVERS ENTRY (DONE - 2005-08-31 - Hiram)
ssh hgwdev
hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
    VALUES ("caeRem1", "blat16", "17788", "1", "0"); \
    INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
    VALUES ("caeRem1", "blat16", "17789", "0", "1");' \
    hgcentraltest

############################################################################
# preparing goldenPath downloads (WORKING 2005-12-14 - Hiram)
ssh kkstore01
mkdir /cluster/data/caeRem1/goldenPath
cd /cluster/data/caeRem1/goldenPath
mkdir bigZips
cp -p ../maskedFa/remanei.softMasked.fa bigZips/superContigs.masked.fa
gzip bigZips/superContigs.masked.fa
cp -p ../RMOut/rmsk.lifted.out bigZips/rmsk.out
gzip bigZips/rmsk.out
ssh hgwdev
mkdir /usr/local/apache/htdocs/goldenPath/caeRem1/bigZips
cd /usr/local/apache/htdocs/goldenPath/caeRem1/bigZips
ln -s /cluster/data/caeRem1/goldenPath/bigZips/* .
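
# (Not in the original log; the section above is marked as still in
# progress.)  To match the earlier downloads setup, one would presumably
# also add a README.txt and an md5sum.txt next to the new gzipped files,
# along the lines of:
#    cd /usr/local/apache/htdocs/goldenPath/caeRem1/bigZips
#    md5sum *.gz > md5sum.txt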