# for emacs: -*- mode: sh; -*- # Ciona savignyi (cioSav2) # NOTE(review): this title originally read "Ciona Intestinalis V2.0 from JGI", which conflicts with the "Ciona savignyi" dbDb entry below; confirm the assembly version and source # DOWNLOAD SEQUENCE ssh kkstore02 mkdir /cluster/store11/cioSav2/downloads ln -s /cluster/store11/cioSav2 /cluster/data cd /cluster/data/cioSav2/downloads cd .. mkdir chunks500k jkStuff scaffolds nib cd scaffolds faSplit byname ../download/*fa . for i in *.fa do faToNib $i ../nib/`basename $i .fa`.nib done cd .. faSplit gap download/*fa 50000 chunks50k/x -lift=jkStuff/chunks50k.lft -minGapSize=100 # NOTE(review): the directory made above is "downloads" but faSplit reads "download/", and "chunks500k" is created while faSplit writes to "chunks50k/"; confirm which names were actually used # REPEAT MASKING #- Make the run directory and job list: cd /cluster/data/cioSav2 tcsh cat << '_EOF_' > jkStuff/RMCiona #!/bin/csh -fe cd $1 pushd . /bin/mkdir -p /tmp/cioSav2/$2 /bin/cp $2 /tmp/cioSav2/$2/ cd /tmp/cioSav2/$2 /cluster/bluearc/RepeatMasker/RepeatMasker -ali -s $2 popd /bin/cp /tmp/cioSav2/$2/$2.out ./ if (-e /tmp/cioSav2/$2/$2.align) /bin/cp /tmp/cioSav2/$2/$2.align ./ if (-e /tmp/cioSav2/$2/$2.tbl) /bin/cp /tmp/cioSav2/$2/$2.tbl ./ if (-e /tmp/cioSav2/$2/$2.cat) /bin/cp /tmp/cioSav2/$2/$2.cat ./ /bin/rm -fr /tmp/cioSav2/$2/* /bin/rmdir --ignore-fail-on-non-empty /tmp/cioSav2/$2 /bin/rmdir --ignore-fail-on-non-empty /tmp/cioSav2 '_EOF_' # << this line makes emacs coloring happy chmod +x jkStuff/RMCiona exit mkdir RMRun for i in chunks50k/*.fa do d=`dirname $i` f=`basename $i` echo "/cluster/data/cioSav2/jkStuff/RMCiona /cluster/data/cioSav2/$d $f {check out line+ /cluster/data/cioSav2/$d/$f.out} " done > RMRun/RMJobs #- Do the run ssh pk cd /cluster/data/cioSav2/RMRun para create RMJobs para try, para check, para check, para push, para check,... 
# Completed: 4617 of 4617 jobs # CPU time in finished jobs: 133300s 2221.66m 37.03h 1.54d 0.004 y # IO & Wait Time: 703794s 11729.90m 195.50h 8.15d 0.022 y # Average job time: 181s 3.02m 0.05h 0.00d # Longest finished job: 651s 10.85m 0.18h 0.01d # Submission to last job: 2948s 49.13m 0.82h 0.03d featureBits cioSav2 rmsk # 22881665 bases of 141233565 (16.201%) in intersection # SIMPLE REPEATS (TRF) ssh kkstore02 mkdir -p /cluster/data/cioSav2/bed/simpleRepeat cd /cluster/data/cioSav2/bed/simpleRepeat mkdir trf tcsh cp /dev/null jobs.csh foreach d (/cluster/data/cioSav2/chunks500k) foreach f ($d/*.fa) set fout = $f:t:r.bed echo $fout echo "/cluster/bin/i386/trfBig -trf=/cluster/bin/i386/trf $f /dev/null -bedAt=trf/$fout -tempDir=/tmp" >> jobs.csh end end screen csh -ef jobs.csh # check on this with tail -f jobs.log wc -l jobs.csh ls -1 trf | wc -l endsInLf trf/* # When job is done do: liftUp simpleRepeat.bed /cluster/data/cioSav2/jkStuff/liftAll.lft error trf/*.bed # Load into the database: ssh hgwdev hgLoadBed cioSav2 simpleRepeat /cluster/data/cioSav2/bed/simpleRepeat/simpleRepeat.bed -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql featureBits cioSav2 simpleRepeat # 6047914 bases of 141233565 (4.282%) in intersection # Create the database. ssh hgwdev echo 'create database cioSav2' | hgsql '' # CREATING GRP TABLE FOR TRACK GROUPING echo "create table grp (PRIMARY KEY(NAME)) select * from hg17.grp" | hgsql cioSav2 echo 'insert into blatServers values("cioSav2", "blat15", "17780", "1","0"); \ insert into blatServers values("cioSav2", "blat15", "17781", "0","1");' \ | hgsql -h genome-testdb hgcentraltest # Add dbDb and defaultDb entries: echo 'insert into dbDb (name, description, nibPath, organism, defaultPos, active, orderKey, genome, scientificName, \ htmlPath, hgNearOk) values("cioSav2", "July 2005", "/gbdb/cioSav2/nib", "C. savignyi", "", 1, \ 44, "C. 
savignyi", "Ciona savignyi", "/gbdb/cioSav2/html/description.html", 0);' \ | hgsql -h genome-testdb hgcentraltest # STORING O+O SEQUENCE AND ASSEMBLY INFORMATION # Make symbolic links in /gbdb/cioSav2/nib to the real nibs. ssh hgwdev mkdir /gbdb/cioSav2 ln -s /cluster/data/cioSav2/nib /gbdb/cioSav2/nib # Load /gbdb/cioSav2/nib paths into database cd /cluster/data/cioSav2 hgsql cioSav2 < ~kent/src/hg/lib/chromInfo.sql hgNibSeq -preMadeNib cioSav2 /gbdb/cioSav2/nib scaffolds/*.fa # make gcPercent table ssh hgwdev mkdir -p /cluster/data/cioSav2/bed/gcPercent cd /cluster/data/cioSav2/bed/gcPercent hgsql cioSav2 < ~/kent/src/hg/lib/gcPercent.sql hgGcPercent cioSav2 ../../nib #dropped gcPercent table (DONE braney 2005-08-19) ### AUTO UPDATE GENBANK MRNA RUN (DONE markd 2005-08-17) # genbank done with revised alignment procedure # Update genbank config and source in CVS: cd ~/kent/src/hg/makeDb/genbank cvs update etc # Edit etc/genbank.conf and add these lines: # NOTE: braney created an ooc and this was added after the initial alignment # cioSav2 (ciona savignyi) cioSav2.serverGenome = /cluster/data/cioSav2/cioSav2.2bit cioSav2.clusterGenome = /iscratch/i/cioSav2/cioSav2.2bit cioSav2.ooc = no cioSav2.maxIntron = 20000 cioSav2.lift = no cioSav2.refseq.mrna.native.load = yes cioSav2.refseq.mrna.xeno.load = yes cioSav2.refseq.mrna.xeno.pslCDnaFilter = -minCover=0.25 -coverNearTop=0.005 -minId=0.15 -idNearTop=0.005 -maxRepMatch=0.4 -bestOverlap cioSav2.genbank.mrna.xeno.load = yes cioSav2.genbank.est.xeno.load = no cioSav2.downloadDir = cioSav2 cioSav2.perChromTables = no cvs ci etc/genbank.conf # Install to /cluster/data/genbank: make etc-update ssh kkstore02 cd /cluster/data/genbank # do mrnas and ests in one run nice bin/gbAlignStep -initial cioSav2 & # Load results: ssh hgwdev cd /cluster/data/genbank nice bin/gbDbLoadStep -drop -initialLoad cioSav2 ### gap and repeats tables ssh hgwdev mkdir -p /cluster/data/cioSav2/bed/gapRmsk cd /cluster/data/cioSav2/bed/gapRmsk 
simpleGap /cluster/data/cioSav2/nib gap.bed repeats.bed echo "drop table gap;" | hgsql cioSav2 hgsql cioSav2 < ~/kent/src/hg/lib/gap.sql hgLoadBed -oldTable cioSav2 gap gap.bed echo "create index chrom on gap (chrom(13), bin) ;" | hgsql cioSav2 echo "create index chrom_2 on gap (chrom(13), chromStart);" | hgsql cioSav2 echo "create index chrom_3 on gap (chrom(13), chromEnd);" | hgsql cioSav2 echo "drop index bin on rmsk;" | hgsql cioSav2 echo "drop index genoStart on rmsk;" | hgsql cioSav2 echo "drop index genoEnd on rmsk;" | hgsql cioSav2 echo "create index chrom_2 on rmsk (genoName(13), genoStart);" | hgsql cioSav2 echo "create index chrom_3 on rmsk (genoName(13), genoEnd);" | hgsql cioSav2 ssh kkstore02 mkdir -p /cluster/data/cioSav2/bed/simpleRepeat cd /cluster/data/cioSav2/bed/simpleRepeat mkdir trf for i in ../../scaffolds/*.fa do trfBig $i /dev/null -bedAt=trf/`basename $i .fa`.bed > /dev/null 2>&1 ; echo $i; done cat trf/* > simpleRepeat.bed ssh hgwdev hgLoadBed cioSav2 simpleRepeat /cluster/data/cioSav2/bed/simpleRepeat/simpleRepeat.bed \ -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql ssh kkstore02 cd /cluster/data/cioSav2/bed/simpleRepeat mkdir -p trfMask cd trf for i in *.bed do awk '{if ($5 <= 12) print;}' $i > ../trfMask/$i done cd ../trfMask liftUp ../../../all.trfMask.bed ../../../jkStuff/chunks500k.lft error *.bed # use RepeatMasker and simpleRepeat to build masked fa's cd /cluster/data/cioSav2 mkdir maskedScaffolds cat scaffolds/*.fa | maskOutFa -soft stdin all.out all.tmp.fa maskOutFa -softAdd all.tmp.fa all.trfMask.bed all.masked.fa # NOTE(review): no step recorded in this doc creates all.out from the per-chunk RepeatMasker output, or splits all.masked.fa into maskedScaffolds/ before the loop below reads it; presumably those were run by hand -- confirm # Rebuild the nib files, using the soft masking in the fa: mkdir -p /cluster/data/cioSav2/nib cd /cluster/data/cioSav2/maskedScaffolds for i in *.fa do faToNib -softMask $i ../nib/`basename $i .fa`.nib done # Make one big 2bit file as well, and make a link to it in /gbdb/cioSav2: faToTwoBit *.fa ../cioSav2.2bit ln -s /cluster/data/cioSav2/cioSav2.2bit /gbdb/cioSav2/ ### SNAP GENE PREDICTIONS FROM COLIN DEWEY ssh hgwdev mkdir 
/cluster/data/cioSav2/bed/snap cd /cluster/data/cioSav2/bed/snap # contact: Colin Dewey wget "http://hanuman.math.berkeley.edu/~cdewey/tracks/SNAP.CioInt_2.gff.gz" gunzip SNAP.CioInt_2.gff.gz ldHgGene -gtf -frame -id -geneName cioSav2 snapGene SNAP.CioInt_2.gff # 31491546 bases of 141233565 (22.297%) in intersection # in ci1 28699953 bases of 113192845 (25.355%) in intersection # MAKE DOWNLOADABLE SEQUENCE FILES ssh kkstore02 cd /cluster/data/cioSav2 #- Build the .zip files cat << '_EOF_' > jkStuff/zipAll.csh rm -rf zip mkdir zip zip -j zip/Scaffold.out.zip all.out zip -j zip/ScaffoldFa.zip scaffolds/*.fa zip -j zip/ScaffoldFaMasked.zip maskedScaffolds/*.fa cd bed/simpleRepeat zip -j ../../zip/ScaffoldTrf.zip simpleRepeat.bed cd ../.. '_EOF_' # << this line makes emacs coloring happy ### NOTES: NEED NEW GENBANK PROCESS FOR THIS csh ./jkStuff/zipAll.csh |& tee zipAll.log cd zip #- Look at zipAll.log to make sure all file lists look reasonable. #- Check zip file integrity: foreach f (*.zip) unzip -t $f > $f.test tail -1 $f.test end wc -l *.zip.test #- Copy the .zip files to hgwdev:/usr/local/apache/... 
ssh hgwdev cd /cluster/data/cioSav2/zip set gp = /usr/local/apache/htdocs/goldenPath/cioSav2 mkdir -p $gp/bigZips cp -p *.zip $gp/bigZips # mkdir -p $gp/scaffolds # foreach f ( ../*/chr*.fa ) # zip -j $gp/chromosomes/$f:t.zip $f # end cd $gp/bigZips md5sum *.zip > md5sum.txt # cd $gp/chromosomes # md5sum *.zip > md5sum.txt # Take a look at bigZips/* and chromosomes/*, update their README.txt's # MAKE 11.OOC FILE FOR BLAT mkdir /cluster/bluearc/cioSav2 blat /cluster/data/cioSav2/cioSav2.2bit /dev/null /dev/null -tileSize=11 \ -makeOoc=/cluster/bluearc/cioSav2/11.ooc -repMatch=100 # Wrote 10830 overused 11-mers to /cluster/bluearc/cioSav2/11.ooc cp -p /cluster/bluearc/cioSav2/*.ooc /iscratch/i/cioSav2/ iSync ####################################################################### # GC5BASE (DONE - 2005-08-19 - Hiram) ssh kkstore02 mkdir /cluster/data/cioSav2/bed/gc5Base cd /cluster/data/cioSav2/bed/gc5Base hgGcPercent -wigOut -doGaps -file=stdout -win=5 cioSav2 \ /cluster/data/cioSav2 | wigEncode stdin gc5Base.wig gc5Base.wib ssh hgwdev cd /cluster/data/cioSav2/bed/gc5Base mkdir /gbdb/cioSav2/wib ln -s `pwd`/gc5Base.wib /gbdb/cioSav2/wib hgLoadWiggle cioSav2 gc5Base gc5Base.wig ####################################################################### ### AUTO UPDATE GENBANK MRNA RUN (DONE markd 2005-08-31) # Redo genbank revised alignment procedure once again to # pick up local near best pslCDnaFilter # Update genbank config and source in CVS: cd ~/kent/src/hg/makeDb/genbank cvs update etc # Edit etc/genbank.conf and add these lines: # cioSav2 (ciona savignyi) cioSav2.serverGenome = /cluster/data/cioSav2/cioSav2.2bit cioSav2.clusterGenome = /iscratch/i/cioSav2/cioSav2.2bit cioSav2.ooc = /iscratch/i/cioSav2/11.ooc cioSav2.maxIntron = 20000 cioSav2.lift = no cioSav2.refseq.mrna.native.pslCDnaFilter = ${lowCover.refseq.mrna.native.pslCDnaFilter} cioSav2.refseq.mrna.xeno.pslCDnaFilter = ${lowCover.refseq.mrna.xeno.pslCDnaFilter} cioSav2.genbank.mrna.native.pslCDnaFilter 
= ${lowCover.genbank.mrna.native.pslCDnaFilter} cioSav2.genbank.mrna.xeno.pslCDnaFilter = ${lowCover.genbank.mrna.xeno.pslCDnaFilter} cioSav2.genbank.est.native.pslCDnaFilter = ${lowCover.genbank.est.native.pslCDnaFilter} cioSav2.genbank.est.xeno.pslCDnaFilter = ${lowCover.genbank.est.xeno.pslCDnaFilter} cioSav2.refseq.mrna.native.load = yes cioSav2.refseq.mrna.xeno.load = yes cioSav2.genbank.mrna.xeno.load = yes cioSav2.genbank.est.xeno.load = no cioSav2.downloadDir = cioSav2 cioSav2.perChromTables = no cvs ci etc/genbank.conf # Install to /cluster/data/genbank: make etc-update ssh kkstore02 cd /cluster/data/genbank # do mrnas and ests in one run nice bin/gbAlignStep -initial cioSav2 & # Load results: ssh hgwdev cd /cluster/data/genbank nice bin/gbDbLoadStep -drop -initialLoad cioSav2 # Note: download sequences are made as part of the first # genbank update on hgdownload.