# for emacs: -*- mode: sh; -*- # Ciona Savigny from Broad/MIT 4/25/2003 data version # # DOWNLOAD SEQUENCE (2004-05-11 kate) # Download description: # http://www.broad.mit.edu/cgi-bin/annotation/ciona/download_license.cgi # Assembly description: # http://www.broad.mit.edu/annotation/ciona/assembly.html # # 3 sets of sequences, with quality scores -- "paired scaffolds", "paired scaffold remainders" # and "separate scaffolds". Assembly details on the web site. ssh kksilo mkdir /cluster/store7/cioSav1 ln -s /cluster/store7/cioSav1 /cluster/data cd /cluster/data/cioSav1 wget ftp://ftp.broad.mit.edu/pub/annotation/ciona/assembly_4_25_2003/paired_scaffold_remainders.fa.gz wget ftp://ftp.broad.mit.edu/pub/annotation/ciona/assembly_4_25_2003/paired_scaffold_remainders.qual.gz wget ftp://ftp.broad.mit.edu/pub/annotation/ciona/assembly_4_25_2003/paired_scaffolds.fa.gz wget ftp://ftp.broad.mit.edu/pub/annotation/ciona/assembly_4_25_2003/paired_scaffolds.qual.gz wget ftp://ftp.broad.mit.edu/pub/annotation/ciona/assembly_4_25_2003/separate_scaffolds.fa.gz wget ftp://ftp.broad.mit.edu/pub/annotation/ciona/assembly_4_25_2003/separate_scaffolds.qual.gz gunzip paired_scaffolds.fa.gz mkdir scaffolds sed -e "s/paired_scaffold/ps/" paired_scaffolds.fa | faSplit byname paired_scaffolds.fa scaffolds/ mkdir nib cd nib foreach i (../scaffolds/*.fa) faToNib -softMask $i `basename $i .fa`.nib end ### CREATE DATABASE # Create the database. ssh hgwdev echo 'create database cioSav1' | hgsql '' # CREATING GRP TABLE FOR TRACK GROUPING echo "create table grp (PRIMARY KEY(NAME)) select * from hg17.grp" \ | hgsql cioSav1 # echo 'insert into blatServers values("cioSav1", "blat10", "17780", "1"); \ # insert into blatServers values("cioSav1", "blat10", "17781", "0");' \ # | hgsql -h genome-testdb hgcentraltest mkdir /gbdb/cioSav1 ln -s /cluster/data/cioSav1/nib /gbdb/cioSav1/nib hgsql cioSav1 < ~/kent/src/hg/lib/chromInfo.sql cd /cluster/data/cioSav1 hgNibSeq -preMadeNib cioSav1 /gbdb/cioSav1/nib scaffolds/*.fa mkdir -p ~/kent/src/hg/makeDb/trackDb/squirt/cioSav1 cd $HOME/kent/src/hg/makeDb/trackDb cvsup # Edit that makefile to add cioSav1 in all the right places and do make cvs commit -m "Added cioSav1." makefile # Add dbDb and defaultDb entries: echo 'insert into dbDb (name, description, nibPath, organism, \ defaultPos, active, orderKey, genome, scientificName, \ htmlPath, hgNearOk, hgPbOk, sourceName) \ values("cioSav1", "Sept. 2001", \ "/gbdb/cioSav1/nib", "C. savignyi", "ps_233", 1, \ 51, "C. savignyi", "Ciona Savignyi", \ "/gbdb/cioSav1/html/description.html", 0, 0, \ "Broad");' | hgsql -h genome-testdb hgcentraltest echo 'insert into defaultDb (genome, name) \ values ("C. savignyi", "cioSav1");' \ | hgsql -h genome-testdb hgcentraltest # MAKE DESCRIPTION/SAMPLE POSITION HTML PAGE mkdir /cluster/data/cioSav1/html cd /cluster/data/cioSav1/html mkdir -p ~/kent/src/hg/makeDb/trackDb/squirt/cioSav1 # make a symbolic link from /gbdb/cioSav1/html to /cluster/data/cioSav1/html ln -s /cluster/data/cioSav1/html /gbdb/cioSav1/html # NEED TO ADD DESCRIPTION.HTML PAGE # make gcPercent table mkdir -p ~/cioSav1/bed/gcPercent cd ~/cioSav1/bed/gcPercent hgsql cioSav1 < ~/kent/src/hg/lib/gcPercent.sql hgGcPercent cioSav1 /cluster/data/cioSav1/nib # BLASTZ CI1 ssh kksilo mkdir /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21 cd /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21 ln -s blastz.ci1.2004-08-21 /cluster/data/cioSav1/bed/blastz.ci1 cat << '_EOF_' > DEF export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/bin/i386:/cluster/home/angie/schwartzbin ALIGN=blastz-run BLASTZ=blastz BLASTZ_H=2000 BLASTZ_ABRIDGE_REPEATS=0 SEQ1_DIR=/iscratch/i/cioSav1/nib # unused: SEQ1_RMSK= SEQ1_SMSK= SEQ1_FLAG=-ciona SEQ1_IN_CONTIGS=0 SEQ1_CHUNK=10000000 SEQ1_LAP=10000 SEQ2_DIR=/cluster/data/ci1/maskedFa # unused: SEQ2_RMSK= SEQ2_SMSK= SEQ2_FLAG=-ciona SEQ2_IN_CONTIGS=1 SEQ2_CHUNK=10000000 SEQ2_LAP=0 BASE=/cluster/data/cioSav1/bed/blastz.ci1.2004-08-21 DEF=$BASE/DEF RAW=$BASE/raw CDBDIR=$BASE SEQ1_LEN=$BASE/S1.len SEQ2_LEN=$BASE/S2.len '_EOF_' # << this line keeps emacs coloring happy # run bash shell if you don't already: bash source DEF mkdir run ~angie/hummus/make-joblist $DEF > $BASE/run/j sh ./xdir.sh cd run sed -e 's#^#/cluster/bin/penn/#' j > j2 wc -l j* head j2 mv j2 j # cluster run ssh kk cd /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21/run para create j para try, check, push, check, .... # Completed: 176616 of 176616 jobs # CPU time in finished jobs: 327504s 5458.39m 90.97h 3.79d 0.010 y # IO & Wait Time: 558756s 9312.61m 155.21h 6.47d 0.018 y # Average job time: 5s 0.08m 0.00h 0.00d # Longest job: 473s 7.88m 0.13h 0.01d # Submission to last job: 1109s 18.48m 0.31h 0.01d # back in the bash shell on kksilo... mkdir /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21/run.1 cd /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21/run.1 /cluster/bin/scripts/blastz-make-out2lav $DEF $BASE > j # small cluster run ssh kki cd /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21/run.1 para create j para try, check, push, check, .... # Completed: 446 of 446 jobs # CPU time in finished jobs: 1629s 27.15m 0.45h 0.02d 0.000 y # IO & Wait Time: 3359s 55.98m 0.93h 0.04d 0.000 y # Average job time: 11s 0.19m 0.00h 0.00d # Longest job: 22s 0.37m 0.01h 0.00d # Submission to last job: 322s 5.37m 0.09h 0.00d cd .. rm -r raw # Translate .lav to axt, with ci1 in scaffold coords for collaborators: ssh kksilo cd /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21 mkdir axtChrom foreach c (lav/*) pushd $c set chr=$c:t set out=axtChrom/$chr.axt echo "Translating $chr lav to $out" cat `ls -1 *.lav | sort -g` | lavToAxt stdin /cluster/data/cioSav1/nib /cluster/data/ci1/nib stdout \ | axtSort stdin ../../$out popd end # CHAIN CI1 BLASTZ # Run axtChain on little cluster ssh kki cd /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21 mkdir -p axtChain/run1 cd axtChain/run1 mkdir out chain ls -1S /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21/axtChrom/*.axt > input.lst cat << '_EOF_' > gsub #LOOP doChain {check in exists $(path1)} {check out line+ chain/$(root1).chain} {check out exists out/$(root1).out} #ENDLOOP '_EOF_' # << this line makes emacs coloring happy cat << '_EOF_' > doChain #!/bin/csh axtChain -verbose=0 $1 /iscratch/i/cioSav1/nib /iscratch/i/ci1/nib $2 > $3 '_EOF_' # << this line makes emacs coloring happy chmod a+x doChain gensub2 input.lst single gsub jobList para create jobList para try, check, push, check... # Completed: 443 of 446 jobs # Crashed: 3 jobs # chain/ps_242.chain is empty # chain/ps_280.chain is empty # chain/ps_320.chain is empty # Average job time: 3s 0.05m 0.00h 0.00d # Longest job: 9s 0.15m 0.00h 0.00d # Submission to last job: 139s 2.32m 0.04h 0.00d # now on the cluster server, sort chains ssh kksilo cd /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21/axtChain chainMergeSort run1/chain/*.chain > all.chain # chainSplit chain all.chain rm run1/chain/*.chain # take a look at score distr's foreach f (chain/*.chain) grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r echo $f:t:r textHistogram -binSize=10000 /tmp/score.$f:t:r echo "" end # Load chains into database ssh hgwdev cd /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21/axtChain/chain hgLoadChain cioSav1 chainCi1 all.chain # foreach i (*.chain) # set c = $i:r # echo loading $c # hgLoadChain cioSav1 ${c}_chainCi1 $i # end # NET Ci1 BLASTZ ssh kksilo cd /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21/axtChain chainPreNet all.chain ../S1.len ../S2.len stdout \ | chainNet stdin -minSpace=1 ../S1.len ../S2.len stdout /dev/null \ | netSyntenic stdin noClass.net # Add classification info using db tables: ssh hgwdev cd /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21/axtChain netClass -noAr noClass.net cioSav1 ci1 ci1.net # Make a 'syntenic' subset: ssh kksilo cd /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21/axtChain rm noClass.net netFilter -syn ci1.net > ci1Syn.net # Load the nets into database ssh hgwdev cd /cluster/data/cioSav1/bed/blastz.ci1.2004-08-21/axtChain netFilter -minGap=10 ci1.net | hgLoadNet cioSav1 netCi1 stdin netFilter -minGap=10 ci1Syn.net | hgLoadNet cioSav1 netSyntenyCi1 stdin