# for emacs: -*- mode: sh; -*-

# Drosophila mojavensis -- Agencourt "CAF1" via Eisen's 12-fly site
# THIS IS ONLY TO GET MASKED SEQUENCE -- NOT A BROWSER AT THIS POINT

# NOTE(review): this section had lost its line breaks (every command was
# folded into a single comment line).  The breaks below were reconstructed from
# the section banners, the "\" continuations and the "#..." output lines.
# Each "ssh <host>" is placed on its own line on the assumption that it is an
# interactive login followed by commands typed on that host -- confirm.
# The steps appear to be a record of commands pasted one at a time, not a
# script to execute top to bottom.

#########################################################################
# DOWNLOAD SEQUENCE (DONE 9/21/06 angie)
ssh kkstore05
mkdir /cluster/store12/droMoj3
ln -s /cluster/store12/droMoj3 /cluster/data/droMoj3
mkdir /cluster/data/droMoj3/downloads
cd /cluster/data/droMoj3/downloads
wget http://rana.lbl.gov/drosophila/caf1/dmoj_caf1.tar.gz
tar xvzf dmoj_caf1.tar.gz
cd dmoj
# Size summary of the downloaded scaffolds (output pasted below):
faSize scaffolds.bases
#193826310 bases (13618479 N's 180207831 real 180207831 upper 0 lower) in 6841 sequences in 1 files
#Total size: mean 28333.0 sd 756578.0 min 101 (scaffold_5710) max 34148556 (scaffold_6540) median 1654
#N count: mean 1990.7 sd 16742.8
#U count: mean 26342.3 sd 746409.9
#L count: mean 0.0 sd 0.0


#########################################################################
# MAKE GENOME DB *UP TO DB STEP ONLY* (DONE 9/21/06 angie)
ssh kkstore05
cd /cluster/data/droMoj3
# NOTE(review): the next command is damaged in this copy of the file.  The
# text between "droMoj3.config.ra <" and ">& makeGenomeDb.log" is missing --
# presumably a here-document with the config.ra contents followed by the
# command whose output went to makeGenomeDb.log.  It cannot be rebuilt from
# what is here; recover it from revision history before reusing this section.
# The surviving fragment is kept verbatim, commented out, because as written
# it cannot do what the section title describes:
# cat > droMoj3.config.ra <& makeGenomeDb.log &
tail -f makeGenomeDb.log


#########################################################################
# REPEATMASKER (DONE 9/21/06 angie)
ssh kkstore05
# Run -debug to create the dir structure and preview the scripts:
doRepeatMasker.pl droMoj3 -verbose 3 -debug
# Run it for real and tail the log:
doRepeatMasker.pl droMoj3 -species drosophila -verbose 3 \
  >& /cluster/data/droMoj3/bed/RepeatMasker.2006-09-21/do.log &
tail -f /cluster/data/droMoj3/bed/RepeatMasker.2006-09-21/do.log
# RepeatMasker and lib version from do.log:
# March 20 2006 (open-3-1-5) version of RepeatMasker
#CC RELEASE 20060315; *
# Compare coverage to previous assembly:
featureBits -chrom=scaffold_6540 droMoj3 rmsk
#3474987 bases of 33912366 (10.247%) in intersection
featureBits -chrom=scaffold_6540 droMoj2 rmsk
#3448791 bases of 33921626 (10.167%) in intersection


#########################################################################
# SIMPLE REPEATS (TRF) (DONE 9/21/06 angie)
# NOTE(review): line breaks in the sections below were reconstructed; in the
# collapsed form the commands were hidden inside one comment line and the
# first DEF here-document was split in the middle of its title line.  Each
# "ssh <host>" is put on its own line on the assumption that it is an
# interactive login followed by commands typed on that host -- confirm.
ssh kolossus
nice tcsh
mkdir /cluster/data/droMoj3/bed/simpleRepeat
cd /cluster/data/droMoj3/bed/simpleRepeat
# Stream the unmasked sequence into trfBig; stdout+stderr go to trf.log.
twoBitToFa ../../droMoj3.unmasked.2bit stdout \
| trfBig -trf=/cluster/bin/i386/trf stdin /dev/null \
  -bedAt=simpleRepeat.bed -tempDir=/tmp \
>& trf.log &
tail -f trf.log
# ~50 minutes (longer than D. mel, must be because of the scaffolds)

# Make a filtered version for sequence masking:
# (keeps only rows whose 5th column is <= 12)
awk '{if ($5 <= 12) print;}' simpleRepeat.bed > trfMask.bed

# Load unfiltered repeats into the database:
ssh hgwdev
hgLoadBed droMoj3 simpleRepeat \
  /cluster/data/droMoj3/bed/simpleRepeat/simpleRepeat.bed \
  -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql
# Compare coverage to previous assembly:
featureBits -chrom=scaffold_6540 droMoj3 simpleRepeat
#826554 bases of 33912366 (2.437%) in intersection
featureBits -chrom=scaffold_6540 droMoj2 simpleRepeat
#829580 bases of 33921626 (2.446%) in intersection


#########################################################################
# MASK SEQUENCE WITH FILTERED TRF IN ADDITION TO RM (DONE 9/21/06 angie)
ssh kolossus
cd /cluster/data/droMoj3
# Add the filtered TRF intervals on top of the RepeatMasker-masked 2bit.
time twoBitMask droMoj3.rmsk.2bit -add bed/simpleRepeat/trfMask.bed droMoj3.2bit
# This warning is OK -- the extra fields are not block coordinates.
#Warning: BED file bed/simpleRepeat/trfMask.bed has >=13 fields which means it might contain block coordinates, but this program uses only the first three fields (the entire span -- no support for blocks).
#0.321u 0.274s 0:01.97 29.9% 0+0k 0+0io 0pf+0w

# Because this is a no-browser build (just masking for alignment)
# I did not make the usual /gbdb/$db/$db.2bit link.


###########################################################################
# BLASTZ/CHAIN/NET DROVIR3 (DONE 10/4/06 angie)
ssh kkstore05
mkdir /cluster/data/droMoj3/bed/blastz.droVir3.2006-10-02
cd /cluster/data/droMoj3/bed/blastz.droVir3.2006-10-02
# Write the DEF parameter file consumed by doBlastzChainNet.pl below.
# NOTE(review): the here-document terminator is written with its quotes
# ('_EOF_'), which looks like the csh/tcsh convention; under sh/bash the
# closing line would have to be a bare _EOF_ -- confirm the intended shell.
cat << '_EOF_' > DEF
# D. mojavensis vs. D. virilis

BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. mojavensis
SEQ1_DIR=/iscratch/i/droMoj3/droMoj3.2bit
SEQ1_CHUNK=10000000
SEQ1_LAP=10000
SEQ1_LEN=/cluster/data/droMoj3/chrom.sizes

# QUERY - D. virilis
SEQ2_DIR=/iscratch/i/droVir3/droVir3.2bit
SEQ2_CHUNK=10000000
SEQ2_LAP=10000
SEQ2_LEN=/cluster/data/droVir3/chrom.sizes

BASE=/cluster/data/droMoj3/bed/blastz.droVir3.2006-10-02
'_EOF_'
# << this line keeps emacs coloring happy
doBlastzChainNet.pl DEF \
  -blastzOutRoot /panasas/store/droMoj3droVir3 >& do.log &
tail -f do.log
# Undated alias pointing at the dated run directory:
ln -s blastz.droVir3.2006-10-02 /cluster/data/droMoj3/bed/blastz.droVir3


###########################################################################
# BLASTZ/CHAIN/NET DROGRI2 (DONE 10/4/06 angie)
ssh kkstore05
mkdir /cluster/data/droMoj3/bed/blastz.droGri2.2006-10-02
cd /cluster/data/droMoj3/bed/blastz.droGri2.2006-10-02
# Same DEF layout as the droVir3 run; only the query genome and BASE differ.
cat << '_EOF_' > DEF
# D. mojavensis vs. D. grimshawi

BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - D. mojavensis
SEQ1_DIR=/iscratch/i/droMoj3/droMoj3.2bit
SEQ1_CHUNK=10000000
SEQ1_LAP=10000
SEQ1_LEN=/cluster/data/droMoj3/chrom.sizes

# QUERY - D. grimshawi
SEQ2_DIR=/iscratch/i/droGri2/droGri2.2bit
SEQ2_CHUNK=10000000
SEQ2_LAP=10000
SEQ2_LEN=/cluster/data/droGri2/chrom.sizes

BASE=/cluster/data/droMoj3/bed/blastz.droGri2.2006-10-02
'_EOF_'
# << this line keeps emacs coloring happy
doBlastzChainNet.pl DEF \
  -blastzOutRoot /cluster/bluearc/droMoj3droGri2 >& do.log &
tail -f do.log
# Undated alias pointing at the dated run directory:
ln -s blastz.droGri2.2006-10-02 /cluster/data/droMoj3/bed/blastz.droGri2


#########################################################################
# MAKE 11.OOC FILE FOR BLAT (DONE 9/18/08 angie)
# Use -repMatch=100 like droMoj2.
ssh kolossus
# NOTE(review): line breaks below were reconstructed.  In the collapsed form
# the "#Wrote ..." output comment swallowed every later command on the line,
# and each "\" was followed by a space rather than a newline.  "ssh kolossus"
# is put on its own line on the assumption that it is an interactive login
# followed by commands typed on that host -- confirm.
# Build the overused-11-mer (.ooc) file that the liftOver run below passes
# via -ooc.
blat /hive/data/genomes/droMoj3/droMoj3.2bit /dev/null /dev/null \
  -tileSize=11 -makeOoc=/hive/data/genomes/droMoj3/11.ooc -repMatch=100
#Wrote 17148 overused 11-mers to /hive/data/genomes/droMoj3/11.ooc


#########################################################################
# LIFTOVER TO DROMOJ2 (DONE 9/18/08)
# First pass with -debug; the "***" line below is pasted from its output and
# names the working directory used for the real run.
doSameSpeciesLiftOver.pl -bigClusterHub=swarm -workhorse=kolossus \
  droMoj3 droMoj2 -debug
# *** Steps were performed in /cluster/data/droMoj3/bed/blat.droMoj2.2008-09-18
cd /cluster/data/droMoj3/bed/blat.droMoj2.2008-09-18
# Real run, now supplying the 11.ooc file built above:
doSameSpeciesLiftOver.pl -bigClusterHub=swarm -workhorse=kolossus \
  -ooc=/hive/data/genomes/droMoj3/11.ooc droMoj3 droMoj2 >& do.log &
tail -f do.log


#########################################################################
# SWAP DM3 CHAIN/NET (DONE 4/6/09 angie)
mkdir /hive/data/genomes/droMoj3/bed/blastz.dm3.swap
cd /hive/data/genomes/droMoj3/bed/blastz.dm3.swap
# -swap run driven by the DEF file from the dm3-side blastz.droMoj3 directory.
doBlastzChainNet.pl -swap -bigClusterHub swarm -smallClusterHub memk \
  -workhorse kolossus \
  /hive/data/genomes/dm3/bed/blastz.droMoj3/DEF.hive >& do.log &
tail -f do.log
# Short alias pointing at the swap directory:
ln -s blastz.dm3.swap /hive/data/genomes/droMoj3/bed/blastz.dm3


#########################################################################