# for emacs: -*- mode: sh; -*-

# Creating the assembly for Macropus eugenii
#   Tammar wallaby
#   http://www.hgsc.bcm.tmc.edu/projects/wallaby/
#
# This isn't a full browser.  Just a 2x coverage genome
# for use in comparative genomics.
#
# NOTE: this file is a step-by-step build log, not a script to run
# unattended.  It changes hosts with ssh, opens screen sessions and starts
# tcsh, so the commands are meant to be issued by hand, one at a time.

#########################################################################
# DOWNLOAD SEQUENCE, ASSEMBLE, MAKE DB (2008-12-04 Andy)
    ssh hgwdev
    mkdir -p /hive/data/genomes/macEug1/baylor/chroms
    cd /hive/data/genomes/macEug1/baylor
    # contigs files land in baylor/, linearScaffolds files in baylor/chroms/
    wget --timestamping "ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Meugenii/fasta/Meug20071125/contigs/Meug20071125-contigs*"
    cd chroms/
    wget --timestamping "ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Meugenii/fasta/Meug20071125/linearScaffolds/*"
    cd ../
    # Build a two-column, tab-separated list from the FASTA header lines:
    # drop the leading '>', split on '|', and print field 2 then field 1.
    zcat Meug20071125-contigs.fa.gz | grep "^>" | \
        sed 's/^>//' | tr '|' '\t' | awk 'BEGIN{OFS="\t"}{print $2, $1;}' > contigs.lst
    # NOTE(review): presumably rewrites column 6 of the AGP using the
    # contigs.lst mapping -- confirm against tabGrep's usage message.
    tabGrep -replace contigs.lst 6 Meug20071125-contigs.agp.gz > macEug1.agp
    # Reduce every header that begins with ContigNNN to just ">ContigNNN".
    zcat Meug20071125-contigs.fa.gz | sed 's/^>\(Contig[[:digit:]]\+\).*/>\1/' \
        | gzip -c > macEug1.contigs.fa.gz
    # Keep only the FASTA records whose names appear in AGP column 6.
    cut -f6 macEug1.agp > contigs.agp.lst
    faSomeRecords macEug1.contigs.fa.gz contigs.agp.lst stdout | gzip -c > tmp.fa.gz
    mv tmp.fa.gz macEug1.contigs.fa.gz
    cd ../
    # The delimiter is quoted so the body is written literally.  In sh/bash
    # the closing line must be the bare word _EOF_; the log originally closed
    # with '_EOF_', which only csh/tcsh accept, yet tcsh is not started until
    # after this step.  Body lines must stay unindented: they are file content.
    cat << '_EOF_' > macEug1.config.ra
db macEug1
clade mammal
scientificName Macropus eugenii
assemblyDate Nov. 2007
assemblyLabel Baylor Release 1.0
orderKey 278
dbDbSpeciesDir wallaby
mitoAcc none
agpFiles /hive/data/genomes/macEug1/baylor/macEug1.agp
fastaFiles /hive/data/genomes/macEug1/baylor/macEug1.contigs.fa.gz
commonName Wallaby
_EOF_
    # << emacs
    tcsh
    # NOTE(review): the logged command omitted the config file; it is added
    # here because makeGenomeDb.pl takes the config.ra as its argument and
    # this step just created macEug1.config.ra in the current directory.
    ~/kent/src/hg/utils/automation/makeGenomeDb.pl -stop db -workhorse=hgwdev macEug1.config.ra

###########################################################################
# WINDOWMASKER (DONE 2008-12-05 Andy)
    ssh hgwdev
    mkdir -p /hive/data/genomes/macEug1/bed/WindowMasker
    screen -S macEug1_WindowMasker
    ssh kolossus
    cd /hive/data/genomes/macEug1/bed/WindowMasker
    tcsh
    # ">&" is the csh/tcsh form for sending stdout and stderr to wm.log.
    ~/kent/src/hg/utils/automation/doWindowMasker.pl macEug1 -buildDir=`pwd` -workhorse=hgwdev >& wm.log
    # Remove macEug1.wmsk.2bit from the build directory, then, from the
    # genome directory, make macEug1.wmsk.2bit a link to the sdust 2bit.
    rm macEug1.wmsk.2bit
    cd ../../
    ln -s bed/WindowMasker/macEug1.wmsk.sdust.2bit macEug1.wmsk.2bit

#########################################################################
## SIMPLE REPEATS TRF (DONE 2008-11-26 - Andy)
    ssh hgwdev
    screen -S macEug1_trf
    mkdir -p /hive/data/genomes/macEug1/bed/simpleRepeat
    cd /hive/data/genomes/macEug1/bed/simpleRepeat
    tcsh
    time $HOME/kent/src/hg/utils/automation/doSimpleRepeat.pl -buildDir=`pwd` macEug1 > do.log
# *** All done!
# *** Steps were performed in /hive/data/genomes/macEug1/bed/simpleRepeat
#0.559u 0.401s 6:56:42.34 0.0% 0+0k 0+0io 2pf+0w
    # (the two lines above are output recorded from the simpleRepeat run)
    cat fb.simpleRepeat
    # 38751844 bases of 2541767339 (1.525%) in intersection
    cd ../../
    # Combine macEug1.wmsk.2bit with the simpleRepeat trfMask.bed (-add)
    # and write the result as macEug1.2bit.
    twoBitMask macEug1.wmsk.2bit -add bed/simpleRepeat/trfMask.bed macEug1.2bit
    ## can safely ignore warning about >=13 fields in bed file
    twoBitToFa macEug1.2bit stdout | faSize stdin > macEug1.2bit.faSize.txt
    #
    # %31.28 masked total, %36.24 masked real
    ## link to gbdb
    ln -s `pwd`/macEug1.2bit /gbdb/macEug1

#########################################################################
## Genbank gene run (DONE - 2009-09-25 - Hiram)
    cd /hive/data/genomes/macEug1
    # MAKE 11.OOC FILE FOR BLAT
    blat macEug1.2bit \
        /dev/null /dev/null -tileSize=11 -makeOoc=jkStuff/macEug1.11.ooc \
        -repMatch=1024
    # Wrote 24249 overused 11-mers to jkStuff/macEug1.11.ooc
    # Stage the sequence, chrom sizes and ooc file for the genbank run.
    mkdir -p /hive/data/staging/data/macEug1
    cp -p macEug1.2bit chrom.sizes jkStuff/macEug1.11.ooc \
        /hive/data/staging/data/macEug1

    # NOTE(review): cd added.  The relative path src/lib/gbGenome.c below
    # does not exist under /hive/data/genomes/macEug1; this is the same
    # source tree the log changes into further down for the etc/*.dbs edits.
    cd ~/kent/src/hg/makeDb/genbank
    # add the following two lines to src/lib/gbGenome.c
    #   static char *macEugNames[] = {"Macropus eugenii", NULL};
    #       {"macEug", macEugNames},
    cvs ci -m "Adding Wallaby Macropus eugenii" src/lib/gbGenome.c
    make install-server

    ssh genbank
    screen  # control this business with a screen since it takes a while
    cd /cluster/data/genbank

    # Runs in the background at the lowest scheduling priority.
    time nice -n +19 bin/gbAlignStep -initial macEug1 &
    # logFile: var/build/logs/2009.09.25-14:54:03.macEug1.initalign.log
    # real    2613m8.027s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad macEug1
    # logFile: var/dbload/hgwdev/logs/2009.09.28-09:23:52.dbload.log
    # real    15m28.552s

    # enable daily alignment and update of hgwdev
    cd ~/kent/src/hg/makeDb/genbank
    cvsup
    # add macEug1 to (edit by hand; these are file names, not commands):
    #   etc/align.dbs
    #   etc/hgwdev.dbs
    cvs ci -m "Added macEug1 - Macropus eugenii - Wallaby" etc/align.dbs etc/hgwdev.dbs
    make etc-update
    # done - 2009-09-28 - Hiram