#########################################################################
#                 Cavia Porcellus -- Guinea Pig
#               Broad cavPor2 2X release
#########################################################################

# Build log for the cavPor2 genome database: download the Broad assembly,
# load sequence with makeGenomeDb.pl, soft-mask with WindowMasker, and
# publish the bigZips download files.
#
# The commands use csh/tcsh syntax ("set d = ...", ">&", and a here-document
# whose terminator line includes the quotes).
# NOTE(review): this reads as a step-by-step record to be pasted into an
# interactive session one step at a time, not run as one script -- the bare
# "ssh <host>" lines presumably mark the host the following commands ran on.
#
# NOTE(review): the first two section headers are dated 2006-03-03/04, but
# the WindowMasker run directory below is dated 2007-03-04 and the downloads
# section 2007-06-05; the 2006 dates look like typos for 2007 -- confirm.

# Download sequence (2006-03-03 kate)
    ssh kkstore05
    mkdir -p /cluster/store12/cavPor2
    ln -s /cluster/store12/cavPor2 /cluster/data/cavPor2
    cd /cluster/data/cavPor2
    mkdir downloads
    cd downloads
    # wget -r recreates the remote path under ./ftp.broad.mit.edu; move the
    # assembly files up into downloads/ and drop the empty mirror tree.
    wget -r ftp://ftp.broad.mit.edu/pub/assemblies/mammals/guineaPig/cavPor2
    mv ftp.broad.mit.edu/pub/assemblies/mammals/guineaPig/cavPor2/* .
    rm -fr ftp.broad.mit.edu

#########################################################################
# Create .ra file and run makeGenomeDb.pl
    ssh hgwdev
    cd /cluster/data/cavPor2
    # NOTE(review): "dbDbSpeciesDir lizard" in the config below looks like a
    # leftover from another assembly's config; a guinea pig directory is
    # presumably intended.  The run below stops at the seq step -- confirm
    # the value before running any later makeGenomeDb.pl steps.
    # The here-document body and its terminator are kept at column 0.
cat << '_EOF_' >cavPor2.config.ra
# Config parameters for makeGenomeDb.pl:
db cavPor2
clade mammal
genomeCladePriority 35
scientificName Cavia porcellus
commonName GuineaPig
assemblyDate Oct. 2005
assemblyLabel Broad Institute cavPor2 (Draft_v2)
orderKey 99
#mitoAcc AJ222767
mitoAcc 5679797
fastaFiles /cluster/data/cavPor2/downloads/assembly.bases.gz
agpFiles /cluster/data/cavPor2/downloads/assembly.agp
qualFiles /cluster/data/cavPor2/downloads/scaffold.lifted.qac
dbDbSpeciesDir lizard
'_EOF_'

    # Runs in the background; wait for it to finish before the steps below
    # that need its output.  Use ">&" so stderr lands in the log as well as
    # stdout, matching the doWindowMasker.pl invocation further down.
    makeGenomeDb.pl -verbose=2 -stop=seq cavPor2.config.ra >& makeGenomeDb.out &

    # Need dbDb entry for name lookup
    # (inserted with active = 0)
    hgsql -e 'INSERT INTO dbDb (name, description, nibPath, organism, \
        defaultPos, active, orderKey, genome, scientificName, \
        htmlPath, hgNearOk, hgPbOk, sourceName) \
        VALUES("cavPor2", "Oct. 2005", "/gbdb/cavPor2", "GuineaPig", \
        "", 0, 99, "GuineaPig", \
        "Cavia porcellus", "", 0, 0, \
        "Broad Institute cavPor2 (Draft_v2)")' -h localhost hgcentraltest

################################################
## WINDOWMASKER (2006-03-04 kate)
    ssh kkstore05
    cd /cluster/data/cavPor2/bed/
    nice /cluster/bin/scripts/doWindowMasker.pl cavPor2 \
        -workhorse=kolossus >& wmRun.log &
    # 10 hours -- requested Andy alter script to only
    #   perform sdust run -- this should halve the time

    # Once the background run has finished: give the dated run directory a
    # stable name and keep the log with it.
    ln -s WindowMasker.2007-03-04 WMRun
    mv wmRun.log WMRun
    cd WMRun

    # upper-case n's left by WM (request to Andy to fix this)
    # "tr n N" is applied to the whole FASTA stream, header lines included.
    twoBitToFa cavPor2.wmsk.sdust.2bit stdout | tr n N | \
        faToTwoBit stdin /cluster/data/cavPor2/cavPor2.2bit

    # stats on masking
    cd /cluster/data/cavPor2
    twoBitToFa cavPor2.2bit stdout | nice faSize stdin
    # 3403705911 bases (1454046733 N's 1949659178 real 1277828007 upper 671831171 lower) in 295515 sequences in 1 files
    # Total size: mean 11517.9 sd 25705.3 min 201 (scaffold_113479) max 1027677 (scaffold_289404) median 4132
    # N count: mean 4920.4 sd 14535.3
    # U count: mean 4324.1 sd 9878.2
    # L count: mean 2273.4 sd 4659.3

    # 34.4 % masked  (lower / real = 671831171 / 1949659178)
    # Mouse and rat are ~43%, from RM.  TRF gives them an extra 2%, skip here

    mkdir -p /san/sanvol1/scratch/cavPor2
    cp -p cavPor2.2bit chrom.sizes /san/sanvol1/scratch/cavPor2

################################################
# DOWNLOADS (2007-06-05 kate)
    ssh kkstore05
    cd /cluster/data/cavPor2
    mkdir bigZips
    cd bigZips
    nice twoBitToFa ../cavPor2.2bit cavPor2.fa
    cp ../downloads/assembly.agp cavPor2.agp
    nice gzip cavPor2.fa cavPor2.agp
    md5sum *.gz > md5sum.txt

    ssh hgwdev
    set d = /usr/local/apache/htdocs/goldenPath
    set bd = /cluster/data/cavPor2
    # Start from the sorAra1 README as a template.
    cp $d/sorAra1/bigZips/README.txt $bd/bigZips
    # EDIT
    # (manual step: edit the copied README.txt before linking it below)
    mkdir -p $d/cavPor2/bigZips
    ln -s $bd/bigZips/{*.gz,md5sum.txt,README.txt} $d/cavPor2/bigZips