# for emacs: -*- mode: sh; -*-

# This file describes browser build for the papAnu2
# Olive Baboon - Papio anubis - Mar 2012
#
# Note: The previous assembly papHam1 is from the same animal
# used for this assembly.
# http://www.ncbi.nlm.nih.gov/genome/394
# http://www.ncbi.nlm.nih.gov/genome/assembly/383538/
# http://www.ncbi.nlm.nih.gov/bioproject/54005 - Baylor
# http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=AHZZ01
# Genome Coverage : 2.5x Sanger; 4.5x 454; 85x Illumina
# no result for "Papio anubis mitochondrion complete genome" search
# DATE: 21-Mar-2012
# ORGANISM: Papio anubis
# TAXID: 9555
# ASSEMBLY LONG NAME: Panu_2.0
# ASSEMBLY SHORT NAME: Panu_2.0
# ASSEMBLY SUBMITTER: Baylor College of Medicine
# ASSEMBLY TYPE: Haploid
# NUMBER OF ASSEMBLY-UNITS: 1
# ASSEMBLY ACCESSION: GCA_000264685.1
#
# FTP-RELEASE DATE: 07-Jun-2012
# Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia;
# Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini;
# Cercopithecidae; Cercopithecinae; Papio; Papio anubis

#############################################################################
# Fetch sequence from genbank (DONE - 2012-07-19 - Chin)
mkdir -p /hive/data/genomes/papAnu2/genbank
cd /hive/data/genomes/papAnu2/genbank
time rsync -a -P rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Papio_anubis/Panu_2.0/ ./
# real 33m19.751s

# measure total sequence in this assembly
faSize Primary_Assembly/assembled_chromosomes/FASTA/*.fa.gz \
    Primary_Assembly/unplaced_scaffolds/FASTA/*.fa.gz
# 2948380710 bases (55130419 N's 2893250291 real 2893250291 upper 0 lower)
# in 63250 sequences in 22 files
# Total size: mean 46614.7 sd 2497441.4
# min 200 (gi|388614875|gb|AHZZ01198925.1|)
# max 220367699 (gi|390124480|gb|CM001491.1|) median 1416
# %0.00 masked total, %0.00 masked real

#############################################################################
# process into UCSC naming scheme (DONE - 2012-07-19 - Chin)
mkdir /hive/data/genomes/papAnu2/ucsc
cd /hive/data/genomes/papAnu2/ucsc

######################## Assembled Chromosomes
cat << '_EOF_' > toUcsc.pl
#!/bin/env perl

# Rename the assembled chromosomes to UCSC names: read the chr2acc table
# (chromosome name, GenBank accession), then for each chromosome rewrite
# its AGP and FASTA from ../genbank into chrN.agp and chrN.fa here.

use strict;
use warnings;

my %accToChr;

open (FH, "<../genbank/Primary_Assembly/assembled_chromosomes/chr2acc") or
        die "can not read Primary_Assembly/assembled_chromosomes/chr2acc";
while (my $line = <FH>) {
    next if ($line =~ m/^#/);
    chomp $line;
    my ($chrN, $acc) = split('\s+', $line);
    $accToChr{$acc} = $chrN;
}
close (FH);

foreach my $acc (keys %accToChr) {
    my $chrN = $accToChr{$acc};
    print "$acc $accToChr{$acc}\n";
    open (FH, "zcat ../genbank/Primary_Assembly/assembled_chromosomes/AGP/chr${chrN}.agp.gz|") or die "can not read chr${chrN}.agp.gz";
    open (UC, ">chr${chrN}.agp") or die "can not write to chr${chrN}.agp";
    while (my $line = <FH>) {
        if ($line =~ m/^#/) {
            print UC $line;
        } else {
            # \Q..\E: the accession contains a '.', which must match literally
            $line =~ s/^\Q$acc\E/chr${chrN}/;
            print UC $line;
        }
    }
    close (FH);
    close (UC);
    open (FH, "zcat ../genbank/Primary_Assembly/assembled_chromosomes/FASTA/chr${chrN}.fa.gz|") or die "can not read chr${chrN}.fa.gz";
    open (UC, ">chr${chrN}.fa") or die "can not write to chr${chrN}.fa";
    while (my $line = <FH>) {
        if ($line =~ m/^>/) {
            # print, not printf: the name is data, not a format string
            print UC ">chr${chrN}\n";
        } else {
            print UC $line;
        }
    }
    close (FH);
    close (UC);
}
'_EOF_'
# << happy emacs
chmod +x toUcsc.pl
time ./toUcsc.pl
# real 1m26.505s
time gzip *.fa *.agp
# real 13m40.080s
faSize chr*.fa.gz
# 2724327674 bases (42062585 N's 2682265089 real 2682265089 upper 0 lower)
# in 21 sequences in 21 files
# Total size: mean 129729889.2 sd 45381077.9 min 51301725 (chr19)
# max 220367699 (chr1) median 128036923
# %0.00 masked total, %0.00 masked real

######################## Unplaced scaffolds
# verify we don't have any .acc numbers different from .1
# see oreNil2 for xxxx.1 and xxxx.2 cases
zcat ../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz \
    | cut -f1 | egrep "^JH|AHZZ" \
    | sed -e 's/^JH[0-9][0-9]*//; s/^AHZZ[0-9][0-9]*//' | sort | uniq -c
# 96735 .1

# this is like the unplaced.pl script in other assemblies except it
# does not add chrUn_ to the names since they are all just scaffolds
cat << '_EOF_' > unplaced.pl
#!/bin/env perl

# Copy the unplaced scaffold AGP and FASTA from ../genbank into
# unplaced.agp and unplaced.fa, dropping the ".1" version suffix from
# the names (all versions were verified to be .1 before running this).

use strict;
use warnings;

my $agpFile = "../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
my $fastaFile = "../genbank/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz";
open (FH, "zcat $agpFile|") or die "can not read $agpFile";
open (UC, ">unplaced.agp") or die "can not write to unplaced.agp";
while (my $line = <FH>) {
    if ($line =~ m/^#/) {
        print UC $line;
    } else {
        # no /g: only the first ".1" on the line is removed
        $line =~ s/\.1//;
        printf UC "%s", $line;
    }
}
close (FH);
close (UC);

open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
open (UC, ">unplaced.fa") or die "can not write to unplaced.fa";
while (my $line = <FH>) {
    if ($line =~ m/^>/) {
        chomp $line;
        # reduce ">gi|...|gb|ACC.1|..." to the bare accession
        $line =~ s/.*gb\|//;
        $line =~ s/\.1\|.*//;
        # print, not printf: the name is data, not a format string
        print UC ">$line\n";
    } else {
        print UC $line;
    }
}
close (FH);
close (UC);
'_EOF_'
# << happy emacs
chmod +x unplaced.pl
time ./unplaced.pl
# real 0m6.265s
# -rw-rw-r-- 1 4703550 Jul 20 10:47 unplaced.agp
# -rw-rw-r-- 1 228138466 Jul 20 10:47 unplaced.fa
time gzip *.fa *.agp
# real 0m50.019s

# verify nothing lost in the translation, should be the same as above
# except for the name translations
faSize *.fa.gz
# 2948380710 bases (55130419 N's 2893250291 real 2893250291 upper 0 lower)
# in 63250 sequences in 22 files
# Total size: mean 46614.7 sd 2497441.4 min 200 (AHZZ01198925)
# max 220367699 (chr1) median 1416

#############################################################################
# Initial browser build (DONE - 2012-07-20 - Chin)
cd /hive/data/genomes/papAnu2
cat << '_EOF_' > papAnu2.config.ra
# Config parameters for makeGenomeDb.pl:
db papAnu2
clade mammal
genomeCladePriority 16
scientificName Papio anubis
commonName Baboon
assemblyDate Mar. 2012
assemblyLabel Baylor College of Medicine
assemblyShortLabel Baylor Panu_2.0
orderKey 391
mitoAcc none
fastaFiles /hive/data/genomes/papAnu2/ucsc/*.fa.gz
agpFiles /hive/data/genomes/papAnu2/ucsc/*.agp.gz
dbDbSpeciesDir baboon
photoCreditURL http://www.oumedicine.com/pathology/general-program-info/faculty-staff/roman-f-wolf-dvm
photoCreditName Roman Wolf, University of Oklahoma Health Sciences Center
ncbiGenomeId 394
ncbiAssemblyId 383538
ncbiAssemblyName Panu_2.0
ncbiBioProject 54005
genBankAccessionID GCA_000264685.1
taxId 9555
'_EOF_'
# << happy emacs

# verify sequence and agp are OK
time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev -dbHost=hgwdev \
    -stop=agp papAnu2.config.ra > agp.log 2>&1 &
# real 4m34.068s
# verify OK:
tail -1 agp.log
# *** All done! (through the 'agp' step)

# finish it off
time makeGenomeDb.pl -continue=db -workhorse=hgwdev -fileServer=hgwdev \
    -dbHost=hgwdev papAnu2.config.ra > db.log 2>&1 &
# real 22m27.336s

# add the trackDb entries to the source tree, and the 2bit link:
ln -s `pwd`/papAnu2.unmasked.2bit /gbdb/papAnu2/papAnu2.2bit
# browser should function now

# manually run twoBitDup to detect dup
twoBitDup /gbdb/papAnu2/papAnu2.2bit

# cd ~/kent/src/hg/makeDb/trackDb
# edit makefile to add papAnu2 to DBS.
# git add baboon/papAnu2/*.{ra,html}
# git commit -m "Added papAnu2 to DBS." makefile
# git commit -m "Initial descriptions for papAnu2." baboon/papAnu2/*.{ra,html}
# git pull; git push
# Run make update DBS=papAnu2 and make alpha when done.
# (optional) Clean up /cluster/data/papAnu2/TemporaryTrackDbCheckout
# browser should function now
# check in papAnu2.config.ra to src/hg/utils/automation/configFiles

##########################################################################
# running repeat masker (DONE - 2012-07-21 - Chin)
mkdir /hive/data/genomes/papAnu2/bed/repeatMasker
cd /hive/data/genomes/papAnu2/bed/repeatMasker
time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
    -bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
    -smallClusterHub=encodek papAnu2 > do.log 2>&1 &
# real 1354m59.921s
cat faSize.rmsk.txt
# 2948380710 bases (55130419 N's 2893250291 real 1378673208 upper
# 1514577083 lower) in 63250 sequences in 1 files
# Total size: mean 46614.7 sd 2497441.4 min 200 (AHZZ01198925)
# max 220367699 (chr1) median 1416
# %51.37 masked total, %52.35 masked real

egrep -i "versi|relea" do.log
# RepeatMasker version development-$Id: RepeatMasker,v 1.26
# 2011/09/26 16:19:44 angie Exp $
# grep version of RepeatMasker$ /scratch/data/RepeatMasker/RepeatMasker
# April 26 2011 (open-3-3-0) version of RepeatMasker
# grep RELEASE /scratch/data/RepeatMasker/Libraries/RepeatMaskerLib.embl
# CC RELEASE 20110920;

featureBits -countGaps papAnu2 rmsk
# 1516030289 bases of 2948380710 (51.419%) in intersection
# why is it different than the faSize above ?
# because rmsk masks out some N's as well as bases, the count above
# separates out the N's from the bases, it doesn't show lower case N's

##########################################################################
# running simple repeat (DONE - 2012-07-23 - Chin)
mkdir /hive/data/genomes/papAnu2/bed/simpleRepeat
cd /hive/data/genomes/papAnu2/bed/simpleRepeat
time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
    -dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
    papAnu2 > do.log 2>&1 &
# real 58m51.627s
cat fb.simpleRepeat
# 162539257 bases of 2934388335 (5.539%) in intersection

# add to rmsk after it is done:
cd /hive/data/genomes/papAnu2
twoBitMask papAnu2.rmsk.2bit \
    -add bed/simpleRepeat/trfMask.bed papAnu2.2bit
# you can safely ignore the warning about fields >= 13
twoBitToFa papAnu2.2bit stdout | faSize stdin > faSize.papAnu2.2bit.txt
cat faSize.papAnu2.2bit.txt
# 2948380710 bases (55130419 N's 2893250291 real 1377491621 upper
# 1515758670 lower) in 63250 sequences in 1 files
# %51.41 masked total, %52.39 masked real

rm /gbdb/papAnu2/papAnu2.2bit
ln -s `pwd`/papAnu2.2bit /gbdb/papAnu2/papAnu2.2bit

#########################################################################
# Verify all gaps are marked, add any N's not in gap as type 'other'
# (DONE - 2012-07-24 - Chin)
mkdir /hive/data/genomes/papAnu2/bed/gap
cd /hive/data/genomes/papAnu2/bed/gap
time nice -n +19 findMotif -motif=gattaca -verbose=4 \
    -strand=+ ../../papAnu2.unmasked.2bit > findMotif.txt 2>&1
# real 1m17.322s
grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
featureBits papAnu2 -not gap -bed=notGap.bed
# 2934388335 bases of 2934388335 (100.000%) in intersection
time featureBits papAnu2 allGaps.bed notGap.bed -bed=new.gaps.bed
# 41138044 bases of 2934388335 (1.402%) in intersection
# real 32m23.676s

# what is the highest index in the existing gap table:
hgsql -N -e "select ix from gap;" papAnu2 | sort -n | tail -1
# 1430

# NOTE(review): the open/while lines of mkGap.pl were garbled in this
# document; they are reconstructed here as reading new.gaps.bed (the
# featureBits output just above) -- confirm against the script on disk.
cat << '_EOF_' > mkGap.pl
#!/bin/env perl

# Print one gap table row (type "other", bridged "yes") for each BED
# interval read, numbering ix upward from the highest ix already in
# the papAnu2 gap table.

use strict;
use warnings;

my $ix=`hgsql -N -e "select ix from gap;" papAnu2 | sort -n | tail -1`;
chomp $ix;

open (FH,"<new.gaps.bed") or die "can not read new.gaps.bed";
while (my $line = <FH>) {
    my ($chrom, $chromStart, $chromEnd, $rest) = split('\s+', $line);
    ++$ix;
    printf "%s\t%d\t%d\t%d\tN\t%d\tother\tyes\n", $chrom, $chromStart,
        $chromEnd, $ix, $chromEnd-$chromStart;
}
close (FH);
'_EOF_'
# << happy emacs
chmod +x ./mkGap.pl
./mkGap.pl > other.bed
featureBits -countGaps papAnu2 other.bed
# 41138044 bases of 2948380710 (1.395%) in intersection
wc -l other.bed
# 113636
hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/gap.sql \
    -noLoad papAnu2 otherGap other.bed
# starting with this many
hgsql -e "select count(*) from gap;" papAnu2
# 26003
hgsql papAnu2 -e 'load data local infile "bed.tab" into table gap;'
# result count:
hgsql -e "select count(*) from gap;" papAnu2
# 139639
# == 26003 + 113636

# verify we aren't adding gaps where gaps already exist
# this would output errors if that were true:
gapToLift -minGap=1 papAnu2 nonBridged.lift -bedFile=nonBridged.bed
# see example in danRer7.txt
# there are non-bridged gaps here:
hgsql -N -e "select bridge from gap;" papAnu2 | sort | uniq -c
# 9250 no
# 130389 yes

##########################################################################
## WINDOWMASKER (DONE - 2012-07-24 - Chin)
mkdir /hive/data/genomes/papAnu2/bed/windowMasker
cd /hive/data/genomes/papAnu2/bed/windowMasker
time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
    -dbHost=hgwdev papAnu2 > do.log 2>&1 &
# real 225m4.246s

# Masking statistics
twoBitToFa papAnu2.wmsk.2bit stdout | faSize stdin
# 2948380710 bases (55130419 N's 2893250291 real 1783065601 upper
# 1110184690 lower) in 63250 sequences in 1 files
# Total size: mean 46614.7 sd 2497441.4 min 200 (AHZZ01198925)
# max 220367699 (chr1) median 1416
# %37.65 masked total, %38.37 masked real

hgLoadBed papAnu2 windowmaskerSdust windowmasker.sdust.bed.gz
# Read 15750029 elements of size 3 from windowmasker.sdust.bed.gz

featureBits -countGaps papAnu2 windowmaskerSdust
# 1181777644 bases of 2948380710 (40.082%) in intersection

# eliminate the gaps from the masking
featureBits papAnu2 -not gap -bed=notGap.bed
# 2893250291 bases of 2893250291 (100.000%) in intersection
time nice -n +19 featureBits papAnu2 windowmaskerSdust notGap.bed \
    -bed=stdout | gzip -c > cleanWMask.bed.gz
# 1126649532 bases of 2893250291 (38.941%) in intersection
# real 35m51.398s

# reload track to get it clean
hgLoadBed papAnu2 windowmaskerSdust cleanWMask.bed.gz
# Read 15783704 elements of size 4 from cleanWMask.bed.gz
# Sorted
# Creating table definition for windowmaskerSdust
# Saving bed.tab
# Loading papAnu2

featureBits -countGaps papAnu2 windowmaskerSdust
# 1126649532 bases of 2948380710 (38.212%) in intersection

zcat cleanWMask.bed.gz \
    | twoBitMask ../../papAnu2.unmasked.2bit stdin \
        -type=.bed papAnu2.cleanWMSdust.2bit
twoBitToFa papAnu2.cleanWMSdust.2bit stdout | faSize stdin \
    > papAnu2.cleanWMSdust.faSize.txt
cat papAnu2.cleanWMSdust.faSize.txt
# 2948380710 bases (55130419 N's 2893250291 real 1766600759 upper
# 1126649532 lower) in 63250 sequences in 1 files
# Total size: mean 46614.7 sd 2497441.4 min 200 (AHZZ01198925)
# max 220367699 (chr1) median 1416
# %38.21 masked total, %38.94 masked real

# how much does this window masker and repeat masker overlap:
featureBits -countGaps papAnu2 rmsk windowmaskerSdust
# 901904342 bases of 2948380710 (30.590%) in intersection

#########################################################################
# MASK SEQUENCE WITH WM+TRF (DONE - 2012-08-07 - Chin)
# since rmsk masks so very little of the genome, use the clean WM mask
# on the genome sequence
# NOTE(review): faSize.rmsk.txt earlier in this file reports %51.37
# masked by rmsk, so the rationale above looks carried over from another
# assembly's notes -- confirm that WM+TRF masking was really intended.
cd /hive/data/genomes/papAnu2
twoBitMask -add bed/windowMasker/papAnu2.cleanWMSdust.2bit \
    bed/simpleRepeat/trfMask.bed papAnu2.2bit
# safe to ignore the warnings about BED file with >=13 fields
twoBitToFa papAnu2.2bit stdout | faSize stdin > faSize.papAnu2.txt
cat faSize.papAnu2.txt
# 2948380710 bases (55130419 N's 2893250291 real 1766238769 upper
# 1127011522 lower) in 63250 sequences in 1 files
# Total size: mean 46614.7 sd 2497441.4 min 200 (AHZZ01198925)
# max 220367699 (chr1) median 1416
# %38.22 masked total, %38.95 masked real

# create symlink to gbdb
ssh hgwdev
rm /gbdb/papAnu2/papAnu2.2bit
ln -s `pwd`/papAnu2.2bit /gbdb/papAnu2/papAnu2.2bit

########################################################################
# cpgIslands - (DONE 2012-08-08 - Chin)
mkdir /hive/data/genomes/papAnu2/bed/cpgIslands
cd /hive/data/genomes/papAnu2/bed/cpgIslands
time doCpgIslands.pl papAnu2 > do.log 2>&1
# Elapsed time: 20m50s

time featureBits papAnu2 cpgIslandExt > fb.papAnu2.cpgIslandExt.txt 2>&1
# real 0m22.345s
cat fb.papAnu2.cpgIslandExt.txt
# 18257085 bases of 2893250291 (0.631%) in intersection

#########################################################################
# genscan - (DONE 2012-09-06 - Chin)
mkdir /hive/data/genomes/papAnu2/bed/genscan
cd /hive/data/genomes/papAnu2/bed/genscan
time doGenscan.pl papAnu2 > do.log 2>&1
# real 85m40.061s
para time
# 63250 jobs in batch
# 74 jobs (including everybody's) in Parasol queue or running.
# Checking finished jobs
# Completed: 63249 of 63250 jobs
# Crashed: 1 jobs
# Failed job:
# ./runGsBig.csh chr19 000 gtf/000/chr19.gtf pep/000/chr19.pep subopt/000/chr19.bed
# set -window=1200000 in runGsBig.csh and manually run again. Success.
# before continue, create a run.time file on swarm
ssh swarm
cd /hive/data/genomes/papAnu2/bed/genscan
para check
para time > run.time

# continuing:
time doGenscan.pl -continue=makeBed papAnu2 > makeBed.log 2>&1
# Elapsed time: 29m46s
cat fb.papAnu2.genscan.txt
# 40920336 bases of 2893250291 (1.414%) in intersection
cat fb.papAnu2.genscanSubopt.txt
# 49808228 bases of 2893250291 (1.722%) in intersection

#########################################################################
# MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2012-10-23 - Chin)
# Use -repMatch=900, based on size -- for human we use 1024
# use the "real" number from the faSize measurement,
# hg19 is 2897316137, calculate the ratio factor for 1024:
calc \( 2893250291 / 2897316137 \) \* 1024
# ( 2893250291 / 2897316137 ) * 1024 = 1022.563006
# round up to 1025
cd /hive/data/genomes/papAnu2
time blat papAnu2.2bit /dev/null /dev/null -tileSize=11 \
    -makeOoc=jkStuff/papAnu2.11.ooc -repMatch=1025
# Wrote 29128 overused 11-mers to jkStuff/papAnu2.11.ooc
# Done making jkStuff/papAnu2.11.ooc
# real 1m41.286s

# there are non-bridged gaps, construct lift file needed for genbank
hgsql -N -e "select bridge from gap;" papAnu2 | sort | uniq -c
# 9250 no
# 130389 yes
cd /hive/data/genomes/papAnu2/jkStuff
gapToLift papAnu2 papAnu2.nonBridged.lift -bedFile=papAnu2.nonBridged.bed
# largest non-bridged contig:
awk '{print $3-$2,$0}' papAnu2.nonBridged.bed | sort -nr | head
# 3614795 chr4 105018890 108633685 chr4.306

#########################################################################
# AUTO UPDATE GENBANK (DONE - 2012-10-23 - Chin)
# examine the file: /cluster/data/genbank/data/organism.lst
# for your species to see what counts it has for:
# organism mrnaCnt estCnt refSeqCnt
# Papio anubis 227 145582 462
# to decide which "native" mrna or ests you want to specify in genbank.conf

ssh hgwdev
cd $HOME/kent/src/hg/makeDb/genbank
git pull
# edit etc/genbank.conf to add papAnu2 just before papHam1
# papAnu2 (baboon)
papAnu2.serverGenome = /hive/data/genomes/papAnu2/papAnu2.2bit
papAnu2.clusterGenome = /hive/data/genomes/papAnu2/papAnu2.2bit
papAnu2.ooc = /hive/data/genomes/papAnu2/jkStuff/papAnu2.11.ooc
papAnu2.lift = /hive/data/genomes/papAnu2/jkStuff/papAnu2.nonBridged.lift
papAnu2.perChromTables = no
papAnu2.refseq.mrna.native.pslCDnaFilter = ${ordered.refseq.mrna.native.pslCDnaFilter}
papAnu2.refseq.mrna.xeno.pslCDnaFilter = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
papAnu2.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
papAnu2.genbank.mrna.xeno.pslCDnaFilter = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
papAnu2.genbank.est.native.pslCDnaFilter = ${ordered.genbank.est.native.pslCDnaFilter}
papAnu2.genbank.est.xeno.pslCDnaFilter = ${ordered.genbank.est.xeno.pslCDnaFilter}
papAnu2.downloadDir = papAnu2
papAnu2.refseq.mrna.native.load = yes
papAnu2.refseq.mrna.xeno.load = yes
papAnu2.genbank.mrna.xeno.load = yes
papAnu2.genbank.mrna.xeno.loadDesc = yes
papAnu2.genbank.est.native.load = yes
# end of section added to etc/genbank.conf

git commit -m "adding papAnu2 Papio anubis" etc/genbank.conf
git push
make etc-update

git pull
# Edit src/lib/gbGenome.c to add new species.
git commit -m "adding definition for papAnulNames" \
    src/lib/gbGenome.c
git push
make install-server

ssh hgwdev # used to do this on "genbank" machine
screen -S papAnu2 # long running job managed in screen
cd /cluster/data/genbank
time nice -n +19 ./bin/gbAlignStep -initial papAnu2 &
# var/build/logs/2012.06.08-09:52:19.papAnu2.initalign.log
# real 1641m58.577s

# load database when finished
ssh hgwdev
cd /cluster/data/genbank
time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad papAnu2 &
# logFile: var/dbload/hgwdev/logs/2012.10.26-10:34:02.dbload.log
# real 106m43.697s

# enable daily alignment and update of hgwdev
cd ~/kent/src/hg/makeDb/genbank
git pull
# add papAnu2 to:
vi etc/align.dbs etc/hgwdev.dbs
git commit -m "Added papAnu2." etc/align.dbs etc/hgwdev.dbs
git push
make etc-update

##########################################################################
# BLATSERVERS ENTRY (DONE - 2012-10-19 - Chin)
# After getting a blat server assigned by the Blat Server Gods,
# blat4c
# port 17838 trans
# port 17839 untrans
ssh hgwdev
hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
    VALUES ("papAnu2", "blat4c", "17838", "1", "0"); \
    INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
    VALUES ("papAnu2", "blat4c", "17839", "0", "1");' \
    hgcentraltest
# test it with some sequence

#########################################################################
# all.joiner update, downloads and in pushQ - (DONE 2012-10-30 - Chin)
cd $HOME/kent/src/hg/makeDb/schema
# fixup all.joiner until this is a clean output
# add melGal after galGal
joinerCheck -database=papAnu2 -all all.joiner

cd /hive/data/genomes/papAnu2
time makeDownloads.pl -workhorse=hgwdev papAnu2 > makeDownloads.log 2>&1
# real 12m40.799s
# output from makeDownloads.pl:
# *** All done!
# *** Please take a look at the downloads for papAnu2 using a web browser.
# *** The downloads url is: http://hgdownload-test.cse.ucsc.edu/goldenPath/papAnu2.
# *** Edit each README.txt to resolve any notes marked with "***":
#     /cluster/data/papAnu2/goldenPath/database/README.txt
#     /cluster/data/papAnu2/goldenPath/bigZips/README.txt
#     (The htdocs/goldenPath/papAnu2/*/README.txt "files" are just links to those.)
# *** If you have to make any edits that would always apply to future
#     assemblies from the same sequencing center, please edit them into
#     ~/kent/src/hg/utils/automation/makeDownloads.pl (or ask Angie for help).
# now ready for pushQ entry
mkdir /hive/data/genomes/papAnu2/pushQ
cd /hive/data/genomes/papAnu2/pushQ
time makePushQSql.pl papAnu2 > papAnu2.pushQ.sql 2> stderr.out
# real 3m39.043s
# check for errors in stderr.out, some are OK, e.g.:
# WARNING: hgwdev does not have /gbdb/papAnu2/wib/gc5Base.wib
# WARNING: hgwdev does not have /gbdb/papAnu2/wib/quality.wib
# WARNING: hgwdev does not have /gbdb/papAnu2/bbi/quality.bw
# WARNING: papAnu2 does not have seq
# WARNING: papAnu2 does not have extFile
#
# copy it to hgwbeta
scp -p papAnu2.pushQ.sql hgwbeta:/tmp
ssh hgwbeta "hgsql qapushq < /tmp/papAnu2.pushQ.sql"
# in that pushQ entry walk through each track entry and see if the table
# sizes will set properly

#############################################################################
# LIFTOVER TO papHam1 (DONE - 2013-01-16 - Chin)
ssh hgwdev
cd /hive/data/genomes/papAnu2
ln -s jkStuff/papAnu2.11.ooc 11.ooc
screen
time nice -n +19 doSameSpeciesLiftOver.pl -verbose=2 \
    -bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
    papAnu2 papHam1 > doLiftOverToPapHam1.log 2>&1 &
# real 2563m6.343s

pushd /usr/local/apache/htdocs-hgdownload/goldenPath/papAnu2/liftOver/
md5sum papAnu2ToPapHam1.over.chain.gz >> md5sum.txt
popd
#########################################################################