# for emacs: -*- mode: sh; -*- # # the above keeps emacs happy while working with this text document # This file describes how we made the browser database on the # Melopsittacus undulatus - budgerigar # WashU St. Louis - http://genome.wustl.edu/ # http://www.ncbi.nlm.nih.gov/bioproject/72527 # http://www.ncbi.nlm.nih.gov/genome/10765 # http://www.ncbi.nlm.nih.gov/genome/assembly/325078/ # http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=AGAI01 # Genome Coverage : 16x 454; 7x Illumina # http://www.ncbi.nlm.nih.gov/bioproject/19105 - chrMt # chrMt NC_009134.1 - EF450826.1 # DATE: 13-Sep-2011 # ORGANISM: Melopsittacus undulatus # TAXID: 13146 # ASSEMBLY LONG NAME: Melopsittacus undulatus Budgerigar Version 6.3 # ASSEMBLY SHORT NAME: Melopsittacus_undulatus_6.3 # ASSEMBLY SUBMITTER: Washington University School of Medicine # ASSEMBLY TYPE: Haploid # NUMBER OF ASSEMBLY-UNITS: 1 # ASSEMBLY ACCESSION: GCA_000238935.1 # FTP-RELEASE DATE: 02-Jan-2012 # Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Coelomata; # Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; # Teleostomi; Euteleostomi; Sarcopterygii; Tetrapoda; Amniota; # Sauropsida; Sauria; Archosauria; Dinosauria; Saurischia; Theropoda; # Coelurosauria; Aves; Neognathae; Psittaciformes; Psittacidae; # Melopsittacus ######################################################################### ## Download sequence (DONE - 2012-03-26 - Hiram) mkdir /hive/data/genomes/melUnd1 mkdir /hive/data/genomes/melUnd1/genbank cd /hive/data/genomes/melUnd1/genbank time rsync -a -P \ rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_other/Melopsittacus_undulatus/Melopsittacus_undulatus_6.3/ ./ # real 4m16.584s faSize Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz # 1117355426 bases (30758804 N's 1086596622 real 1086596622 upper 0 lower) # in 25211 sequences in 1 files # Total size: mean 44320.2 sd 774121.2 min 228 (gi|366922013|gb|JH533144.1|) # max 39887635 (gi|366898540|gb|JH556617.1|) 
#	median 1201
    faCount Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz \
        > faCount.txt
    egrep -v "^#seq|^total" faCount.txt \
        | awk '{printf "%s\t%d\n", $1, $2}' \
        | sed -e 's/^gi.*gb|//; s/|//' | sort -k2,2nr > chrom.sizes

    n50.pl chrom.sizes
    # reading: chrom.sizes
    # contig count: 25211, total size: 1117355426, one half size: 558677713
    # cumulative N50 count contig contig size
    # 549534897 27 JH556592.1 11405270
    # 558677713 one half size
    # 560149280 28 JH556573.1 10614383

#########################################################################
# process into UCSC naming scheme (DONE - 2012-03-27 - Hiram)
    mkdir /hive/data/genomes/melUnd1/ucsc
    cd /hive/data/genomes/melUnd1/ucsc

    # verify we don't have any .acc numbers different from .1
    zcat \
    ../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz \
        | cut -f1 | egrep "^JH|AGAI" \
        | sed -e 's/^JH[0-9][0-9]*//; s/^AGAI[0-9][0-9]*//' | sort | uniq -c
    # 116513 .1

    # this is like the unplaced.pl script in other assemblies except it
    # does not add chrUn_ to the names since they are all just scaffolds

    cat << '_EOF_' > unplaced.pl
#!/bin/env perl

# Translate the GenBank unplaced-scaffold AGP and FASTA files to UCSC names.
# Reads the two gzipped GenBank files named below through zcat and writes
# unplaced.agp.gz and unplaced.fa.gz (through gzip) in the current directory.

use strict;
use warnings;

my $agpFile = "../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
my $fastaFile = "../genbank/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz";

# AGP pass: comment lines are copied unchanged; on every other line only the
# first ".1" is removed (no /g), presumably the version suffix of the
# scaffold name in the first column.
open (FH, "zcat $agpFile|") or die "can not read $agpFile";
open (UC, "|gzip -c > unplaced.agp.gz") or die "can not write to unplaced.agp.gz";
while (my $line = <FH>) {
    if ($line =~ m/^#/) {
        print UC $line;
    } else {
        $line =~ s/\.1//;
        printf UC "%s", $line;
    }
}
close (FH);
close (UC);

# FASTA pass: a header such as ">gi|366922013|gb|JH533144.1|..." is reduced
# to ">JH533144" by dropping everything through "gb|" and everything from
# ".1|" onward; sequence lines are copied unchanged.
open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
open (UC, "|gzip -c > unplaced.fa.gz") or die "can not write to unplaced.fa.gz";
while (my $line = <FH>) {
    if ($line =~ m/^>/) {
        chomp $line;
        $line =~ s/.*gb\|//;
        $line =~ s/\.1\|.*//;
        printf UC ">$line\n";
    } else {
        print UC $line;
    }
}
close (FH);
close (UC);
'_EOF_'
    # << happy emacs
    chmod +x unplaced.pl
    time ./unplaced.pl
    #
real 5m23.124s # -rw-rw-r-- 1 1483871 Mar 27 12:29 unplaced.agp.gz # -rw-rw-r-- 1 332029817 Mar 27 12:34 unplaced.fa.gz # verify nothing lost in the translation, should be the same as above # except for the name translations faSize *.fa.gz # 1117355426 bases (30758804 N's 1086596622 real 1086596622 upper 0 lower) # in 25211 sequences in 1 files # Total size: mean 44320.2 sd 774121.2 min 228 (JH533144) # max 39887635 (JH556617) median 1201 ############################################################################# # Initial database build (DONE - 2012-03-27 - Hiram) cd /hive/data/genomes/melUnd1 cat << '_EOF_' > melUnd1.config.ra # Config parameters for makeGenomeDb.pl: db melUnd1 clade vertebrate genomeCladePriority 60 scientificName Melopsittacus undulatus commonName Budgerigar assemblyDate Sep. 2011 assemblyLabel WUSTL Melopsittacus undulatus Budgerigar Version 6.3 (GCA_000238935.1) assemblyShortLabel WUSTL v6.3 ncbiAssemblyName Melopsittacus_undulatus_6.3 ncbiAssemblyId 325078 orderKey 4375 mitoAcc NC_009134 fastaFiles /hive/data/genomes/melUnd1/ucsc/unplaced.fa.gz agpFiles /hive/data/genomes/melUnd1/ucsc/unplaced.agp.gz dbDbSpeciesDir birds taxId 13146 '_EOF_' # << happy emacs # first verify the sequence and AGP files are OK time makeGenomeDb.pl -stop=agp -workhorse=hgwdev melUnd1.config.ra \ > agp.log 2>&1 # real 1m24.027s # verify that was OK, look at the agp.log file time makeGenomeDb.pl -continue=db -workhorse=hgwdev melUnd1.config.ra \ > db.log 2>&1 # real 8m11.569s # verify that was OK, look at the db.log file # copy the trackDb business to the source tree, check it in and add # to the trackDb/makefile ############################################################################# # running repeat masker (DONE - 2012-03-27 - Hiram) mkdir /hive/data/genomes/melUnd1/bed/repeatMasker cd /hive/data/genomes/melUnd1/bed/repeatMasker time doRepeatMasker.pl -buildDir=`pwd` -noSplit \ -bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \ -smallClusterHub=encodek 
melUnd1 > do.log 2>&1 & # real 120m5.509s cat faSize.rmsk.txt # 1117373619 bases (30758804 N's 1086614815 real 1006316728 upper # 80298087 lower) in 25212 sequences in 1 files # Total size: mean 44319.1 sd 774105.8 min 228 (JH533144) # max 39887635 (JH556617) median 1201 # %7.19 masked total, %7.39 masked real grep -i versi do.log # RepeatMasker version development-$Id: RepeatMasker,v 1.26 2011/09/26 16:19:44 angie Exp $ # April 26 2011 (open-3-3-0) version of RepeatMasker featureBits -countGaps melUnd1 rmsk # 80384935 bases of 1117373619 (7.194%) in intersection # why is it different than the faSize above ? # because rmsk masks out some N's as well as bases, this featureBits count # separates out the N's from the bases, it doesn't show lower case N's ########################################################################## # running simple repeat (DONE - 2012-03-27 - Hiram) mkdir /hive/data/genomes/melUnd1/bed/simpleRepeat cd /hive/data/genomes/melUnd1/bed/simpleRepeat time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \ -dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \ melUnd1 > do.log 2>&1 & # real 23m16.165s cat fb.simpleRepeat # 5906822 bases of 1086620216 (0.544%) in intersection # not going to add to rmsk here, using the window masker instead since # it masks more sequence cd /hive/data/genomes/melUnd1 twoBitMask melUnd1.rmsk.2bit \ -add bed/simpleRepeat/trfMask.bed melUnd1.2bit # you can safely ignore the warning about fields >= 13 twoBitToFa melUnd1.2bit stdout | faSize stdin > faSize.melUnd1.2bit.txt cat faSize.melUnd1.2bit.txt # 2608572064 bases (131440969 N's 2477131095 real 1320629270 upper # 1156501825 lower) in 2685 sequences in 1 files # Total size: mean 971535.2 sd 4827933.6 min 1003 (AGCE01151413) # max 72162052 (JH378105) median 1553 rm /gbdb/melUnd1/melUnd1.2bit ln -s `pwd`/melUnd1.2bit /gbdb/melUnd1/melUnd1.2bit ######################################################################### # Verify all gaps are marked, add any 
#	N's not in gap as type 'other'
#	(DONE - 2012-03-27 - Hiram)
    mkdir /hive/data/genomes/melUnd1/bed/gap
    cd /hive/data/genomes/melUnd1/bed/gap
    time nice -n +19 findMotif -motif=gattaca -verbose=4 \
        -strand=+ ../../melUnd1.unmasked.2bit > findMotif.txt 2>&1
    # real 0m8.820s
    grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
    time featureBits -countGaps melUnd1 -not gap -bed=notGap.bed
    # 1086620216 bases of 1117373619 (97.248%) in intersection
    # real 0m7.713s
    time featureBits -countGaps melUnd1 allGaps.bed notGap.bed -bed=new.gaps.bed
    # 5401 bases of 1117373619 (0.000%) in intersection
    # real 6m52.681s

    # what is the highest index in the existing gap table:
    hgsql -N -e "select ix from gap;" melUnd1 | sort -n | tail -1
    # 2622

    # NOTE(review): the open/while lines of mkGap.pl below were lost to markup
    # stripping in this copy of the document and have been reconstructed.
    # The input name new.gaps.bed is the -bed= output of the featureBits
    # command above; the exact wording of the die message is assumed.
    cat << '_EOF_' > mkGap.pl
#!/bin/env perl

# Turn each interval in new.gaps.bed into a gap table row of type "other",
# printed to stdout as tab-separated: chrom, chromStart, chromEnd, ix, "N",
# size, "other", "yes".  Numbering continues after the highest ix currently
# in the melUnd1 gap table.

use strict;
use warnings;

# highest existing gap index, fetched from the database at run time
my $ix=`hgsql -N -e "select ix from gap;" melUnd1 | sort -n | tail -1`;
chomp $ix;

open (FH,"<new.gaps.bed") or die "can not read new.gaps.bed";
while (my $line = <FH>) {
    # only the first three BED columns are used; $rest is ignored
    my ($chrom, $chromStart, $chromEnd, $rest) = split('\s+', $line);
    ++$ix;
    printf "%s\t%d\t%d\t%d\tN\t%d\tother\tyes\n", $chrom, $chromStart,
        $chromEnd, $ix, $chromEnd-$chromStart;
}
close (FH);
'_EOF_'
    # << happy emacs
    chmod +x ./mkGap.pl
    ./mkGap.pl > other.bed
    featureBits -countGaps melUnd1 other.bed
    # 5401 bases of 1117373619 (0.000%) in intersection
    wc -l other.bed
    # 5371
    hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/gap.sql \
        -noLoad melUnd1 otherGap other.bed
    # Read 5371 elements of size 8 from other.bed
    # starting with this many
    hgsql -e "select count(*) from gap;" melUnd1
    # 45651
    hgsql melUnd1 -e 'load data local infile "bed.tab" into table gap;'
    # result count:
    hgsql -e "select count(*) from gap;" melUnd1
    # 51022
    # == 45651 + 5371

    # there is an odd one marked as bridged when it is actually a continuation
    # of a non-bridged gap.
    # Found when running gapToLift below, without the -minGap=100.
# chr22:42178985-42178985 hgsql -e 'update gap set bridge="no" where chromStart=42178984 AND chrom="chr22";' melUnd1 # verify we aren't adding gaps where gaps already exist # this would output errors if that were true: gapToLift -minGap=1 melUnd1 nonBridged.lift -bedFile=nonBridged.bed # see example in danRer7.txt # no unannotated gaps, no more to run here # there are no non-bridged gaps here: hgsql -N -e "select bridge from gap;" melUnd1 | sort | uniq -c # 51022 yes ########################################################################## ## WINDOWMASKER (DONE - 2012-03-27 - Hiram) mkdir /hive/data/genomes/melUnd1/bed/windowMasker cd /hive/data/genomes/melUnd1/bed/windowMasker time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \ -dbHost=hgwdev melUnd1 > do.log 2>&1 & # about 47 minutes # Masking statistics twoBitToFa melUnd1.wmsk.2bit stdout | faSize stdin # 1117373619 bases (30758804 N's 1086614815 real 878580317 upper # 208034498 lower) in 25212 sequences in 1 files # Total size: mean 44319.1 sd 774105.8 min 228 (JH533144) # max 39887635 (JH556617) median 1201 # %18.62 masked total, %19.15 masked real twoBitToFa melUnd1.wmsk.sdust.2bit stdout | faSize stdin # 1117373619 bases (30758804 N's 1086614815 real 873701307 upper # 212913508 lower) in 25212 sequences in 1 files # Total size: mean 44319.1 sd 774105.8 min 228 (JH533144) # max 39887635 (JH556617) median 1201 # %19.05 masked total, %19.59 masked real hgLoadBed melUnd1 windowmaskerSdust windowmasker.sdust.bed.gz # Read 5868579 elements of size 3 from windowmasker.sdust.bed.gz featureBits -countGaps melUnd1 windowmaskerSdust # 243668416 bases of 1117373619 (21.807%) in intersection # eliminate the gaps from the masking featureBits melUnd1 -not gap -bed=notGap.bed # 1086614815 bases of 1086614815 (100.000%) in intersection time nice -n +19 featureBits melUnd1 windowmaskerSdust notGap.bed \ -bed=stdout | gzip -c > cleanWMask.bed.gz # 212913508 bases of 1086614815 (19.594%) in intersection # 
real 4m52.646s # reload track to get it clean hgLoadBed melUnd1 windowmaskerSdust cleanWMask.bed.gz # Loaded 5864264 elements of size 4 time featureBits -countGaps melUnd1 windowmaskerSdust # 212913508 bases of 1117373619 (19.055%) in intersection # real 0m43.151s # mask with this clean result zcat cleanWMask.bed.gz \ | twoBitMask ../../melUnd1.unmasked.2bit stdin \ -type=.bed melUnd1.cleanWMSdust.2bit twoBitToFa melUnd1.cleanWMSdust.2bit stdout | faSize stdin \ > melUnd1.cleanWMSdust.faSize.txt cat melUnd1.cleanWMSdust.faSize.txt # 1117373619 bases (30758804 N's 1086614815 real 873701307 upper # 212913508 lower) in 25212 sequences in 1 files # Total size: mean 44319.1 sd 774105.8 min 228 (JH533144) # max 39887635 (JH556617) median 1201 # %19.05 masked total, %19.59 masked real # how much does this window masker and repeat masker overlap: featureBits -countGaps melUnd1 rmsk windowmaskerSdust # 62533481 bases of 1117373619 (5.596%) in intersection # real 0m53.765s ######################################################################### # MASK SEQUENCE WITH WM+TRF (DONE - 2012-03-29 - Hiram) cd /hive/data/genomes/melUnd1 twoBitMask -add bed/windowMasker/melUnd1.cleanWMSdust.2bit \ bed/simpleRepeat/trfMask.bed melUnd1.2bit # safe to ignore the warnings about BED file with >=13 fields time twoBitToFa melUnd1.2bit stdout | faSize stdin > faSize.melUnd1.txt # real 0m20.373s cat faSize.melUnd1.txt # 1117373619 bases (30758804 N's 1086614815 real 873453109 upper # 213161706 lower) in 25212 sequences in 1 files # Total size: mean 44319.1 sd 774105.8 min 228 (JH533144) # max 39887635 (JH556617) median 1201 # %19.08 masked total, %19.62 masked real # create symlink to gbdb ssh hgwdev cd /hive/data/genomes/melUnd1 rm /gbdb/melUnd1/melUnd1.2bit ln -s `pwd`/melUnd1.2bit /gbdb/melUnd1/melUnd1.2bit ######################################################################### # cpgIslands - (DONE - 2011-04-24 - Hiram) mkdir /hive/data/genomes/melUnd1/bed/cpgIslands cd 
/hive/data/genomes/melUnd1/bed/cpgIslands time doCpgIslands.pl melUnd1 > do.log 2>&1 # real 58m33.619s cat fb.melUnd1.cpgIslandExt.txt # 8424594 bases of 1086614815 (0.775%) in intersection ######################################################################### # genscan - (DONE - 2011-04-26 - Hiram) mkdir /hive/data/genomes/melUnd1/bed/genscan cd /hive/data/genomes/melUnd1/bed/genscan time doGenscan.pl melUnd1 > do.log 2>&1 # one job failed ./runGsBig.csh JH556617 006 gtf/006/JH556617.gtf pep/006/JH556617.pep subopt/006/JH556617.bed # re-running with a window size of 2000000 OK # continuing: time doGenscan.pl -continue=makeBed melUnd1 > makeBed.log 2>&1 # real 56m49.050s cat fb.melUnd1.genscan.txt # 21738187 bases of 1086614815 (2.001%) in intersection cat fb.melUnd1.genscanSubopt.txt # 26863973 bases of 1086614815 (2.472%) in intersection ######################################################################### # MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2012-05-03 - Hiram) # Use -repMatch=900, based on size -- for human we use 1024 # use the "real" number from the faSize measurement, # hg19 is 2897316137, calculate the ratio factor for 1024: calc \( 1086614815 / 2897316137 \) \* 1024 # ( 1086614815 / 2897316137 ) * 1024 = 384.042858 # round up to 400 cd /hive/data/genomes/melUnd1 time blat melUnd1.2bit /dev/null /dev/null -tileSize=11 \ -makeOoc=jkStuff/melUnd1.11.ooc -repMatch=400 # Wrote 14287 overused 11-mers to jkStuff/melUnd1.11.ooc # real 0m24.153s # there are no non-bridged gaps, no lift file needed for genbank hgsql -N -e "select bridge from gap;" melUnd1 | sort | uniq -c # 51022 yes # cd /hive/data/genomes/melUnd1/jkStuff # gapToLift melUnd1 melUnd1.nonBridged.lift -bedFile=melUnd1.nonBridged.bed # largest non-bridged contig: # awk '{print $3-$2,$0}' melUnd1.nonBridged.bed | sort -nr | head # 123773608 chrX 95534 123869142 chrX.01 ######################################################################### # AUTO UPDATE GENBANK (DONE - 2012-05-03 - 
Hiram) # examine the file: /cluster/data/genbank/data/organism.lst # for your species to see what counts it has for: # organism mrnaCnt estCnt refSeqCnt # Melopsittacus undulatus 25 1 0 # to decide which "native" mrna or ests you want to specify in genbank.conf ssh hgwdev cd $HOME/kent/src/hg/makeDb/genbank git pull # edit etc/genbank.conf to add melUnd1 just after ce2 # melUnd1 (Budgerigar) melUnd1.serverGenome = /hive/data/genomes/melUnd1/melUnd1.2bit melUnd1.clusterGenome = /hive/data/genomes/melUnd1/melUnd1.2bit melUnd1.ooc = /hive/data/genomes/melUnd1/jkStuff/melUnd1.11.ooc melUnd1.lift = no melUnd1.refseq.mrna.native.pslCDnaFilter = ${lowCover.refseq.mrna.native.pslCDnaFilter} melUnd1.refseq.mrna.xeno.pslCDnaFilter = ${lowCover.refseq.mrna.xeno.pslCDnaFilter} melUnd1.genbank.mrna.native.pslCDnaFilter = ${lowCover.genbank.mrna.native.pslCDnaFilter} melUnd1.genbank.mrna.xeno.pslCDnaFilter = ${lowCover.genbank.mrna.xeno.pslCDnaFilter} melUnd1.genbank.est.native.pslCDnaFilter = ${lowCover.genbank.est.native.pslCDnaFilter} melUnd1.refseq.mrna.native.load = no melUnd1.refseq.mrna.xeno.load = yes melUnd1.genbank.mrna.xeno.load = yes melUnd1.genbank.est.native.load = no melUnd1.downloadDir = melUnd1 melUnd1.perChromTables = no # end of section added to etc/genbank.conf git commit -m "adding melUnd1 budgerigar" etc/genbank.conf git push make etc-update git pull # Edit src/lib/gbGenome.c to add new species. 
git commit -m "adding definition for melUndNames" \ src/lib/gbGenome.c git push make install-server ssh hgwdev # used to do this on "genbank" machine screen -S melUnd1 # long running job managed in screen cd /cluster/data/genbank time nice -n +19 ./bin/gbAlignStep -initial melUnd1 & # after setting genbank.mrna.xeno.load = no # var/build/logs/2012.06.07-16:08:32.melUnd1.initalign.log # real 883m56.428s # load database when finished ssh hgwdev cd /cluster/data/genbank time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad melUnd1 & # real 10m14.118s # var/dbload/hgwdev/logs/2012.06.10-12:54:07.dbload.log # enable daily alignment and update of hgwdev (DONE - 2012-05-09 - Hiram) cd ~/kent/src/hg/makeDb/genbank git pull # add melUnd1 to: etc/align.dbs etc/hgwdev.dbs git commit -m "Added melUnd1." etc/align.dbs etc/hgwdev.dbs git push make etc-update ######################################################################### # set default position to RHO gene displays (DONE - 2012-07-23 - Hiram) hgsql -e \ 'update dbDb set defaultPos="JH556451:700038-706370" where name="melUnd1";' \ hgcentraltest ############################################################################ # pushQ entry (DONE - 2012-07-23 - Hiram) mkdir /hive/data/genomes/melUnd1/pushQ cd /hive/data/genomes/melUnd1/pushQ # Mark says don't let the transMap track get there time makePushQSql.pl melUnd1 2> stderr.txt | grep -v transMap > melUnd1.sql # real 4m1.757s scp -p melUnd1.sql hgwbeta:/tmp ssh hgwbeta cd /tmp hgsql qapushq < melUnd1.sql ############################################################################ # LASTZ Medium Ground Finch geoFor1 (DONE - 2012-07-28 - Hiram) # establish a screen to control this job with a name to indicate what it is screen -S melUnd1 mkdir /hive/data/genomes/melUnd1/bed/lastzGeoFor1.2012-07-28 cd /hive/data/genomes/melUnd1/bed/lastzGeoFor1.2012-07-28 # adjust the SEQ2_LIMIT with -stop=partition to get a reasonable # number of jobs, 50,000 to something under 100,000 cat 
<< '_EOF_' > DEF # Budgerigar vs. Medium Ground Finch BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.02/bin/lastz # TARGET: Budgerigar melUnd1 SEQ1_DIR=/hive/data/genomes/melUnd1/melUnd1.2bit SEQ1_LEN=/hive/data/genomes/melUnd1/chrom.sizes SEQ1_CHUNK=10000000 SEQ1_LAP=10000 SEQ1_LIMIT=100 # QUERY: Medium Ground Finch GeoFor1 SEQ2_DIR=/hive/data/genomes/geoFor1/geoFor1.2bit SEQ2_LEN=/hive/data/genomes/geoFor1/chrom.sizes SEQ2_CHUNK=10000000 SEQ2_LAP=0 SEQ2_LIMIT=100 BASE=/hive/data/genomes/melUnd1/bed/lastzGeoFor1.2012-07-28 TMPDIR=/scratch/tmp '_EOF_' # << happy emacs time doBlastzChainNet.pl -verbose=2 `pwd`/DEF \ -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \ > do.log 2>&1 # real 1628m48.053s cat fb.melUnd1.chainGeoFor1Link.txt # 832321696 bases of 1086614815 (76.598%) in intersection # set sym link to indicate this is the lastz for this genome: cd /hive/data/genomes/melUnd1/bed ln -s lastzGeoFor1.2012-07-28 lastz.geoFor1 mkdir /hive/data/genomes/geoFor1/bed/blastz.melUnd1.swap cd /hive/data/genomes/geoFor1/bed/blastz.melUnd1.swap time nice -n +19 doBlastzChainNet.pl -verbose=2 \ /hive/data/genomes/melUnd1/bed/lastzGeoFor1.2012-07-28/DEF \ -swap -syntenicNet \ -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \ > swap.log 2>&1 & # real 56m32.909s cat fb.geoFor1.chainMelUnd1Link.txt # 839798431 bases of 1041286029 (80.650%) in intersection # set sym link to indicate this is the lastz for this genome: cd /hive/data/genomes/geoFor1/bed ln -s blastz.melUnd1.swap lastz.melUnd1 ############################################################################## # lastz Mouse mm10 (DONE - 2012-04-02 - Hiram) # original alignment cd /hive/data/genomes/mm10/bed/lastzMelUnd1.2012-03-29 cat fb.mm10.chainMelUnd1Link.txt # 95217653 bases of 2652783500 (3.589%) in intersection # and for the swap mkdir /hive/data/genomes/melUnd1/bed/blastz.mm10.swap cd /hive/data/genomes/melUnd1/bed/blastz.mm10.swap time nice -n +19 doBlastzChainNet.pl -verbose=2 \ 
/hive/data/genomes/mm10/bed/lastzMelUnd1.2012-03-29/DEF \ -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \ -swap -chainMinScore=5000 -chainLinearGap=loose > do.log 2>&1 & # real 9m9.260s cat fb.melUnd1.chainMm10Link.txt # 79867911 bases of 1086614815 (7.350%) in intersection # set sym link to indicate this is the lastz for this genome: cd /hive/data/genomes/melUnd1/bed ln -s blastz.mm10.swap lastz.mm10 #########################################################################