# for emacs: -*- mode: sh; -*-

#       DATE:	29-Jun-2011
#       ORGANISM:	Nomascus leucogenys
#       TAXID:	61853
#       ASSEMBLY LONG NAME:	Nleu1.1
#       ASSEMBLY SHORT NAME:	Nleu1.1
#       ASSEMBLY SUBMITTER:	Gibbon Genome Sequencing Consortium
#       ASSEMBLY TYPE:	Haploid
#       NUMBER OF ASSEMBLY-UNITS:	2
#       ASSEMBLY ACCESSION:	GCA_000146795.2
#       ##Below is a 2 column list with assembly-unit id and name.
#       ##The Primary Assembly unit is listed first.
#       GCA_000146805.1	Primary Assembly
#       GCA_000231795.1	non-nuclear

#       FTP-RELEASE DATE: 28-Oct-2011

#       http://www.ncbi.nlm.nih.gov/genome/480
#       http://www.ncbi.nlm.nih.gov/genome/assembly/313108/
#       http://www.ncbi.nlm.nih.gov/bioproject/13975
#       chrMt scaffolds included in the download directory

#       http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=ADFV01
#       Genome Coverage : 5.6x in Q20 bases

#       http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=61853

# rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Nomascus_leucogenys/Nleu1.1/

##########################################################################
# Download sequence (DONE - 2012-04-13 - Hiram)
    mkdir /hive/data/genomes/nomLeu2
    cd /hive/data/genomes/nomLeu2
    mkdir genbank
    cd genbank
    time rsync -a -P \
rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Nomascus_leucogenys/Nleu1.1/ ./
    #   real    real    22m54.139s

    # verify the size of the sequence here:
    faSize Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz \
        non-nuclear/unlocalized_scaffolds/FASTA/chrMT.unlocalized.scaf.fa.gz
    #   2936052603 bases (179443556 N's 2756609047 real 2756609047 upper 0
    #   lower) in 17976 sequences in 2 files
    #   Total size: mean 163331.8 sd 2015356.2
    #   min 782 (gi|350542783|gb|ADFV01197901.1|)
    #   max 74231199 (gi|306404713|gb|GL397261.1|) median 4849

##########################################################################
# fixup names for UCSC standards (DONE - 2012-04-13 - Hiram)
    mkdir /hive/data/genomes/nomLeu2/ucsc
    cd /hive/data/genomes/nomLeu2/ucsc

    ########################  Unplaced scaffolds
    # verify we don't have any .acc numbers different from .1
    zcat \
    ../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz \
	| cut -f1 | egrep "^GL|^ADFV" \
	| sed -e 's/^GL[0-9][0-9]*//; s/^ADFV[0-9][0-9]*//' | sort | uniq -c
    #   377832 .1

    # this is like the unplaced.pl script in other assemblies except it
    #	does not add chrUn_ to the names since they are all just scaffolds

    cat << '_EOF_' > unplaced.pl
#!/bin/env perl

use strict;
use warnings;

my $agpFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
my $fastaFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz";
open (FH, "zcat $agpFile|") or die "can not read $agpFile";
open (UC, "|gzip -c > unplaced.agp.gz") or die "can not write to unplaced.agp.gz";
while (my $line = <FH>) {
    if ($line =~ m/^#/) {
        print UC $line;
    } else {
        $line =~ s/\.1//;    
        printf UC "%s", $line;
    }
}
close (FH);
close (UC);

open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
open (UC, "|gzip -c > unplaced.fa.gz") or die "can not write to unplaced.fa.gz";
while (my $line = <FH>) {
    if ($line =~ m/^>/) {
        chomp $line;
        $line =~ s/.*gb\|//;
        $line =~ s/\|.*//;
        $line =~ s/\.1//;    
        printf UC ">$line\n";
    } else {
        print UC $line;
    }
}
close (FH);
close (UC);
'_EOF_'
    # << happy emacs
    chmod +x unplaced.pl
    time ./unplaced.pl
    #   real    13m19.283s

genbank/non-nuclear/unlocalized_scaffolds/FASTA
    ########################  Unlocalized scaffolds
    cat << '_EOF_' > unlocalized.pl
#!/bin/env perl

use strict;
use warnings;

my %accToChr;
my %chrNames;

open (FH, "<../genbank/non-nuclear/unlocalized_scaffolds/unlocalized.chr2scaf") or
        die "can not read non-nuclear/unlocalized_scaffolds/unlocalized.chr2scaf";
while (my $line = <FH>) {
    next if ($line =~ m/^#/);
    chomp $line;
    my ($chrN, $acc) = split('\s+', $line);
    $accToChr{$acc} = $chrN;
    $chrNames{$chrN} += 1;
}
close (FH);

foreach my $chrN (keys %chrNames) {
    my $agpFile =  "../genbank/non-nuclear/unlocalized_scaffolds/AGP/chr$chrN.unlocalized.scaf.agp.gz";
    my $fastaFile =  "../genbank/non-nuclear/unlocalized_scaffolds/FASTA/chr$chrN.unlocalized.scaf.fa.gz";
    open (FH, "zcat $agpFile|") or die "can not read $agpFile";
    open (UC, "|sed -e 's/chrMT/chrM/g;' | gzip -c >chr${chrN}_random.agp.gz") or die "can not write to chr${chrN}_random.agp";
    while (my $line = <FH>) {
        if ($line =~ m/^#/) {
            print UC $line;
        } else {
            chomp $line;
            my (@a) = split('\t', $line);
            my $acc = $a[0];
            my $accNo1 = $acc;
            $accNo1 =~ s/.1$//;
            die "ERROR: acc not .1: $acc" if ($accNo1 =~ m/\./);
            die "ERROR: chrN $chrN not correct for $acc"
                if ($accToChr{$acc} ne $chrN);
            my $ucscName = "chr${chrN}_${accNo1}_random";
            printf UC "%s", $ucscName;
            for (my $i = 1; $i < scalar(@a); ++$i) {
                printf UC "\t%s", $a[$i];
            }
            printf UC "\n";
        }
    }
    close (FH);
    close (UC);
    printf "chr%s\n", $chrN;
    open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
    open (UC, "|sed -e 's/chrMT/chrM/g' | gzip -c >chr${chrN}_random.fa.gz") or die "can not write to chr${chrN}_random.fa";
    while (my $line = <FH>) {
        if ($line =~ m/^>/) {
            chomp $line;
            my $acc = $line;
            $acc =~ s/.*gb\|//;
            $acc =~ s/\|.*//;
            my $accNo1 = $acc;
            $accNo1 =~ s/.1$//;
            die "ERROR: acc not .1: $acc" if ($accNo1 =~ m/\./);
            die "ERROR: chrN $chrN not correct for $acc"
                if ($accToChr{$acc} ne $chrN);
            my $ucscName = "chr${chrN}_${accNo1}_random";
            printf UC ">$ucscName\n";
        } else {
            print UC $line;
        }
    }
    close (FH);
    close (UC);
}
'_EOF_'
    # << happy emacs
    chmod +x unlocalized.pl
    time ./unlocalized.pl
    mv chrMT_random.fa.gz chrM_random.fa.gz
    mv chrMT_random.agp.gz chrM_random.agp.gz
    # verify nothing lost from original:
    time faSize *.fa.gz
    #   2936052603 bases (179443556 N's 2756609047 real 2756609047 upper 0
    #   lower) in 17976 sequences in 2 files
    #   Total size: mean 163331.8 sd 2015356.2
    #   min 782 (chrM_ADFV01197901_random) max 74231199 (GL397261) median 4849

    # make sure none of the names got to be over 31 characers long:
    zcat *.agp.gz | grep -v "^#" | cut -f1 | awk '{print length($0)}' \
        | sort -rn | uniq -c | head
    #       8 24
    #   10004 12
    #  367828 8

##########################################################################
# Initial makeGenomeDb.pl (DONE - 2012-04-13 - Hiram)
    cd /hive/data/genomes/nomLeu2
    # mitoAcc - chrMt sequence is included in the download files
    cat << '_EOF_' > nomLeu2.config.ra
# Config parameters for makeGenomeDb.pl:
db nomLeu2
clade mammal
genomeCladePriority 13
scientificName Nomascus leucogenys
commonName Gibbon
assemblyDate Jun. 2011
assemblyLabel Gibbon Genome Sequencing Consortium Nleu1.1 (NCBI project 13975, GCA_000146795.2, WGS ADFV01)
assemblyShortLabel GGSC Nleu1.1
ncbiAssemblyName Nleu1.1
ncbiAssemblyId 313108
orderKey 329
mitoAcc none
fastaFiles /hive/data/genomes/nomLeu2/ucsc/*.fa.gz
agpFiles /hive/data/genomes/nomLeu2/ucsc/*.agp.gz
# qualFiles none
dbDbSpeciesDir gibbon
taxId 61853
'_EOF_'
    # << happy emacs

    # verify sequence and agp are OK
    time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev -dbHost=hgwdev \
        -stop=agp nomLeu2.config.ra > agp.log 2>&1
    #   real    2m4.625s
    # verify OK:
    tail -1 agp.log
    #   *** All done!  (through the 'agp' step)

    # finish it off
    time makeGenomeDb.pl -continue=db -workhorse=hgwdev -fileServer=hgwdev \
        -dbHost=hgwdev nomLeu2.config.ra > db.log 2>&1
    #   real    21m42.920s
    #	add the trackDb entries to the source tree, and the 2bit link:
    ln -s `pwd`/nomLeu2.unmasked.2bit /gbdb/nomLeu2/nomLeu2.2bit
    #	browser should function now

#########################################################################
# running repeat masker (DONE - 2012-04-13 - Hiram)
    mkdir /hive/data/genomes/nomLeu2/bed/repeatMasker
    cd /hive/data/genomes/nomLeu2/bed/repeatMasker
    time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
	-bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
	-smallClusterHub=encodek nomLeu2 > do.log 2>&1 &
    #   real    1161m52.449s

    cat faSize.rmsk.txt
    #   2936052603 bases (179443556 N's 2756609047 real 1339535283 upper
    #   1417073764 lower) in 17976 sequences in 1 files
    #   Total size: mean 163331.8 sd 2015356.2
    #   min 782 (chrM_ADFV01197901_random) max 74231199 (GL397261) median 4849
    #   %48.26 masked total, %51.41 masked real

    egrep -i "versi|relea" do.log
#    April 26 2011 (open-3-3-0) version of RepeatMasker
# CC   RELEASE 20110920; 
# RepeatMasker version development-$Id: RepeatMasker,v 1.26 2011/09/26 16:19:44 angie Exp $

    time featureBits -countGaps nomLeu2 rmsk
    #   1418576934 bases of 2936052603 (48.316%) in intersection
    #   real    0m40.362s

    # why is it different than the faSize above ?
    # because rmsk masks out some N's as well as bases, the count above
    #	separates out the N's from the bases, it doesn't show lower case N's

##########################################################################
# running simple repeat (DONE - 2012-04-13 - Hiram)
    mkdir /hive/data/genomes/nomLeu2/bed/simpleRepeat
    cd /hive/data/genomes/nomLeu2/bed/simpleRepeat
    time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
	nomLeu2 > do.log 2>&1 &
    #   real    19m41.095s
    XXX 1 failed job, which is all on chrM, a known failure for TRF:
    # ./TrfRun.csh /hive/data/genomes/nomLeu2/TrfPart/000/000.lst.bed
    # make empty result exist:
    touch /hive/data/genomes/nomLeu2/TrfPart/000/000.lst.bed
# Completed: 74 of 75 jobs
# Crashed: 1 jobs
# CPU time in finished jobs:      16153s     269.21m     4.49h    0.19d  0.001 y
# IO & Wait Time:                   198s       3.30m     0.06h    0.00d  0.000 y
# Average job time:                 221s       3.68m     0.06h    0.00d
# Longest finished job:            2158s      35.97m     0.60h    0.02d
# Submission to last job:          2906s      48.43m     0.81h    0.03d

    # continuing:
    time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
	-continue=filter nomLeu2 > filter.log 2>&1 &
    #   real    0m53.367s

    cat fb.simpleRepeat 
    #   122433828 bases of 2756609047 (4.441%) in intersection

    # add to rmsk after it is done:
    cd /hive/data/genomes/nomLeu2
    twoBitMask nomLeu2.rmsk.2bit \
	-add bed/simpleRepeat/trfMask.bed nomLeu2.2bit
    #	you can safely ignore the warning about fields >= 13

    twoBitToFa nomLeu2.2bit stdout | faSize stdin > faSize.nomLeu2.2bit.txt
    cat faSize.nomLeu2.2bit.txt
    #   2936052603 bases (179443556 N's 2756609047 real 1338486944 upper
    #   1418122103 lower) in 17976 sequences in 1 files
    #   Total size: mean 163331.8 sd 2015356.2
    #   min 782 (chrM_ADFV01197901_random) max 74231199 (GL397261) median 4849
    #   %48.30 masked total, %51.44 masked real

    rm /gbdb/nomLeu2/nomLeu2.2bit
    ln -s `pwd`/nomLeu2.2bit /gbdb/nomLeu2/nomLeu2.2bit

#########################################################################
# Verify all gaps are marked, add any N's not in gap as type 'other'
#	(DONE - 2012-04-13 - Hiram)
    mkdir /hive/data/genomes/nomLeu2/bed/gap
    cd /hive/data/genomes/nomLeu2/bed/gap
    time nice -n +19 findMotif -motif=gattaca -verbose=4 \
	-strand=+ ../../nomLeu2.unmasked.2bit > findMotif.txt 2>&1
    #	real    0m23.121s
    grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
    time featureBits nomLeu2 -not gap -bed=notGap.bed
    #   2756609047 bases of 2756609047 (100.000%) in intersection
    #   real    0m25.158s

    time featureBits nomLeu2 allGaps.bed notGap.bed -bed=new.gaps.bed
    #   0 bases of 2756609047 (0.000%) in intersection
    #   real    15m56.963s

    # no new gaps, nothing to do here

    # are there non-bridged gaps here:
    hgsql -N -e "select bridge from gap;" nomLeu2 | sort | uniq -c
    #   179932 yes

##########################################################################
## WINDOWMASKER (DONE - 2012-04-13 - Hiram)
    mkdir /hive/data/genomes/nomLeu2/bed/windowMasker
    cd /hive/data/genomes/nomLeu2/bed/windowMasker
    time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
	-dbHost=hgwdev nomLeu2 > do.log 2>&1 &
    #   real    196m2.385s

    # Masking statistics
    twoBitToFa nomLeu2.wmsk.2bit stdout | faSize stdin
    #   2936052603 bases (179443556 N's 2756609047 real 1711898047 upper
    #   1044711000 lower) in 17976 sequences in 1 files
    #   Total size: mean 163331.8 sd 2015356.2
    #   min 782 (chrM_ADFV01197901_random) max 74231199 (GL397261) median 4849
    #   %35.58 masked total, %37.90 masked real

    twoBitToFa nomLeu2.wmsk.sdust.2bit stdout | faSize stdin
    #   2936052603 bases (179443556 N's 2756609047 real 1697032515 upper
    #   1059576532 lower) in 17976 sequences in 1 files
    #   Total size: mean 163331.8 sd 2015356.2
    #   min 782 (chrM_ADFV01197901_random) max 74231199 (GL397261) median 4849
    #   %36.09 masked total, %38.44 masked real

    hgLoadBed nomLeu2 windowmaskerSdust windowmasker.sdust.bed.gz
    #   Read 15229701 elements of size 3 from windowmasker.sdust.bed.gz

    featureBits -countGaps nomLeu2 windowmaskerSdust
    #   1185785055 bases of 2808525991 (42.221%) in intersection

    #	eliminate the gaps from the masking
    time featureBits nomLeu2 -not gap -bed=notGap.bed
    #   2756609047 bases of 2756609047 (100.000%) in intersection
    #   real    0m22.205s

    time nice -n +19 featureBits nomLeu2 windowmaskerSdust notGap.bed \
        -bed=stdout | gzip -c > cleanWMask.bed.gz
    #   1059576532 bases of 2756609047 (38.438%) in intersection
    #   real    11m9.332s

    #	reload track to get it clean
    hgLoadBed nomLeu2 windowmaskerSdust cleanWMask.bed.gz
    #   Read 15242903 elements of size 4 from cleanWMask.bed.gz
    featureBits -countGaps nomLeu2 windowmaskerSdust
    #   1059576532 bases of 2936052603 (36.088%) in intersection
    #   real    1m38.313s
    zcat cleanWMask.bed.gz \
	| twoBitMask ../../nomLeu2.unmasked.2bit stdin \
	    -type=.bed nomLeu2.cleanWMSdust.2bit
    twoBitToFa nomLeu2.cleanWMSdust.2bit stdout | faSize stdin \
        > nomLeu2.cleanWMSdust.faSize.txt
    cat nomLeu2.cleanWMSdust.faSize.txt
    #   2936052603 bases (179443556 N's 2756609047 real 1697032515 upper
    #   1059576532 lower) in 17976 sequences in 1 files
    #   Total size: mean 163331.8 sd 2015356.2
    #   min 782 (chrM_ADFV01197901_random) max 74231199 (GL397261) median 4849
    #   %36.09 masked total, %38.44 masked real

    # how much does this window masker and repeat masker overlap:
    featureBits -countGaps nomLeu2 rmsk windowmaskerSdust
    #   837096446 bases of 2936052603 (28.511%) in intersection

#########################################################################
# MASK SEQUENCE WITH WM+TRF (DONE - 2012-04-13 - Hiram)
    #	since rmsk masks so very little of the genome, use the clean WM mask
    #	on the genome sequence
#    cd /hive/data/genomes/nomLeu2
#    twoBitMask -add bed/windowMasker/nomLeu2.cleanWMSdust.2bit \
#	bed/simpleRepeat/trfMask.bed nomLeu2.2bit
    #	safe to ignore the warnings about BED file with >=13 fields
#    twoBitToFa nomLeu2.2bit stdout | faSize stdin > faSize.nomLeu2.txt
#    cat faSize.nomLeu2.txt
    #   927696114 bases (111611440 N's 816084674 real 607935500 upper
    #   208149174 lower) in 5678 sequences in 1 files
    #   Total size: mean 163384.3 sd 1922194.0 min 1000 (AERX01077754-1)
    #   max 51042256 (chrLG7) median 2009
    #   %22.44 masked total, %25.51 masked real

    #	create symlink to gbdb
#    rm /gbdb/nomLeu2/nomLeu2.2bit
#    ln -s `pwd`/nomLeu2.2bit /gbdb/nomLeu2/nomLeu2.2bit

########################################################################
# cpgIslands - (DONE - 2011-04-23 - Hiram)
    mkdir /hive/data/genomes/nomLeu2/bed/cpgIslands
    cd /hive/data/genomes/nomLeu2/bed/cpgIslands
    time doCpgIslands.pl nomLeu2 > do.log 2>&1
    #   Elapsed time: 61m44s

    cat fb.nomLeu2.cpgIslandExt.txt
    #   17731794 bases of 2756609047 (0.643%) in intersection

#########################################################################
# genscan - (DONE - 2011-04-26 - Hiram)
    mkdir /hive/data/genomes/nomLeu2/bed/genscan
    cd /hive/data/genomes/nomLeu2/bed/genscan
    time doGenscan.pl nomLeu2 > do.log 2>&1
    #   Elapsed time: 66m40s

    cat fb.nomLeu2.genscan.txt
    #   49800461 bases of 2756609047 (1.807%) in intersection
    cat fb.nomLeu2.genscanSubopt.txt
    #   52057790 bases of 2756609047 (1.888%) in intersection

#########################################################################
# MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2012-05-07 - Hiram)
    # Use -repMatch=900, based on size -- for human we use 1024
    # use the "real" number from the faSize measurement,
    # hg19 is 2897316137, calculate the ratio factor for 1024:
    calc \( 2756609047 / 2897316137 \) \* 1024
    #	( 2756609047 / 2897316137 ) * 1024 = 974.269818

    # round up to 1000 (nomLeu1 was 900)

    cd /hive/data/genomes/nomLeu2
    time blat nomLeu2.2bit /dev/null /dev/null -tileSize=11 \
      -makeOoc=jkStuff/nomLeu2.11.ooc -repMatch=1000
    #   Wrote 30203 overused 11-mers to jkStuff/nomLeu2.11.ooc
    #   real    1m47.770s
    #	nomLeu1 was: # Wrote 36558 overused 11-mers to jkStuff/nomLeu1.11.ooc

    # there are no non-bridged gaps, no lift file needed for genbank
    hgsql -N -e "select bridge from gap;" nomLeu2 | sort | uniq -c
    #   179932 yes
#    cd /hive/data/genomes/nomLeu2/jkStuff
#    gapToLift nomLeu2 nomLeu2.nonBridged.lift -bedFile=nomLeu2.nonBridged.bed
    # largest non-bridged contig:
#    awk '{print $3-$2,$0}' nomLeu2.nonBridged.bed | sort -nr | head
    #   123773608 chrX  95534   123869142       chrX.01

#########################################################################
# AUTO UPDATE GENBANK (DONE - 2012-05-07 - Hiram)
    # examine the file:
    /cluster/data/genbank/data/organism.lst
    # for your species to see what counts it has for:
# organism       mrnaCnt estCnt  refSeqCnt
# Heterocephalus glaber	45	0	0
    # to decide which "native" mrna or ests you want to specify in genbank.conf

    ssh hgwdev
    cd $HOME/kent/src/hg/makeDb/genbank
    git pull
    # edit etc/genbank.conf to add nomLeu2 just after ce2
# nomLeu2 (Gibbon)
nomLeu2.serverGenome = /hive/data/genomes/nomLeu2/nomLeu2.2bit
nomLeu2.clusterGenome = /hive/data/genomes/nomLeu2/nomLeu2.2bit
nomLeu2.ooc = /hive/data/genomes/nomLeu2/jkStuff/nomLeu2.11.ooc
nomLeu2.lift = no
nomLeu2.refseq.mrna.native.pslCDnaFilter  = ${lowCover.refseq.mrna.native.pslCDnaFilter}
nomLeu2.refseq.mrna.xeno.pslCDnaFilter    = ${lowCover.refseq.mrna.xeno.pslCDnaFilter}
nomLeu2.genbank.mrna.native.pslCDnaFilter = ${lowCover.genbank.mrna.native.pslCDnaFilter}
nomLeu2.genbank.mrna.xeno.pslCDnaFilter   = ${lowCover.genbank.mrna.xeno.pslCDnaFilter}
nomLeu2.genbank.est.native.pslCDnaFilter  = ${lowCover.genbank.est.native.pslCDnaFilter}
nomLeu2.refseq.mrna.native.load = no
nomLeu2.refseq.mrna.xeno.load = yes
nomLeu2.genbank.mrna.native.load = no
nomLeu2.genbank.mrna.xeno.load = yes
nomLeu2.genbank.est.native.load = no
nomLeu2.downloadDir = nomLeu2
nomLeu2.perChromTables = no

    # end of section added to etc/genbank.conf
    git commit -m "adding nomLeu2 Gibbon" etc/genbank.conf
    git push
    make etc-update

    ssh hgwdev			# used to do this on "genbank" machine
    screen -S nomLeu2           # long running job managed in screen
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbAlignStep -initial nomLeu2 &
    #	 var/build/logs/2012.05.07-10:21:12.nomLeu2.initalign.log
    #   real    2135m50.446s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad nomLeu2 &
    #	logFile: var/dbload/hgwdev/logs/2012.02.09-10:05:25.dbload.log
    #   real    80m50.729s

    # enable daily alignment and update of hgwdev (DONE - 2012-02-09 - Hiram)
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # add nomLeu2 to:
    vi etc/align.dbs etc/hgwdev.dbs
    git commit -m "Added nomLeu2." etc/align.dbs etc/hgwdev.dbs
    git push
    make etc-update

#########################################################################
# set default position to RHO gene displays  (DONE - 2012-07-24 - Hiram)
    hgsql -e \
'update dbDb set defaultPos="GL397298:23130040-23137416" where name="nomLeu2";' \
	hgcentraltest

############################################################################
# pushQ entry (DONE - 2012-07-24 - Hiram)
    mkdir /hive/data/genomes/nomLeu2/pushQ
    cd /hive/data/genomes/nomLeu2/pushQ
    # Mark says don't let the transMap track get there
    time makePushQSql.pl nomLeu2 2> stderr.txt | grep -v transMap > nomLeu2.sql
    #   real    3m52.913s

    scp -p nomLeu2.sql hgwbeta:/tmp
    ssh hgwbeta "hgsql qapushq < /tmp/nomLeu2.sql"

############################################################################