# for emacs: -*- mode: sh; -*-


# This file describes browser build for the Marmoset
# genome, July 2007
#
#	"$Id: calJac1.txt,v 1.22 2010/02/12 23:42:33 hiram Exp $"
#
######################################################################
## DOWNLOAD SEQUENCE (DONE - 2007-08-21 - Hiram)
    ssh kkstore06
    mkdir /cluster/store4/calJac1
    ln -s /cluster/store4/calJac1 /cluster/data/calJac1
    mkdir /cluster/data/calJac1/wustl
    cd /cluster/data/calJac1/wustl
    for F in supercontigs.agp.gz supercontigs.fa.gz contigs.fa.gz contigs.fa.qual.gz
do
    wget --timestamping \
/pub/organism/Primates/Callithrix_jacchus/assembly/Callithrix_jacchus-2.0.2/output/${F} \
    -O ${F}
done
    #	real    50m13.535s
    ls -ogrt
# -rw-rw-r--  1   6656649 Jun 19 17:03 supercontigs.agp.gz
# -rw-rw-r--  1 521109271 Jun 19 17:03 contigs.fa.qual.gz
# -rw-rw-r--  1 781437003 Jun 19 18:30 contigs.fa.gz
# -rw-rw-r--  1 851641082 Aug 21 13:29 supercontigs.fa.gz

##########################################################################
# fetch photograph (DONE - 2007-08-21 - Hiram)
    mkdir /cluster/data/calJac1/photo
    cd /cluster/data/calJac1/photo
    wget --timestamping \
	http://www.genome.gov/Images/press_photos/highres/82-300.jpg \
	    -O nhgri.original.82-300.jpg
    convert -geometry 300x200 -quality 80 nhgri.original.82-300.jpg \
	Callithrix_jacchus.jpg
    # check this .jpg image into the source tree browser/images/ directory

#######################################################################
## create config.ra and run makeGenomeDb.pl 
    ssh kkstore06
    cd /cluster/data/calJac1

    cat << '_EOF_' > calJac1.config.ra
# Config parameters for makeGenomeDb.pl:
db calJac1
scientificName Callithrix jacchus
commonName Marmoset
assemblyDate Jun. 2007
assemblyLabel WUSTL 2.0.2
orderKey 40
clade mammal
genomeCladePriority 16
mitoAcc none
fastaFiles /cluster/data/calJac1/wustl/supercontigs.fa.gz
agpFiles /cluster/data/calJac1/wustl/supercontigs.agp.gz
# qualFiles /dev/null
dbDbSpeciesDir marmoset
'_EOF_'
    # << happy emacs
    time nice -n +19 ~/kent/src/hg/utils/automation/makeGenomeDb.pl \
	-stop=agp calJac1.config.ra > makeGenomeDb.out 2>&1 &
    #	real    24m24.468s
    time nice -n +19 ~/kent/src/hg/utils/automation/makeGenomeDb.pl \
	-continue=db calJac1.config.ra > db.continue.out 2>&1 &
    # add the trackDb files to the source tree and to the trackDb/makefile

##########################################################################
## Repeat masker (DONE - 2007-08-21 - Hiram)
    ssh kkstore06
    ## use screen for this
    mkdir /cluster/data/calJac1/bed/RepeatMasker
    cd /cluster/data/calJac1/bed/RepeatMasker
    time nice -n +19 ~/kent/src/hg/utils/automation/doRepeatMasker.pl \
	-bigClusterHub=kk \
	-buildDir=/cluster/data/calJac1/bed/RepeatMasker calJac1 > do.out 2>&1 &

##############################################################################
## simpleRepeat masking (DONE - 2007-09-05 - Hiram)
    ## create a kki kluster run
    ssh kkr1u00
    mkdir /iscratch/i/calJac1
    cd /iscratch/i/calJac1
    cp -p /cluster/data/calJac1/calJac1.unmasked.2bit .
    cp -p /cluster/data/calJac1/chrom.sizes .
    twoBitToFa calJac1.unmasked.2bit calJac1.unmasked.fa
    mkdir split
    #  split sequence into about 1000 files, each about 3,000,000 bases
    time nice -n +19 faSplit about calJac1.unmasked.fa 3000000 split/cj1_

    for R in 2 3 4 5 6 7 8
do
    rsync -a --progress /iscratch/i/calJac1/ kkr${R}u00:/iscratch/i/calJac1/
done

    ssh kki
    mkdir -p /cluster/data/calJac1/bed/simpleRepeat/trf
    cd /cluster/data/calJac1/bed/simpleRepeat/trf

    cat << '_EOF_' > runTrf
#!/bin/csh -fe 
#
set C = $1:r
set SRC = /iscratch/i/calJac1/split/$C.fa
mkdir -p /scratch/tmp/$C
cp -p $SRC /scratch/tmp/$C/$C.fa
pushd /scratch/tmp/$C
/cluster/bin/i386/trfBig -trf=/cluster/bin/i386/trf $C.fa \
	/dev/null -bedAt=$C.bed -tempDir=/scratch/tmp/$C
popd
rm -f $C.bed
cp -p /scratch/tmp/$C/$C.bed .
rm -fr /scratch/tmp/$C
'_EOF_'
    # << happy emacs
    chmod +x runTrf

    cat << '_EOF_' > template
#LOOP
./runTrf $(path1) {check out line $(root1).bed}
#ENDLOOP
'_EOF_'
    # << happy emacs

    ls /iscratch/i/calJac1/split > part.list
    gensub2 part.list single template jobList
    para create jobList
    para try ... check ... push ... etc ...
# Completed: 947 of 947 jobs
# CPU time in finished jobs:      37242s     620.70m    10.35h    0.43d  0.001 y
# IO & Wait Time:                  2842s      47.36m     0.79h    0.03d  0.000 y
# Average job time:                  42s       0.71m     0.01h    0.00d
# Longest finished job:            1318s      21.97m     0.37h    0.02d
# Submission to last job:          3572s      59.53m     0.99h    0.04d

    cat *.bed > ../simpleRepeat.bed
    cd ..
    awk '{if ($5 <= 12) print;}' simpleRepeat.bed > trfMask.bed

    ssh hgwdev
    cd /cluster/data/calJac1/bed/simpleRepeat
    time nice -n +19 hgLoadBed calJac1 simpleRepeat \
      simpleRepeat.bed -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql
    #	Loaded 846105 elements of size 16
    #	real    0m24.710s

    nice -n +19 featureBits calJac1 simpleRepeat \
	> fb.simpleRepeat.calJac1.txt 2>&1
    cat fb.simpleRepeat.calJac1.txt
    #	100489601 bases of 2929139385 (3.431%) in intersection

    #	add the trfMask to the rmsk masked sequence to get our final
    #	masked sequence
    ssh kkstore06
    cd /cluster/data/calJac1
    time nice -n +19 cat bed/simpleRepeat/trfMask.bed \
	| twoBitMask -add -type=.bed calJac1.rmsk.2bit stdin calJac1.2bit
    #	measure it
    time nice -n +19 twoBitToFa calJac1.2bit stdout \
	| faSize stdin > faSize.calJac1.2bit.txt 2>&1
    grep masked faSize.calJac1.2bit.txt
    #	%45.93 masked total, %47.50 masked real

    ## clean up the /iscratch/i/calJac1/ directory
    ssh kkr1u00
    cd /iscratch/i/calJac1
    rm -fr *
    for R in 2 3 4 5 6 7 8
do
    rsync -a --progress --delete --stats /iscratch/i/calJac1/ kkr${R}u00:/iscratch/i/calJac1/
done
    cd ..
    rmdir calJac1
    for R in 2 3 4 5 6 7 8
do
    ssh kkr${R}u00 rmdir /iscratch/i/calJac1
done

############################################################################
#  BLATSERVERS ENTRY (DONE - 2007-09-06 - Hiram)
#	After getting a blat server assigned by the Blat Server Gods,
    ssh hgwdev

    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("calJac1", "blat13", "17786", "1", "0"); \
	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("calJac1", "blat13", "17787", "0", "1");' \
	    hgcentraltest
    #	test it with some sequence

############################################################################
##  BLASTZ swap from hg18 alignments (2007-11-11 - markd)
    ssh hgwdev
    mkdir /cluster/data/calJac1/bed/blastz.hg18.swap
    cd /cluster/data/calJac1/bed/blastz.hg18.swap
    ln -s blastz.hg18.swap ../blastz.hg18
    /cluster/bin/scripts/doBlastzChainNet.pl \
        -swap /cluster/data/hg18/bed/blastz.calJac1.2007-10-07/DEF >& swap.out&

    # fb.calJac1.chainHg18Link.txt:
    #   2426684781 bases of 2929139385 (82.846%) in intersection

    #	running syntenic net (DONE - 2007-12-14 - Hiram)
    time nice -n +19 $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
    /cluster/data/hg18/bed/blastz.calJac1.2007-10-07/DEF \
      -bigClusterHub=pk -continue=syntenicNet -syntenicNet \
      -swap -chainMinScore=3000 -chainLinearGap=medium > syntenicNet.log 2>&1 &
    #	real    8m24.277s
    #	failed during a chainSplit:
    #	Can't open chain/Contig836.chain to append: Too many open files

    #	create reciprocal best chains/nets for 9-way maf alignments
    ssh hgwdev
    cd /cluster/data/calJac1/bed/blastz.hg18.swap
    time nice -n +19 /cluster/bin/scripts/doRecipBest.pl calJac1 hg18 \
	> rbest.log 2>&1 &

#########################################################################
## genscan run (DONE - 2007-11-08 - Hiram)
##	create hard masked sequence
    ssh kkstore06
    cd /cluster/data/calJac1
    twoBitToFa calJac1.2bit stdout \
	| maskOutFa stdin hard stdout | faToTwoBit stdin calJac1.hard.2bit
    #	And, make sure there aren't any sequences in this lot that have
    #	become all N's with no sequence left in them.  This drives genscan nuts
    twoBitToFa calJac1.hard.2bit stdout \
	| faCount stdin > faCount.hard.txt
    #	the lowest three are:
    egrep -v "^#|^total" faCount.hard.txt \
	| awk '{print $1,$2-$7}' | sort -k2,2nr | tail -3
    #	there are a lot of these that turned into zero sequence left
    #	will sort this out when making the sequence to use on the Iservers

    ssh kkr1u00
    mkdir /iscratch/i/calJac1/hardMasked
    cd /iscratch/i/calJac1/hardMasked
    twoBitToFa /cluster/data/calJac1/calJac1.hard.2bit stdout \
	| faSplit byname stdin ./
    #	more than 128 bases of sequence results in the removal of 1,208
    #	sequences that are too short, leaving 48,516 sequences
    egrep -v "^#|^total" /cluster/data/calJac1/faCount.hard.txt \
	| awk '{size=$2-$7; if (size < 128) {print $1}}' | while read F
do
    rm -f "${F}.fa"
    echo "${F}.fa"
done
    mkdir ../hardChunks
    cd ../hardChunks
    
    #	chunk them up into 4,000,000 base packages, no sequence is broken
    catDir ../hardMasked \
	| faSplit about stdin 4000000 c_
    rm -fr ../hardMasked

    for R in 2 3 4 5 6 7 8
do
    rsync -a --progress ./ kkr${R}u00:/iscratch/i/calJac1/hardChunks/
done

    ssh hgwdev
    mkdir /cluster/data/calJac1/bed/genscan
    cd /cluster/data/calJac1/bed/genscan
    # Check out hg3rdParty/genscanlinux to get latest genscan:
    cvs co hg3rdParty/genscanlinux

    # Run on small cluster (more mem than big cluster).
    ssh kki
    cd /cluster/data/calJac1/bed/genscan
    # Make 3 subdirectories for genscan to put their output files in
    mkdir gtf pep subopt
    # Generate a list file, genome.list, of all the hard-masked contigs that 
    # *do not* consist of all-N's (which would cause genscan to blow up)
    #	Since we split on gaps, we have no chunks like that.  You can
    #	verify with faCount on the chunks.
    ls -1Sr /iscratch/i/calJac1/hardChunks/c_*.fa > genome.list

    # Create script to run gsBig
    cat << '_EOF_' > runGsBig
#!/bin/csh -fe
set runDir = `pwd`
set srcDir = $1
set inFile = $2
set fileRoot = $inFile:r
mkdir /scratch/tmp/$fileRoot
cp -p $srcDir/$inFile /scratch/tmp/$fileRoot
pushd /scratch/tmp/$fileRoot
/cluster/bin/x86_64/gsBig $inFile $fileRoot.gtf -trans=$fileRoot.pep -subopt=$fileRoot.bed -exe=$runDir/hg3rdParty/genscanlinux/genscan -par=$runDir/hg3rdParty/genscanlinux/HumanIso.smat -tmp=/scratch/tmp -window=2400000
popd
cp -p /scratch/tmp/$fileRoot/$fileRoot.gtf gtf
cp -p /scratch/tmp/$fileRoot/$fileRoot.pep pep
cp -p /scratch/tmp/$fileRoot/$fileRoot.bed subopt
rm -fr /scratch/tmp/$fileRoot
'_EOF_'
    # << happy emacs
    chmod +x runGsBig

    #	template file for gensub2
    cat << '_EOF_' > template
#LOOP
runGsBig /iscratch/i/calJac1/hardChunks $(file1) {check out line gtf/$(root1).gtf} {check out line pep/$(root1).pep} {check out line subopt/$(root1).bed} 
#ENDLOOP
'_EOF_'
    # << happy emacs

    gensub2 genome.list single template jobList
    para create jobList
    para try, check, push, check, ...
# Completed: 720 of 720 jobs
# CPU time in finished jobs:      55325s     922.09m    15.37h    0.64d  0.002 y
# IO & Wait Time:                  2063s      34.38m     0.57h    0.02d  0.000 y
# Average job time:                  80s       1.33m     0.02h    0.00d
# Longest finished job:             132s       2.20m     0.04h    0.00d
# Submission to last job:         65396s    1089.93m    18.17h    0.76d

    # cat and lift the results into single files
    ssh kkstore06
    cd /cluster/data/calJac1/bed/genscan
    cat gtf/c_*.gtf > genscan.gtf
    cat subopt/c_*.bed > genscanSubopt.bed
    cat pep/c_*.pep > genscan.pep

    # Load into the database as so:
    ssh hgwdev
    cd /cluster/data/calJac1/bed/genscan
    ldHgGene calJac1 -gtf genscan genscan.gtf
    #	Read 64005 transcripts in 344791 lines in 1 files
    #	64005 groups 23602 seqs 1 sources 1 feature types
    #	64005 gene predictions

    hgPepPred calJac1 generic genscanPep genscan.pep
    hgLoadBed calJac1 genscanSubopt genscanSubopt.bed
    #	Loaded 576960 elements of size 6

    #	check the numbers
    time nice -n +19 featureBits calJac1 genscan
    #	59205113 bases of 2929139385 (2.021%) in intersection
    #	the next closest genome with a genscan track
    time nice -n +19 featureBits panTro2 genscan
    #	53758386 bases of 2909485072 (1.848%) in intersection
    time nice -n +19 featureBits mm9 genscan
    #	55293837 bases of 2620346127 (2.110%) in intersection

############################################################################
# GENBANK AUTO UPDATE (DONE - 2007-11-21 - Hiram)
    #	Create a lift file as per the procedures for Chimp from the AGP:
    ssh kolossus
    cd /cluster/data/calJac1
    # MAKE 11.OOC FILE FOR BLAT
    blat calJac1.2bit \
	 /dev/null /dev/null -tileSize=11 -makeOoc=11.ooc -repMatch=1024
    #	Wrote 34303 overused 11-mers to 11.ooc

    # align with latest genbank process.
    ssh hgwdev
    cd ~/kent/src/hg/makeDb/genbank
    cvsup
    # edit etc/genbank.conf to add calJac1 just after panTro2
    # calJac1
# Marmoset
calJac1.serverGenome = /cluster/data/calJac1/calJac1.2bit
calJac1.clusterGenome = /scratch/data/calJac1/calJac1.2bit
calJac1.ooc = /cluster/data/calJac1/calJac1/11.ooc
calJac1.lift = no
calJac1.refseq.mrna.native.pslCDnaFilter  = ${ordered.refseq.mrna.native.pslCDnaFilter}
calJac1.refseq.mrna.xeno.pslCDnaFilter    = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
calJac1.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
calJac1.genbank.mrna.xeno.pslCDnaFilter   = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
calJac1.genbank.est.native.pslCDnaFilter  = ${ordered.genbank.est.native.pslCDnaFilter}
calJac1.genbank.est.xeno.pslCDnaFilter    = ${ordered.genbank.est.xeno.pslCDnaFilter}
calJac1.downloadDir = calJac1
calJac1.genbank.est.xeno.load = no
calJac1.refseq.mrna.native.load  = yes
calJac1.refseq.mrna.xeno.load = yes
calJac1.refseq.mrna.xeno.loadDesc  = yes

    cvs ci -m "Added calJac1." etc/genbank.conf
    # update /cluster/data/genbank/:
    make etc-update

    # Edit src/lib/gbGenome.c to add new species.  With these two lines:
    #	static char *calJacNames[] = {"Callithrix jacchus", NULL};
    #	{"calJac", calJacNames},

    cvs ci -m "Added Callithrix jacchus (Marmoset)." src/lib/gbGenome.c
    make install-server

    ssh genbank
    screen  # control this business with a screen since it takes a while
    cd /cluster/data/genbank

    # This is a call to a script that will push our jobs out to the cluster
    # since it's a big job.  
    time nice -n +19 bin/gbAlignStep -initial calJac1 &
    # logFile: var/build/logs/2007.11.20-11:31:54.calJac1.initalign.log
    #	real    607m38.957s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad calJac1
    #	logFile: var/dbload/hgwdev/logs/2007.11.25-10:26:23.dbload.log
    #	real    26m30.926s

    # enable daily alignment and update of hgwdev (DONE - 2007-11-21 - Hiram)
    cd ~/kent/src/hg/makeDb/genbank
    cvsup
    # add calJac1 to:
        etc/align.dbs
        etc/hgwdev.dbs
    cvs ci -m "Added calJac1." etc/align.dbs etc/hgwdev.dbs
    make etc-update

#########################################################################
# Blastz Platypus ornAna1 (DONE - 2007-11-14 - Hiram)
#	this was done a second time, see ornAna1.txt for the second run
#	since this run produced a null result for some unknown reason.
    ssh kkstore06
    screen # use screen to control this job
    mkdir /cluster/data/calJac1/bed/blastzOrnAna1.2007-11-14
    cd /cluster/data/calJac1/bed/blastzOrnAna1.2007-11-14

    cat << '_EOF_' > DEF
# Orangutan vs. platypus
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q
BLASTZ_M=50

# QUERY: Marmoset calJac1
SEQ1_DIR=/cluster/bluearc/scratch/data/calJac1/calJac1.2bit
SEQ1_LEN=/cluster/data/calJac1/chrom.sizes
SEQ1_CHUNK=20000000
SEQ2_LIMIT=400
SEQ1_LAP=0

# QUERY: Platypus ornAna1
SEQ2_DIR=/cluster/bluearc/scratch/data/ornAna1/ornAna1.2bit
SEQ2_LEN=/cluster/data/ornAna1/chrom.sizes
SEQ2_CHUNK=40000000
SEQ2_LIMIT=400
SEQ2_LAP=0

BASE=/cluster/data/calJac1/bed/blastzOrnAna1.2007-11-14
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl DEF -chainMinScore=5000 \
	-chainLinearGap=loose -bigClusterHub=pk -verbose=2 > do.log 2>&1 &
    #	real    1927m20.962s - to the first pk crash
    #	this was a tough job to get finished.  Several pk crashes,
    #	problems with garbage in the para.results file, and so forth.
    #	But, it did finish as of Monday afternoon 2007-11-19
# Completed: 899536 of 900180 jobs
# Crashed: 644 jobs
# CPU time in finished jobs:  131663141s 2194385.68m 36573.09h 1523.88d  4.175 y
# IO & Wait Time:              12592457s  209874.29m  3497.90h  145.75d  0.399 y
# Average job time:                 160s       2.67m     0.04h    0.00d
# Longest finished job:            1795s      29.92m     0.50h    0.02d
# Submission to last job:        440290s    7338.17m   122.30h    5.10d
    #	despite the '644 jobs' crashed, they are actually done and all results
    #	are complete

    #	continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 `pwd`/DEF \
	-chainMinScore=5000 -chainLinearGap=loose \
	-continue=cat -bigClusterHub=pk > cat.log 2>&1 &
    #	real    31m45.069s
    cat fb.calJac1.chainOrnAna1Link.txt
    #	0 bases of 2929139385 (0.000%) in intersection
    #	This error was fixed in the script.  It failed on a command in one of
    #	the ssh scripts that happened to run under the bash shell which did
    #	not detect the error in a set of piped commands.  It was a shell
    #	wild-card expansion problem, changed to a 'find' to avoid that.

###########################################################################
## BLASTZ Mouse Mm9 swap (DONE - 2007-09-07 - Hiram
    ssh kkstore06
    #	use a screen to control this job
    screen
    #	the original alignment
    cd /cluster/data/mm9/bed/blastzCalJac1.2007-09-06
    cat fb.mm9.chainCalJac1Link.txt
    #	863961573 bases of 2620346127 (32.971%) in intersection

    #	the swap
    mkdir /cluster/data/calJac1/bed/blastz.mm9.swap
    cd /cluster/data/calJac1/bed/blastz.mm9.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/cluster/data/mm9/bed/blastzCalJac1.2007-09-06/DEF \
	-stop=load -chainMinScore=3000 \
	-swap -chainLinearGap=medium -bigClusterHub=pk > swap.log 2>&1 &
    #	real    217m10.835s
    cat fb.calJac1.chainMm9Link.txt
    #	887586922 bases of 2929139385 (30.302%) in intersection
    time nice -n +19 $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
	-verbose=2 /cluster/data/mm9/bed/blastzCalJac1.2007-09-06/DEF \
	-continue=download -chainMinScore=3000 \
	-swap -chainLinearGap=medium -bigClusterHub=pk > download.log 2>&1 &
    #	real    1m9.876s

    #	run the syntenic net for multiple alignment (DONE - 2007-12-14 - Hiram)
    time nice -n +19 $HOME/kent/src/hg/utils/automation/doBlastzChainNet.pl \
	-verbose=2 /cluster/data/mm9/bed/blastzCalJac1.2007-09-06/DEF \
	-syntenicNet -continue=syntenicNet -chainMinScore=3000 \
	-swap -chainLinearGap=medium -bigClusterHub=pk > syntenicNet.log 2>&1 &
    #	real    7m23.683s
    #	failed during a chainSplit:
    #	Can't open chain/Contig1203.chain to append: Too many open files

    #	create reciprocal best chains/nets for 9-way multiple alignment
    ssh hgwdev
    cd /cluster/data/calJac1/bed/blastz.mm9.swap
    time nice -n +19 /cluster/bin/scripts/doRecipBest.pl calJac1 mm9 \
	> rbest.log 2>&1 &
    time nice -n +19 /cluster/bin/scripts/doRecipBest.pl calJac1 mm9 \
	-continue=download > rbest.download.log 2>&1 &

###########################################################################
# Blastz swap Chimp panTro2 (DONE - 2007-11-14 - Hiram)
    ssh kkstore06
    screen # use screen to manage this job
    cd /cluster/data/panTro2/bed/blastzCalJac1.2007-11-13
    cat fb.panTro2.chainCalJac1Link.txt
    #	2220169777 bases of 2909485072 (76.308%) in intersection

    mkdir /cluster/data/calJac1/bed/blastz.panTro2.swap
    cd /cluster/data/calJac1/bed/blastz.panTro2.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/cluster/data/panTro2/bed/blastzCalJac1.2007-11-13/DEF \
	-chainMinScore=3000 -chainLinearGap=medium \
	-swap -bigClusterHub=pk > swap.log 2>&1 &
    #	real    320m14.293s
    cat fb.calJac1.chainPanTro2Link.txt
    #	2264115411 bases of 2929139385 (77.296%) in intersection

    #	create reciprocal best chains/nets for 9-way maf alignments
    ssh hgwdev
    cd /cluster/data/calJac1/bed/blastz.panTro2.swap
    time nice -n +19 /cluster/bin/scripts/doRecipBest.pl calJac1 panTro2 \
	> rbest.log 2>&1 &

###########################################################################
# SWAP BLASTZ Orangutan ponAbe2 (DONE - 2007-11-29 - Hiram)
    #	primary blastz result
    cd /cluster/data/ponAbe2/bed/blastzCalJac1.2007-11-18
    cat fb.ponAbe2.chainCalJac1Link.txt
    #	2310720863 bases of 3093572278 (74.694%) in intersection

    #	and for the swap
    ssh kkstore02
    mkdir /cluster/data/calJac1/bed/blastz.ponAbe2.swap
    cd /cluster/data/calJac1/bed/blastz.ponAbe2.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/cluster/data/ponAbe2/bed/blastzCalJac1.2007-11-18/DEF \
	-chainMinScore=3000 -chainLinearGap=medium \
	-swap -bigClusterHub=kk > swap.log 2>&1 &
    #	real    341m54.548s
    cat fb.calJac1.chainPonAbe2Link.txt
    #	2253236255 bases of 2929139385 (76.925%) in intersection

    #	reciprocal best for 9-way maf alignments
    ssh hgwdev
    cd /cluster/data/calJac1/bed/blastz.ponAbe2.swap
    time nice -n +19 /cluster/bin/scripts/doRecipBest.pl calJac1 ponAbe2 \
	> rbest.log 2>&1 &
    #	real    96m17.285s

###########################################################################
# SWAP BLASTZ Dog canFam2 (DONE - 2007-11-30 - Hiram)
    # primary blastz result
    cd /cluster/data/canFam2/bed/blastzCalJac1.2007-11-28
    cat fb.canFam2.chainCalJac1Link.txt
    #	1369690756 bases of 2384996543 (57.429%) in intersection

    mkdir /cluster/data/calJac1/bed/blastz.canFam2.swap
    cd /cluster/data/calJac1/bed/blastz.canFam2.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/cluster/data/canFam2/bed/blastzCalJac1.2007-11-28/DEF \
	-chainMinScore=3000 -chainLinearGap=medium \
	-swap -bigClusterHub=kk > swap.log 2>&1 &
    #	encountered difficulties with /scratch/data/ on kolossus
    #	had to finish the netChains.csh script manually, then continuing:
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/cluster/data/canFam2/bed/blastzCalJac1.2007-11-28/DEF \
	-continue=load -chainMinScore=3000 -chainLinearGap=medium \
	-swap -bigClusterHub=kk > load.log 2>&1 &
    #	real    56m44.375s

    cat fb.calJac1.chainCanFam2Link.txt
    #	1451345669 bases of 2929139385 (49.549%) in intersection

    #	reciprocal best for 9-way maf alignments
    ssh hgwdev
    #	expects blastz.canFam2 to exist
    cd /cluster/data/calJac1/bed
    ln -s blastz.canFam2.swap blastz.canFam2
    cd /cluster/data/calJac1/bed/blastz.canFam2
    time nice -n +19 /cluster/bin/scripts/doRecipBest.pl calJac1 canFam2 \
	> rbest.log 2>&1 &
    #	real    70m45.324s

###########################################################################
# SWAP BLASTZ Chimp rheMac2 (DONE - 2007-11-18 - Hiram)
    #	primary blastz result
    cd /cluster/data/rheMac2/bed/blastzCalJac1.2007-11-16
    cat fb.rheMac2.chainCalJac1Link.txt
    #	2055107003 bases of 2646704109 (77.648%) in intersection

    #	and the download
    mkdir /cluster/data/calJac1/bed/blastz.rheMac2.swap
    cd /cluster/data/calJac1/bed/blastz.rheMac2.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/cluster/data/rheMac2/bed/blastzCalJac1.2007-11-16/DEF \
	-swap -chainMinScore=3000 -chainLinearGap=medium \
	-bigClusterHub=kk > swap.log 2>&1 &
    #	real    349m36.073s
    cat fb.calJac1.chainRheMac2Link.txt
    #	2191300051 bases of 2929139385 (74.810%) in intersection

    #	reciprocal best for 9-way maf alignments
    ssh hgwdev
    #	expects blastz.rheMac2 to exist
    cd /cluster/data/calJac1/bed
    ln -s blastz.rheMac2.swap blastz.rheMac2
    cd /cluster/data/calJac1/bed/blastz.rheMac2
    time nice -n +19 /cluster/bin/scripts/doRecipBest.pl calJac1 rheMac2 \
	> rbest.log 2>&1 &
    #	real    87m53.651s

#########################################################################
## 9-Way Multiz (DONE - 2007-12-21 - Hiram)
##
    ssh hgwdev
    mkdir /cluster/data/calJac1/bed/multiz9way
    cd /cluster/data/calJac1/bed/multiz9way
    #	take the 30-way tree from mm9 and eliminate genomes not in
    #	this alignment
    #	rearrange to get calJac1 on the top of the graph
    #	paste this tree into the on-line phyloGif tool:
    #	http://genome.ucsc.edu/cgi-bin/phyloGif
    #	to create the image for the tree diagram

    #	select the 9 organisms from the 30-way recently done on mouse mm9
    /cluster/bin/phast/tree_doctor \
	--prune-all-but Human_hg18,Mouse_mm9,Chimp_panTro2,Marmoset_calJac1,Rhesus_rheMac2,Orangutan_ponAbe2,Dog_canFam2,Platypus_ornAna1,Opossum_monDom4 \
	/cluster/data/mm9/bed/multiz30way/mm9OnTop.fullNames.nh \
	> 9-way.fullNames.nh

    #	looks something like this:
((((Mouse_mm9:0.325818,
((((Human_hg18:0.005873,Chimp_panTro2:0.007668):0.013037,
     Orangutan_ponAbe2:0.020000):0.013037,
       Rhesus_rheMac2:0.031973):0.036500,
Marmoset_calJac1:0.070000):0.058454):0.019763,
Dog_canFam2:0.187963):0.243550,
Opossum_monDom4:0.320721):0.088647,Platypus_ornAna1:0.488110);

(((
(Mouse_mm9:0.325818,
(Marmoset_calJac1:0.070000,
 (((Human_hg18:0.005873,Chimp_panTro2:0.007668):0.013037,
     Orangutan_ponAbe2:0.020000):0.013037,
       Rhesus_rheMac2:0.031973):0.036500):0.058454):0.019763,
Dog_canFam2:0.187963):0.243550,
Opossum_monDom4:0.320721):0.088647,Platypus_ornAna1:0.488110);

    #	rearrange to get Marmoset at the top:
    # this leaves us with:
    cat << '_EOF_' > calJac1.9-way.nh
(((((Marmoset_calJac1:0.070000,
     (((Human_hg18:0.005873,Chimp_panTro2:0.007668):0.013037,
    Orangutan_ponAbe2:0.020000):0.013037,
   Rhesus_rheMac2:0.031973):0.036500):0.058454,
  Mouse_mm9:0.325818):0.019763,
 Dog_canFam2:0.187963):0.243550,
Opossum_monDom4:0.320721):0.088647,Platypus_ornAna1:0.488110);
'_EOF_'
    #	<< happy emacs

    #	create a species list from that file:
    sed -e 's/[()]//g; s/ /\n/g; s/,/\n/g' calJac1.9-way.nh \
        | sed -e "s/[ \t]*//g; /^[ \t]$/d; /^$/d" | sort -u \
        | sed -e "s/.*_//; s/:.*//" | sort > species.list
    #	verify that has 9 db names in it
    # create a stripped down nh file for use in autoMZ run
    echo \
`sed 's/[a-zA-Z0-9]*_//g; s/:0.[0-9]*//g; s/[,;]/ /g' calJac1.9-way.nh \
	| sed -e "s/  / /g"` > tree.9.nh
    #	that looks like, as a single line:
# (((((calJac1(((hg18 panTro2)ponAbe2)rheMac2)) mm9) canFam2) monDom4) ornAna1)

    # verify all blastz's exists
    cat << '_EOF_' > listMafs.csh
#!/bin/csh -fe
cd /cluster/data/calJac1/bed/multiz9way
foreach db (`grep -v calJac1 species.list`)
    set bdir = /cluster/data/calJac1/bed/blastz.$db
    if (-e $bdir/mafRBestNet/calJac1.$db.rbest.maf.gz) then
	echo "$db mafRBestNet"
    else if (-e $bdir/mafSynNet/calJac1.$db.net.maf.gz) then
	echo "$db mafSynNet"
    else if (-e $bdir/mafNet/calJac1.$db.net.maf.gz) then
	echo "$db mafNet"
    else
	echo "$db mafs not found"
    endif
end
'_EOF_'
    # << happy emacs
    chmod +x ./listMafs.csh
    #	see what it says, the "mafs not found" should only show up on calJac1
    ./listMafs.csh
# canFam2 mafRBestNet
# hg18 mafRBestNet
# mm9 mafRBestNet
# monDom4 mafNet
# ornAna1 mafNet
# panTro2 mafRBestNet
# ponAbe2 mafRBestNet
# rheMac2 mafRBestNet


    /cluster/bin/phast/all_dists calJac1.9-way.nh > 9way.distances.txt
    grep -i caljac 9way.distances.txt | sort -k3,3n
Marmoset_calJac1        Human_hg18      0.138447
Marmoset_calJac1        Rhesus_rheMac2  0.138473
Marmoset_calJac1        Orangutan_ponAbe2       0.139537
Marmoset_calJac1        Chimp_panTro2   0.140242
Marmoset_calJac1        Dog_canFam2     0.336180
Marmoset_calJac1        Mouse_mm9       0.454272
Marmoset_calJac1        Opossum_monDom4 0.712488
Marmoset_calJac1        Platypus_ornAna1        0.968524

    #	use the calculated
    #	distances in the table below to order the organisms and check
    #	the button order on the browser.  Zebrafish ends up before
    #	tetraodon and fugu on the browser despite its distance.
    #	And if you can fill in the table below entirely, you have
    #	succeeded in finishing all the alignments required.
    #
#                         featureBits chainLink measures
#                                       chainCalJac1Link   chain   linearGap
#    distance                     on CalJac1    on other   minScore
#  1  0.138447 Human_hg18         (% 82.846)   (% 78.351)   3000     medium
#  2  0.138473 Rhesus_rheMac2     (% 74.810)   (% 77.648)   3000     medium
#  3  0.139537 Orangutan_ponAbe2  (% 76.925)   (% 74.694)   3000     medium
#  4  0.140242 Chimp_panTro2      (% 77.296)   (% 76.308)   3000     medium
#  5  0.336180 Dog_canFam2        (% 57.429)   (% 49.549)   3000     medium
#  6  0.454272 Mouse_mm9          (% 30.302)   (% 32.971)   3000     medium
#  6  0.712488 Opossum_monDom4    (% 13.357)   (% 11.050)   5000     loose
#  7  0.968524 Platypus_ornAna1   (%  7.221)   (% 10.619)   5000     loose

    # copy net mafs to cluster-friendly storage, splitting chroms
    mkdir mafLinks
    cd mafLinks
    # hint: obtained these links by altering listMafs.csh above
    #	add an echo statement to output these commands
    ln -s ../../blastz.canFam2/mafRBestNet/calJac1.canFam2.rbest.maf.gz \
	mafLinks/canFam2.maf.gz
    ln -s ../../blastz.hg18/mafRBestNet/calJac1.hg18.rbest.maf.gz \
	mafLinks/hg18.maf.gz
    ln -s ../../blastz.mm9/mafRBestNet/calJac1.mm9.rbest.maf.gz \
	mafLinks/mm9.maf.gz
    ln -s ../../blastz.monDom4/mafNet/calJac1.monDom4.net.maf.gz \
	mafLinks/monDom4.maf.gz
    ln -s ../../blastz.ornAna1/mafNet/calJac1.ornAna1.net.maf.gz \
	mafLinks/ornAna1.maf.gz
    ln -s ../../blastz.panTro2/mafRBestNet/calJac1.panTro2.rbest.maf.gz \
	mafLinks/panTro2.maf.gz
    ln -s ../../blastz.ponAbe2/mafRBestNet/calJac1.ponAbe2.rbest.maf.gz \
	mafLinks/ponAbe2.maf.gz
    ln -s ../../blastz.rheMac2/mafRBestNet/calJac1.rheMac2.rbest.maf.gz \
	mafLinks/rheMac2.maf.gz
    #	need to split these things up by Contig number for efficient kluster run
    ssh kkstore06
    cd /cluster/data/calJac1/bed/multiz9way/mafLinks
    mkdir -p /san/sanvol1/scratch/calJac1/multiz9way/contigMaf
    cd /scratch/tmp
    for D in `grep -v calJac1 /cluster/data/calJac1/bed/multiz9way/species.list`
do
    mkdir /scratch/tmp/${D}
    cd /scratch/tmp/${D}
    mafSplit -verbose=2 /dev/null -byTarget -useSequenceName Contig \
	/cluster/data/calJac1/bed/multiz9way/mafLinks/${D}.maf.gz -outDirDepth=2
    rsync -a --progress ./ \
	/san/sanvol1/scratch/calJac1/multiz9way/contigMaf/${D}
    cd /scratch/tmp
    rm -fr ${D}
done
    #	create a run-time list of contigs to operate on, not all contigs
    #	exist in all alignments, but we want all contig names used in any
    #	alignment:
    cd /san/sanvol1/scratch/calJac1/multiz9way/contigMaf
    for D in *
do
    cd "${D}"
    find . -type f
    cd ..
done | sort -u > /tmp/9-way.contig.list
    wc -l /tmp/9-way.contig.list
    #	36707 /tmp/9-way.contig.list

    # ready for the multiz run
    ssh pk
    mkdir /cluster/data/calJac1/bed/multiz9way/splitRun
    cd /cluster/data/calJac1/bed/multiz9way/splitRun
    scp -p kkstore06:/tmp/9-way.contig.list .
    mkdir -p maf run
    cd run
    mkdir penn
    # use latest penn utilities
    P=/cluster/bin/penn/multiz.v11.2007-03-19/multiz-tba
    cp -p $P/{autoMZ,multiz,maf_project} penn

    #	set the db and pairs directories here
    cat > autoMultiz.csh << '_EOF_'
#!/bin/csh -ef
set db = calJac1
set subdir = $1
set c = $2
set result = $3
set resultDir = $result:h
set run = `pwd`
set tmp = /scratch/tmp/$db/multiz.$c
set pairs = /san/sanvol1/scratch/$db/multiz9way/contigMaf
rm -fr $tmp
mkdir -p $tmp
mkdir -p $resultDir
cp ../../tree.9.nh ../../species.list $tmp
pushd $tmp
foreach s (`grep -v $db species.list`)
    set in = $pairs/$s/$subdir/$c.maf
    set out = $db.$s.sing.maf
    if (-e $in.gz) then
	zcat $in.gz > $out
    else if (-e $in) then
	cp $in $out
    else
	echo "##maf version=1 scoring=autoMZ" > $out
    endif
end
set path = ($run/penn $path); rehash
$run/penn/autoMZ + T=$tmp E=$db "`cat tree.9.nh`" $db.*.sing.maf $c.maf
popd
cp $tmp/$c.maf $result
rm -fr $tmp
rmdir --ignore-fail-on-non-empty /scratch/tmp/$db
'_EOF_'
# << happy emacs
    chmod +x autoMultiz.csh

    cat  << '_EOF_' > template
#LOOP
./autoMultiz.csh $(dir1) $(root1) {check out line+ /cluster/data/calJac1/bed/multiz9way/splitRun/maf/$(dir1)/$(root1).maf}
#ENDLOOP
'_EOF_'
    # << emacs
    #	it is a single job since everything is in the same maf file
    time nice -n +19 ./autoMultiz.csh calJac1
    XXX - running 2007-12-21 16:32 on mkr0u3

    sed -e "s/^\.\///" ../9-way.contig.list \
	| gensub2 stdin single template jobList

    para create jobList
    para try ... check ... push ... etc
# Completed: 36707 of 36707 jobs
# CPU time in finished jobs:     244659s    4077.65m    67.96h    2.83d  0.008 y
# IO & Wait Time:                115457s    1924.29m    32.07h    1.34d  0.004 y
# Average job time:                  10s       0.16m     0.00h    0.00d
# Longest finished job:             249s       4.15m     0.07h    0.00d
# Submission to last job:          2454s      40.90m     0.68h    0.03d

    # put the split maf results back together into a single maf file
    #	eliminate duplicate comments
    ssh kkstore06
    cd /cluster/data/calJac1/bed/multiz9way
    mkdir togetherMaf
    grep "^##maf version" splitRun/maf/0/0/Contig00000.maf \
	| sort -u > togetherMaf/calJac1.9way.maf
    for F in `find ./splitRun/maf -type f -depth`
do
    grep -h "^#" "${F}" | egrep -v "maf version=1|eof maf" \
	| sed -e "s#/_MZ_[^ ]* # #g; s#__[0-9]##g"
done | sort -u >> togetherMaf/calJac1.9way.maf
    for F in `find ./splitRun/maf -type f -depth`
do
    grep -v -h "^#" "${F}"
done >> togetherMaf/calJac1.9way.maf
    grep "^##eof maf" splitRun/maf/0/0/Contig00000.maf \
	| sort -u >> togetherMaf/calJac1.9way.maf

    # load tables for a look
    ssh hgwdev
    mkdir -p /gbdb/calJac1/multiz9way/maf
    ln -s /cluster/data/calJac1/bed/multiz9way/togetherMaf/*.maf \
                /gbdb/calJac1/multiz9way/maf/multiz9way.maf
    # this generates an immense multiz9way.tab file in the directory
    #	where it is running.  Best to run this over in scratch.
    cd /scratch/tmp
    time nice -n +19 hgLoadMaf \
	-pathPrefix=/gbdb/calJac1/multiz9way/maf calJac1 multiz9way
    #	real    5m6.330s
    #	Loaded 8484286 mafs in 1 files from /gbdb/calJac1/multiz9way/maf
    # load summary table
    time nice -n +19 cat /gbdb/calJac1/multiz9way/maf/*.maf \
	| hgLoadMafSummary calJac1 -minSize=30000 -mergeGap=1500 \
	 -maxSize=200000  multiz9waySummary stdin
    #	real    5m58.150s
    #	Created 121083 summary blocks from 3410157 components
    #	and 693943 mafs from stdin

    # Gap Annotation
    # prepare bed files with gap info
    ssh kkstore02
    mkdir /cluster/data/calJac1/bed/multiz9way/anno
    cd /cluster/data/calJac1/bed/multiz9way/anno
    mkdir maf run

    #	these actually already all exist from previous multiple alignments
    for DB in `cat ../species.list`
do
    CDIR="/cluster/data/${DB}"
    if [ ! -f ${CDIR}/${DB}.N.bed ]; then
	echo "creating ${DB}.N.bed"
	echo twoBitInfo -nBed ${CDIR}/${DB}.2bit ${CDIR}/${DB}.N.bed
    else
	ls -og ${CDIR}/${DB}.N.bed
    fi
done

    cd run
    rm -f nBeds sizes
    for DB in `grep -v calJac1 ../../species.list`
do
    echo "${DB} "
    ln -s  /cluster/data/${DB}/${DB}.N.bed ${DB}.bed
    echo ${DB}.bed  >> nBeds
    ln -s  /cluster/data/${DB}/chrom.sizes ${DB}.len
    echo ${DB}.len  >> sizes
done

    ssh memk
    #	temporarily copy the calJac1.9way.maf file onto the memk
    #	nodes /scratch/data/calJac1/maf/ directory
    for R in 0 1 2 3 4 5 6 7
do
    ssh mkr0u${R} rsync -a --progress \
/cluster/data/calJac1/bed/multiz9way/togetherMaf/calJac1.9way.maf.gz \
	/scratch/data/calJac1/maf/
done
    mkdir /cluster/data/calJac1/bed/multiz9way/anno/splitMaf
    #	need to split up the single maf file into individual
    #	per-scaffold maf files to run annotation on
    cd /cluster/data/calJac1/bed/multiz9way/anno/splitMaf
    #	create bed files to list approximately 1553 scaffolds in
    #	a single list, approximately 33 lists
    cat << '_EOF_' > mkBedLists.pl
#!/usr/bin/env perl

use strict;
use warnings;

my $bedCount = 0;
my $i = 0;

my $bedFile = sprintf("file_%d.bed", $bedCount);

open (BF,">$bedFile") or die "can not write to $bedFile $!";

open (FH,"</cluster/data/calJac1/chrom.sizes") or
        die "can not read /cluster/data/calJac1/chrom.sizes $!";
while (my $line = <FH>) {
    chomp $line;
    if ( (($i + 1) % 1553) == 0 )  {
        printf "%s\n", $line;
        close (BF);
        ++$bedCount;
        $bedFile = sprintf("file_%d.bed", $bedCount);
        open (BF,">$bedFile") or die "can not write to $bedFile $!";
    }
    ++$i;
    my ($chr, $size) = split('\s+',$line);
    printf BF "%s\t0\t%d\t%s\n", $chr, $size, $chr;
}
close (FH);
close (BH);
'_EOF_'
    # << happy emacs
    chmod +x mkBedLists.pl
    ./mkBedLists.pl

    #	now, run a mafsInRegion on each one of those lists
    cat << '_EOF_' > runOne
#!/bin/csh -fe
set runDir = "/cluster/data/calJac1/bed/multiz9way/anno/splitMaf"
set resultDir = $1
set bedFile = $resultDir.bed
mkdir -p $resultDir
mkdir -p /scratch/tmp/calJac1/$resultDir
pushd /scratch/tmp/calJac1/$resultDir
mafsInRegion $runDir/$bedFile -outDir . \
        /scratch/data/calJac1/maf/calJac1.9way.maf
popd
rsync -q -a /scratch/tmp/calJac1/$resultDir/ ./$resultDir/
rm -fr /scratch/tmp/calJac1/$resultDir
rmdir --ignore-fail-on-non-empty /scratch/tmp/calJac1
'_EOF_'
    # << happy emacs
    chmod +x runOne

    cat << '_EOF_' > template
#LOOP
./runOne $(root1)
#ENDLOOP
'_EOF_'
    # << happy emacs

    ls file*.bed > runList
    gensub2 runList single template jobList
    para create jobList
    para try ... check ... push ... etc
# Completed: 33 of 33 jobs
# CPU time in finished jobs:      11075s     184.58m     3.08h    0.13d  0.000 y
# IO & Wait Time:                 22992s     383.20m     6.39h    0.27d  0.001 y
# Average job time:                1032s      17.21m     0.29h    0.01d
# Longest running job:                0s       0.00m     0.00h    0.00d
# Longest finished job:            2019s      33.65m     0.56h    0.02d
# Submission to last job:         22051s     367.52m     6.13h    0.26d

    cd /cluster/data/calJac1/bed/multiz9way/anno/run

    cat << '_EOF_' > doAnno.csh
#!/bin/csh -ef
set outDir = ../maf/$2
set result = $3
set input = $1
mkdir -p $outDir
cat $input | \
nice mafAddIRows -nBeds=nBeds stdin /scratch/data/calJac1/calJac1.2bit $result
'_EOF_'
    # << happy emacs
    chmod +x doAnno.csh

    cat << '_EOF_' > template
#LOOP
./doAnno.csh $(path1) $(lastDir1) {check out line+ ../maf/$(lastDir1)/$(root1).maf}
#ENDLOOP
'_EOF_'
    # << happy emacs

    find ../splitMaf -type f -name "*.maf > maf.list
    gensub2 maf.list single template jobList
    para create jobList
    para try ... check ... push ... etc.
# Completed: 36707 of 36707 jobs
# CPU time in finished jobs:      94093s    1568.22m    26.14h    1.09d  0.003 y
# IO & Wait Time:                 94674s    1577.90m    26.30h    1.10d  0.003 y
# Average job time:                   5s       0.09m     0.00h    0.00d
# Longest finished job:              12s       0.20m     0.00h    0.00d
# Submission to last job:          6129s     102.15m     1.70h    0.07d

    ssh kkstore06
    cd /cluster/data/calJac1/bed/multiz9way/anno
    grep "^##maf version" maf/file_0/Contig0.maf \
	| sort -u > calJac1.anno.9way.maf
    find ./maf -type f -depth -name "*.maf" | while read F
do
    grep -v -h "^#" "${F}"
done >> calJac1.anno.9way.maf
    echo "##eof maf" >> calJac1.anno.9way.maf

    ssh hgwdev
    cd /cluster/data/calJac1/bed/multiz9way/anno
    mkdir -p /gbdb/calJac1/multiz9way/anno
    ln -s `pwd`/calJac1.anno.9way.maf \
                /gbdb/calJac1/multiz9way/anno/multiz9way.maf
    #	by loading this into the table multiz9way, it will replace the
    #	previously loaded table with the unannotated mafs
    #	huge temp files are made, do them on local disk
    cd /scratch/tmp
    time nice -n +19 hgLoadMaf -pathPrefix=/gbdb/calJac1/multiz9way/anno \
                calJac1 multiz9way
    #	Loaded 9243378 mafs in 1 files from /gbdb/calJac1/multiz9way/anno
    #	real    5m39.367s

    #	normally filter this for chrom size > 1,000,000 and only load
    #	those chroms.  But this is a scaffold assembly, load everything:
    hgLoadMafSummary calJac1 -minSize=30000 -mergeGap=1500 \
	-maxSize=200000  multiz9waySummary \
	    /gbdb/calJac1/multiz9way/anno/multiz9way.maf
    #	Created 121083 summary blocks from 3410157 components and 749940 mafs
    #	from /gbdb/calJac1/multiz9way/anno/multiz9way.maf

    #	by loading this into the table multiz9waySummary, it will replace
    #	the previously loaded table with the unannotated mafs
    #	remove the multiz9way*.tab files in this /scratch/tmp directory
    rm multiz9way*.tab
    #	And, you can remove the previously loaded non-annotated maf file link:
    rm /gbdb/calJac1/multiz9way/maf/multiz9way.maf
    rmdir /gbdb/calJac1/multiz9way/maf

###########################################################################
## Annotate 9-way multiple alignment with gene annotations
##		(DONE - 2008-01-08 - Hiram)
    # Gene frames
    ## given previous survey done for 8-way alignment on Orangutan,
    ## try using the following tables for this gene annotation
    #	use knownGene for hg18, mm9
    #	use ensGene for monDom4, ornAna1, panTro2, rheMac2
    #	new try with xenoMrna for ponAbe2, canFam2 and calJac1

    ssh hgwdev
    mkdir /cluster/data/calJac1/bed/multiz9way/frames
    cd /cluster/data/calJac1/bed/multiz9way/frames
    mkdir genes
    # knownGene
    for DB in hg18 mm9
do
    hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from knownGene" ${DB} \
      | genePredSingleCover stdin stdout | gzip -2c \
        > /scratch/tmp/${DB}.tmp.gz
    mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
    echo "${DB} done"
done
    # ensGene
    for DB in monDom4 ornAna1 panTro2 rheMac2
do
    hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from ensGene" ${DB} \
      | genePredSingleCover stdin stdout | gzip -2c \
        > /scratch/tmp/${DB}.tmp.gz
    mv /scratch/tmp/${DB}.tmp.gz genes/$DB.gp.gz
    echo "${DB} done"
done

    #	use xenoMrna for ponAbe2, canFam2, calJac1
    #	loxAfr1 oryCun1 ponAbe2
    for DB in ponAbe2 canFam2 calJac1
do
tmpExt=`mktemp temp.XXXXXX`
tmpMrnaCds=${DB}.mrna-cds.${tmpExt}
tmpMrna=${DB}.mrna.${tmpExt}
tmpCds=${DB}.cds.${tmpExt}
hgsql -N -e 'select xenoMrna.qName,cds.name,xenoMrna.* \
	   from xenoMrna,gbCdnaInfo,cds \
	   where (xenoMrna.qName = gbCdnaInfo.acc) and \
	     (gbCdnaInfo.cds != 0) and (gbCdnaInfo.cds = cds.id)' \
$DB > ${tmpMrnaCds}
cut -f 1-2  ${tmpMrnaCds} > ${tmpCds}
cut -f 4-100  ${tmpMrnaCds} > ${tmpMrna}
mrnaToGene -cdsFile=${tmpCds} -smallInsertSize=8 -quiet ${tmpMrna} stdout | \
genePredSingleCover stdin stdout | gzip -2c > /scratch/tmp/$DB.tmp.gz
rm ${tmpMrnaCds} ${tmpMrna} ${tmpCds}
mv /scratch/tmp/$DB.tmp.gz genes/$DB.gp.gz
rm -f $tmpExt
echo "${DB} done"
done

    ls -og genes
# -rw-rw-r--  1 2697811 Jan  8 15:43 calJac1.gp.gz
# -rw-rw-r--  1 2551552 Jan  8 15:41 canFam2.gp.gz
# -rw-rw-r--  1 2008806 Jan  8 15:33 hg18.gp.gz
# -rw-rw-r--  1 1965274 Jan  8 15:33 mm9.gp.gz
# -rw-rw-r--  1 1751726 Jan  8 15:33 monDom4.gp.gz
# -rw-rw-r--  1 1232719 Jan  8 15:33 ornAna1.gp.gz
# -rw-rw-r--  1 1980696 Jan  8 15:33 panTro2.gp.gz
# -rw-rw-r--  1 2703247 Jan  8 15:39 ponAbe2.gp.gz
# -rw-rw-r--  1 1935916 Jan  8 15:33 rheMac2.gp.gz

    ssh kkstore06
    cd /cluster/data/calJac1/bed/multiz9way/frames
    #	anything to annotate is in a pair, e.g.: calJac1 genes/calJac1.gp.gz
    time (cat  ../anno/calJac1.anno.9way.maf | nice -n +19 genePredToMafFrames calJac1 stdin stdout calJac1 genes/calJac1.gp.gz hg18 genes/hg18.gp.gz mm9 genes/mm9.gp.gz rheMac2 genes/rheMac2.gp.gz ponAbe2 genes/ponAbe2.gp.gz panTro2 genes/panTro2.gp.gz canFam2 genes/canFam2.gp.gz monDom4 genes/monDom4.gp.gz ornAna1 genes/ornAna1.gp.gz | gzip > multiz9way.mafFrames.gz) > frames.log 2>&1
    # see what it looks like in terms of number of annotations per DB:
    zcat multiz9way.mafFrames.gz | cut -f4 | sort | uniq -c | sort -n
# 206370 hg18
# 208834 panTro2
# 211731 rheMac2
# 224988 calJac1
# 225518 canFam2
# 225632 mm9
# 261163 ponAbe2
# 417544 ornAna1
# 462890 monDom4

    #	load the resulting file
    ssh hgwdev
    cd /cluster/data/calJac1/bed/multiz9way/frames
    time nice -n +19 hgLoadMafFrames calJac1 multiz9wayFrames \
	multiz9way.mafFrames.gz
    #	real    0m38.282s

    #	enable the trackDb entries:
# frames multiz9wayFrames
# irows on
#############################################################################
# phastCons 9-way (DONE - 2007-10-16 - Hiram)

    # split 9way mafs into 10M chunks and generate sufficient statistics 
    # files for # phastCons
    ssh memk
    mkdir /cluster/data/calJac1/bed/multiz9way/msa.split
    cd /cluster/data/calJac1/bed/multiz9way/msa.split
    mkdir -p /san/sanvol1/scratch/calJac1/multiz9way/cons/ss

    cat << '_EOF_' > doSplit.csh
#!/bin/csh -ef
set MAFS = /cluster/data/calJac1/bed/multiz9way/anno/maf
set WINDOWS = /san/sanvol1/scratch/calJac1/multiz9way/cons/ss
pushd $WINDOWS
set resultDir = $1
set c = $2
rm -fr $resultDir/$c
mkdir -p $resultDir
twoBitToFa -seq=$c /scratch/data/calJac1/calJac1.2bit /scratch/tmp/calJac1.$c.fa
/cluster/bin/phast/$MACHTYPE/msa_split $MAFS/$resultDir/$c.maf -i MAF \
    -M /scratch/tmp/calJac1.$c.fa \
    -o SS -r $resultDir/$c -w 10000000,0 -I 1000 -B 5000
rm -f /scratch/tmp/calJac1.$c.fa
popd
mkdir -p $resultDir
date > $resultDir/$c.out
'_EOF_'
    # << happy emacs
    chmod +x doSplit.csh

    cat << '_EOF_' > template
#LOOP
doSplit.csh $(dir1) $(root1) {check out line+ $(dir1)/$(root1).out}
#ENDLOOP
'_EOF_'
    # << happy emacs

    #	create list of maf files:
    (cd ../anno/maf; find . -type f) | sed -e "s#^./##" > maf.list

    gensub2 maf.list single template jobList
    para create jobList
    para try ... check ... etc
# CPU time in finished jobs:       5250s      87.50m     1.46h    0.06d  0.000 y
# IO & Wait Time:                 94631s    1577.18m    26.29h    1.10d  0.003 y
# Average job time:                   3s       0.05m     0.00h    0.00d
# Longest running job:                0s       0.00m     0.00h    0.00d
# Longest finished job:               9s       0.15m     0.00h    0.00d
# Submission to last job:          3697s      61.62m     1.03h    0.04d

    # take the cons and noncons trees from the mouse 30-way

    #	Estimates are not easy to make, probably more correctly,
    #	take the 30-way .mod file, and re-use it here.
    ssh hgwdev
    cd /cluster/data/calJac1/bed/multiz9way
    cp -p /cluster/data/mm9/bed/multiz30way/mm9.30way.mod .

    # Run phastCons
    #	This job is I/O intensive in its output files, thus it is all
    #	working over in /scratch/tmp/
    ssh memk
    mkdir -p /cluster/data/calJac1/bed/multiz9way/cons/run.cons
    cd /cluster/data/calJac1/bed/multiz9way/cons/run.cons

    #	there are going to be several different phastCons runs using
    #	this same script.  They trigger off of the current working directory
    #	$cwd:t which is the "grp" in this script.  It is one of:
    #	all gliers placentals
    #	Well, that's what it was when used in the Mm9 30-way,
    #	in this instance, there is only the directory "all"

    cat << '_EOF_' > doPhast.csh
#!/bin/csh -fe
set PHASTBIN = /cluster/bin/phast.2007-05-04
set subDir = $1
set f = $2
set c = $2:r
set len = $3
set cov = $4
set rho = $5
set grp = $cwd:t
set tmp = /scratch/tmp/$f
set cons = /cluster/data/calJac1/bed/multiz9way/cons
mkdir -p $tmp
set san = /san/sanvol1/scratch/calJac1/multiz9way/cons
if (-s $cons/$grp/$grp.non-inf) then
  cp -p $cons/$grp/$grp.mod $cons/$grp/$grp.non-inf $tmp
  cp -p $san/ss/$subDir/$f.ss $cons/$grp/$grp.mod $cons/$grp/$grp.non-inf $tmp
else
  cp -p $cons/$grp/$grp.mod $tmp
  cp -p $san/ss/$subDir/$f.ss $cons/$grp/$grp.mod $tmp
endif
pushd $tmp > /dev/null
if (-s $grp.non-inf) then
  $PHASTBIN/phastCons $f.ss $grp.mod \
    --rho $rho --expected-length $len --target-coverage $cov --quiet \
    --not-informative `cat $grp.non-inf` \
    --seqname $c --idpref $c --most-conserved $f.bed --score > $f.pp
else
  $PHASTBIN/phastCons $f.ss $grp.mod \
    --rho $rho --expected-length $len --target-coverage $cov --quiet \
    --seqname $c --idpref $c --most-conserved $f.bed --score > $f.pp
endif
popd > /dev/null
mkdir -p $san/$grp/pp/$subDir $san/$grp/bed/$subDir
sleep 4
touch $san/$grp/pp/$subDir $san/$grp/bed/$subDir
rm -f $san/$grp/pp/$subDir/$f.pp
rm -f $san/$grp/bed/$subDir/$f.bed
mv $tmp/$f.pp $san/$grp/pp/$subDir
mv $tmp/$f.bed $san/$grp/bed/$subDir
rm -fr $tmp
'_EOF_'
    # << happy emacs
    chmod a+x doPhast.csh

    cat << '_EOF_' > template
#LOOP
../doPhast.csh $(root1) $(file1) 45 .3 .31 {check out line+ /san/sanvol1/scratch/calJac1/multiz9way/cons/all/pp/$(root1)/$(file1).pp}
#ENDLOOP
'_EOF_'
    # << happy emacs

    # Create parasol batch and run it
    pushd /san/sanvol1/scratch/calJac1/multiz9way/cons
    find ./ss -type f -name "*.ss" | sed -e "s#^./##; s/.ss$//" \
	> /cluster/data/calJac1/bed/multiz9way/cons/ss.list

    # run for all species
    cd ..
    mkdir -p all run.cons/all
    cd all
    /cluster/bin/phast.new/tree_doctor ../../mm9.30way.mod \
--prune-all-but=calJac1,hg18,panTro2,rheMac2,ponAbe2,mm9,canFam2,monDom4,ornAna1 \
	> all.mod
    cd ../run.cons/all

    #	root1 == chrom name, file1 == ss file name without .ss suffix
    # Create template file for "all" run
    cat << '_EOF_' > template
#LOOP
../run.cons/doPhast.csh $(lastDir1) $(file1) 45 .3 .31 {check out line+ /san/sanvol1/scratch/calJac1/multiz9way/cons/all/pp/$(lastDir1)/$(file1).pp}
#ENDLOOP
'_EOF_'
    # << happy emacs
    gensub2 ../ss.list single template jobList
    para create jobList
    para try ... check ... push ... etc.
# Completed: 28485 of 28485 jobs
# CPU time in finished jobs:      14082s     234.70m     3.91h    0.16d  0.000 y
# IO & Wait Time:                188534s    3142.23m    52.37h    2.18d  0.006 y
# Average job time:                   7s       0.12m     0.00h    0.00d
# Longest running job:                0s       0.00m     0.00h    0.00d
# Longest finished job:              17s       0.28m     0.00h    0.00d
# Submission to last job:         72420s    1207.00m    20.12h    0.84d

    # create Most Conserved track
    ssh kolossus
    cd /san/sanvol1/scratch/calJac1/multiz9way/cons/all
    find ./bed -type f -name "Contig*.bed" | xargs cat \
	| sort -k1,1 -k2,2n | \
        awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", $1, $2, $3, $5, $5;}' | \
            /cluster/bin/scripts/lodToBedScore /dev/stdin > mostConserved.bed
    #	~ 3 minutes
    cp -p mostConserved.bed /cluster/data/calJac1/bed/multiz9way/cons/all

    # load into database
    ssh hgwdev
    cd /cluster/data/calJac1/bed/multiz9way/cons/all
    time nice -n +19 hgLoadBed calJac1 phastConsElements9way mostConserved.bed
    #	Loaded 1297014 elements of size 5

    # Try for 5% overall cov, and 70% CDS cov 
    #	We don't have any gene tracks to compare CDS coverage
    #	--rho .31 --expected-length 45 --target-coverage .3
    featureBits calJac1 phastConsElements9way
    #	141561229 bases of 2929139385 (4.833%) in intersection

    # Create merged posterier probability file and wiggle track data files
    # currently doesn't matter where this is performed, the san is the same
    # network distance from all machines.
    # sort by chromName, chromStart so that items are in numerical order 
    #  for wigEncode
    cd /san/sanvol1/scratch/calJac1/multiz9way/cons/all
    mkdir -p phastCons9wayScores

for D in `ls -1d pp/file* | sort -t_ -k2n`
do
    F=${D/pp\/}
    out=phastCons9wayScores/${F}.data.gz
    echo "${D} > ${F}.data.gz"
    ls -S ${D}/*.pp | xargs cat | gzip > ${out}
done

    #	real    38m22.760s
    #	copy the phastCons9wayScores to:
# /cluster/data/calJac1/bed/multiz9way/downloads/phastCons9way/phastConsScores
    #	for hgdownload downloads

    # Create merged posterier probability file and wiggle track data files
    # currently doesn't matter where this is performed, the san is the same
    # network distance from all machines.
    cd /san/sanvol1/scratch/calJac1/multiz9way/cons/all
    ls -1 phastCons9wayScores/*.data.gz | sort -t_ -k2n | xargs zcat \
	| wigEncode -noOverlap stdin phastCons9way.wig phastCons9way.wib
    # Converted stdin, upper limit 1.00, lower limit 0.00
    time nice -n +19 cp -p *.wi? /cluster/data/calJac1/bed/multiz9way/cons/all
    #	real    1m4.483s

    # Load gbdb and database with wiggle.
    ssh hgwdev
    cd /cluster/data/calJac1/bed/multiz9way/cons/all
    ln -s `pwd`/phastCons9way.wib /gbdb/calJac1/multiz9way/phastCons9way.wib
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/calJac1/multiz9way calJac1 \
	phastCons9way phastCons9way.wig
    #	real    0m56.271s
    # remove garbage
    rm wiggle.tab

    #  Create histogram to get an overview of all the data
    ssh hgwdev
    cd /cluster/data/calJac1/bed/multiz9way/cons/all
    time nice -n +19 hgWiggle -doHistogram \
	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
	    -db=calJac1 phastCons9way > histogram.data 2>&1
    #	real    5m0.608s

    #	create plot of histogram:

    cat << '_EOF_' | gnuplot > histo.png
set terminal png small color x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
set title " Orangutan PonAbe2 Histogram phastCons9way track"
set xlabel " phastCons9way score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.02]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

    #	These trackDb entries turn on the wiggle phastCons data track:
    #	type wigMaf 0.0 1.0
    #	maxHeightPixels 100:40:11
    #	wiggle phastCons9way
    #	spanList 1
    #	autoScale Off
    #	windowingFunction mean
    #	pairwiseHeight 12
    #	yLineOnOff Off

#############################################################################
#  Downloads (DONE - 2008-01-11 - Hiram)
    #	Let's see if the downloads will work
    ssh hgwdev
    /cluster/data/calJac1
    #	expecting to find repeat masker .out file here:
    ln -s bed/RepeatMasker/calJac1.fa.out .
    time nice -n +19 /cluster/bin/scripts/makeDownloads.pl \
	-workhorse=hgwdev calJac1 > jkStuff/downloads.log 2>&1
    #	real    24m3.210s
    #	failed making upstream sequences:
    #	featureBits calJac1 mgcGenes:upstream:1000 -fa=stdout
    #	setpriority: Permission denied.
    #	the 'nice' from my bash shell causes trouble inside the csh
    #	script which uses nice.  Finish off the install step manually
    #	with the mgcGenes upstreams ...

#############################################################################
#  PushQ entries (DONE - 2008-01-11 - Hiram)
    ssh hgwdev
    /cluster/data/calJac1
    /cluster/bin/scripts/makePushQSql.pl calJac1 > jkStuff/pushQ.sql
    #	output warnings:
# calJac1 does not have seq
# calJac1 does not have gbMiscDiff
# Could not tell (from trackDb, all.joiner and hardcoded lists of supporting
# and genbank tables) which tracks to assign these tables to:
#	genscanPep

#############################################################################
#  Create 9-way downloads (DONE - 2008-03-28 - Hiram)
    ssh hgwdev
    mkdir -p /cluster/data/calJac1/bed/multiz9way/downloads/phastCons9way
    cd /cluster/data/calJac1/bed/multiz9way/downloads/phastCons9way
    cp -p \
/san/sanvol1/scratch/calJac1/multiz9way/cons/all/phastCons9wayScores/* .
    ln -s ../../cons/all/all.mod ./9way.mod
    cp /cluster/data/ponAbe2/bed/multiz8way/downloads/phastCons8way/README.txt .
    # edit that README.txt to be correct for this 9-way alignment
    cd ..
    mkdir multiz9way
    cd multiz9way
    cp -p /cluster/data/ponAbe2/bed/multiz8way/downloads/multiz8way/README.txt .
    # edit that README.txt to be correct for this 9-way alignment
    ssh kkstore06
    mkdir -p /cluster/data/calJac1/bed/multiz9way/downloads/multiz9way
    cd /cluster/data/calJac1/bed/multiz9way/downloads/multiz9way
    ln -s ../../calJac1.9-way.nh ./9way.nh

    time nice -n +19 gzip -c ../../../anno/calJac1.anno.9way.maf \
	> calJac1.9way.maf.gz
    #	real    310m12.800s
    #	unusual long time due to nice +19 and conflice with other long-running
    #	jobs on kkstore06

    ssh hgwdev
    cd /cluster/data/calJac1/bed/multiz9way/downloads/multiz9way
    #	creating upstream files from xenoRefGene, bash script:
    cat << '_EOF_' > mkUpstream.sh
#!/bin/bash
DB=calJac1
GENE=xenoRefGene
NWAY=multiz9way
export DB GENE

for S in 1000 2000 5000
do
    echo "making upstream${S}.maf"
    featureBits ${DB} ${GENE}:upstream:${S} -fa=/dev/null -bed=stdout \
        | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
        | $HOME/kent/src/hg/ratStuff/mafFrags/mafFrags ${DB} ${NWAY} \
                stdin stdout \
                -orgs=/cluster/data/${DB}/bed/${NWAY}/species.list \
        | gzip -c > upstream${S}.maf.gz
    echo "done upstream${S}.maf.gz"
done
'_EOF_'
    # << happy emacs
    chmod +x ./mkUpstream.sh
    time nice -n +19 ./mkUpstream.sh
    #	real    119m5.562s
# -rw-rw-r--  1   42975041 Mar 28 14:27 upstream1000.maf.gz
# -rw-rw-r--  1   76363192 Mar 28 15:03 upstream2000.maf.gz
# -rw-rw-r--  1  303870318 Mar 28 15:42 upstream5000.maf.gz

    #	check the names in these upstream files to ensure sanity:
    zcat upstream1000.maf.gz | grep "^s " | awk '{print $2}' \
	| sort | uniq -c | sort -rn | less
    #	should be a list of the other 4 species with a high count,
    #	then xenoRefGene names, e.g.:
    #	51588 rheMac2
    #	51588 ponAbe2
    #	51588 panTro2
    #	51588 ornAna1
    #	51588 monDom4
    #	51588 mm9
    #	51588 hg18
    #	51588 canFam2
    #	   18 NM_001033610
    #	   17 NM_016957
    #	   17 NM_000992
    #	   16 NM_181722

    ssh kkstore06
    cd /cluster/data/calJac1/bed/multiz9way/downloads/multiz9way
    md5sum *.maf.gz > md5sum.txt
    cd ../phastCons9way
    md5sum *.data.gz *.mod > md5sum.txt

    ssh hgwdev
    mkdir /usr/local/apache/htdocs/goldenPath/calJac1/multiz9way
    mkdir /usr/local/apache/htdocs/goldenPath/calJac1/phastCons9way
    cd /cluster/data/calJac1/bed/multiz9way/downloads/multiz9way
    ln -s `pwd`/* /usr/local/apache/htdocs/goldenPath/calJac1/multiz9way
    cd ../phastCons9way
    ln -s `pwd`/* /usr/local/apache/htdocs/goldenPath/calJac1/phastCons9way
    #	if your ln -s `pwd`/* made extra links to files you don't want there,
    #	check the goldenPath locations and remove those extra links

#############################################################################
# N-SCAN gene predictions (nscanGene) - (2008-04-03 markd)

    # obtained NSCAN predictions from michael brent's group
    # at WUSTL
    cd /cluster/data/calJac1/bed/nscan/
    wget http://mblab.wustl.edu/predictions/marmoset/calJac1/calJac1.gtf
    wget http://mblab.wustl.edu/predictions/marmoset/calJac1/calJac1.prot.fa
    wget http://mblab.wustl.edu/predictions/marmoset/calJac1/readme.html
    bzip2 calJac1.*
    chmod a-w *

    # load track
    gtfToGenePred -genePredExt calJac1.gtf.bz2 stdout | hgLoadGenePred -bin -genePredExt calJac1 nscanGene stdin
    hgPepPred calJac1 generic nscanPep  calJac1.prot.fa.bz2
    rm *.tab

    # update trackDb; need a calJac1-specific page to describe informants
    marmoset/calJac1/nscanGene.html   (copy from readme.html)
    marmoset/calJac1/trackDb.ra
    # set search regex to
        termRegex chr[0-9a-zA-Z_].*\.[0-9]+\.[0-9]

#############################################################################
############################################################################
# TRANSMAP vertebrate.2008-05-20 build  (2008-05-24 markd)

vertebrate-wide transMap alignments were built  Tracks are created and loaded
by a single Makefile. This is available from:
   svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-05-20

see doc/builds.txt for specific details.
############################################################################
############################################################################
# TRANSMAP vertebrate.2008-06-07 build  (2008-06-30 markd)

vertebrate-wide transMap alignments were built  Tracks are created and loaded
by a single Makefile. This is available from:
   svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-06-30

see doc/builds.txt for specific details.
############################################################################

################################################
# AUTOMATE UPSTREAM FILE CREATION (2008-10-15 markd)
update genbank.conf:
calJac1.upstreamGeneTbl = xenoRefGene
calJac1.upstreamMaf = multiz9way /hive/data/genomes/calJac1/bed/multiz9way/species.list

############################################################################
# QUALITY TRACK (DONE - 2008-11-25 - Hiram)
    mkdir /hive/data/genomes/calJac1/bed/qual
    cd /hive/data/genomes/calJac1/bed/qual
    #	the qac file was created by Rico during 28-way annotations
    qacToWig -fixed ../quality/calJac1.qac stdout \
	| wigEncode stdin qual.wig qual.wib
    ln -s `pwd`/qual.wib /gbdb/calJac1/wib
    hgLoadWiggle -pathPrefix=/gbdb/calJac1/wib calJac1 quality qual.wig
############################################################################
# TRANSMAP vertebrate.2009-07-01 build  (2009-07-21 markd)

vertebrate-wide transMap alignments were built  Tracks are created and loaded
by a single Makefile. This is available from:
   svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-07-01

see doc/builds.txt for specific details.
############################################################################
############################################################################
# TRANSMAP vertebrate.2009-09-13 build  (2009-09-20 markd)

vertebrate-wide transMap alignments were built  Tracks are created and loaded
by a single Makefile. This is available from:
   svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-09-13

see doc/builds.txt for specific details.

############################################################################
# LIFTOVER TO calJac3 (DONE - 2010-02-11 - Hiram )
    mkdir /hive/data/genomes/calJac1/bed/blat.calJac3.2010-02-11
    cd /hive/data/genomes/calJac1/bed/blat.calJac3.2010-02-11
    # -debug run to create run dir, preview scripts...
    doSameSpeciesLiftOver.pl -debug calJac1 calJac3
    # Real run:
    time nice -n +19 doSameSpeciesLiftOver.pl -verbose=2 \
	-bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
	 calJac1 calJac3 > do.log 2>&1
    #	real    36m16.693s
#############################################################################