# for emacs: -*- mode: sh; -*-

# DOWNLOAD SEQUENCE (DONE 10/15/04 Fan)
    ssh kksilo
    mkdir /cluster/store8/xenTro1
    cd /cluster/data
    ln -s /cluster/store8/xenTro1 xenTro1
    cd /cluster/data/xenTro1
    mkdir downloads
    cd downloads
    wget --timestamp ftp://ftp.jgi-psf.org/pub/JGI_data/Frog/v3.0/xenopus.041015.fasta.gz
    wget --timestamp ftp://ftp.jgi-psf.org/pub/JGI_data/Frog/v3.0/xenopus.041015.allmasked.gz
    wget --timestamp ftp://ftp.jgi-psf.org/pub/JGI_data/Frog/v3.0/repeats_lib/xt3.lib1.fasta.gz
    gzip -d *.gz
    cd ..

# SPLIT SCAFFOLDS (DONE 10/17/04, Fan)
    ssh kksilo
    cd /cluster/data/xenTro1
    mkdir scaffolds html
    # /cluster/bin/i386/faSplit byname downloads/xenopus.041015.fasta scaffolds -outDirDepth=3
    # Tried the above and, as noted by Heather before, not sure why the pieces didn't get put
    # under the scaffolds directory; removed 1, 2, 3, ... and tried it differently:
    cd scaffolds
    /cluster/bin/i386/faSplit byname ../downloads/xenopus.041015.fasta scaffolds -outDirDepth=3
    cd ..
    # generate list
    find scaffolds -print > scaffolds.list.0
    grep fa scaffolds.list.0 > scaffolds.list

# MAKE 2BIT NIB FILE (DONE 10/18/04, Fan)
    ssh kksilo
    cd /cluster/data/xenTro1/downloads
    /cluster/bin/i386/faToTwoBit xenopus.041015.fasta xenTro1.2bit
    # The xenopus.041015.fasta file has 70 bases per line instead of 50.
    # Created a specialized twoBitToFa70 program to generate a 70-base-per-line fa file.
    /cluster/bin/i386/twoBitToFa70 xenTro1.2bit check70.fa
    # diff got a memory exhausted error on kksilo;
    # went to kolossus and did the diff between check70.fa and xenopus.041015.fasta.
    # They are identical except that xenopus.041015.fasta has some empty lines at the end
    # of some records.
    mv xenTro1.2bit ..

    ssh hgwdev
    mkdir /gbdb/xenTro1
    ln -s /cluster/data/xenTro1/html /gbdb/xenTro1
    ln -s /cluster/data/xenTro1/xenTro1.2bit /gbdb/xenTro1

# CREATE DATABASE
    ssh hgwdev
    hgsql hg17
    create database xenTro1;
    use xenTro1;
    create table grp (PRIMARY KEY(NAME)) select * from hg17.grp;

    # add rows to hgcentraltest on hgwbeta in dbDb and defaultDb
    # set orderKey past chicken, before zebrafish
    ## re-work orderKey 2007-02-20 to get Lizard above frog - Hiram
    hgsql -e 'update dbDb set orderKey="446" where name="xenTro1";' \
	hgcentraltest

# CREATE AGP FILES AND GAP/GOLD TABLES (DONE JK 10/23/04)
    ssh kksilo
    cd /cluster/data/xenTro1
    hgFakeAgp downloads/xenopus.041015.fasta xenTro1.agp

    ssh hgwdev
    cd /cluster/data/xenTro1
    hgGoldGapGl xenTro1 xenTro1.agp

# CREATE CHROMINFO TABLE (DONE 10/18/04, Fan)
    # an improvement here would be to have getMaxCoord output the 2bit file name
    ssh hgwdev
    cd /cluster/data/xenTro1
    faSize -detailed=on downloads/xenopus.041015.fasta \
      | awk '{printf("%s\t%s\t%s\n", $1, $2, "/gbdb/xenTro1/xenTro1.2bit");}' \
      > chromInfo.tab
    hgsql xenTro1 < ~/src/hg/lib/chromInfo.sql
    hgsql xenTro1 -e 'load data local infile "chromInfo.tab" into table chromInfo'

# REPEATMASKER (DONE 10/20/04, Fan)
    # using the split scaffold fa files generated earlier

    # do a trial run
    ssh hgwdev
    cd /cluster/data/xenTro1/scaffolds/0/0/0
    /cluster/bluearc/RepeatMasker/RepeatMasker -ali -s -spec bos SCAFFOLD1000.fa

    # configure
    cd /cluster/data/xenTro1
    cp -p downloads/xt3.lib1.fasta /cluster/bluearc/RepeatMasker/Libraries/xt3_JGI.lib
    cp /cluster/data/anoGam1/jkStuff/RMAnopheles jkStuff/RMXenTro
    # edit jkStuff/RMXenTro to change all references anoGam1 --> xenTro1
    # and use the following line to run RepeatMasker:
    # /cluster/bluearc/RepeatMasker/RepeatMasker -ali -s -xsmall -lib /cluster/bluearc/RepeatMasker/Libraries/xt3_JGI.lib $2
    mkdir RMRun
    # /bin/csh makeJoblist-RM.csh
    # (What is the above line for?  Presumably it generates the RMJobs list built explicitly below.)
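    # The contents of jkStuff/RMXenTro are not reproduced in this doc.  Based only on
    # the comments above, a minimal sketch of what the edited wrapper presumably looks
    # like (hypothetical reconstruction, not the script that was actually run; it takes
    # a working directory and a fa file name, as used by the RMJobs lines below, and
    # leaves the .out file next to the input fa):
cat << '_EOF_' > jkStuff/RMXenTro.example
#!/bin/csh -fe
# $1 = directory holding the scaffold fa file, $2 = fa file name
cd $1
/cluster/bluearc/RepeatMasker/RepeatMasker -ali -s -xsmall \
    -lib /cluster/bluearc/RepeatMasker/Libraries/xt3_JGI.lib $2
'_EOF_'
    # << this line keeps emacs coloring happy
    chmod a+x jkStuff/RMXenTro.example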
    cd scaffolds
    foreach i (0 1 2 3 4 5 6 7 8 9)
      cd $i
      foreach j (0 1 2 3 4 5 6 7 8 9)
        cd $j
        foreach k (0 1 2 3 4 5 6 7 8 9)
          cd $k
          foreach f (*.fa)
            echo /cluster/data/xenTro1/jkStuff/RMXenTro \
              /cluster/data/xenTro1/scaffolds/$i/$j/$k $f \
              '{'check out line+ /cluster/data/xenTro1/scaffolds/$i/$j/$k/$f.out'}' \
              >> /cluster/data/xenTro1/RMRun/RMJobs
          end
          cd ..
        end
        cd ..
      end
      cd ..
    end

    # do the run
    ssh kk
    cd /cluster/data/xenTro1/RMRun
    para create RMJobs
    para try
    para check
    para push
    para check
    para time
# Completed: 27064 of 27064 jobs
# CPU time in finished jobs: 8152873s 135881.22m 2264.69h 94.36d 0.259 y
# IO & Wait Time: 287040s 4784.00m 79.73h 3.32d 0.009 y
# Average job time: 312s 5.20m 0.09h 0.00d
# Longest job: 46100s 768.33m 12.81h 0.53d
# Submission to last job: 46220s 770.33m 12.84h 0.53d

    # Our RepeatMasker results are not exactly the same as JGI's, but close.
    # JGI has not provided the .out files requested earlier as of 10/24/04.
    # With Jim's approval, proceeded with our RepeatMasker results.
    # Note: Jim earlier went ahead with JGI's repeat-masked result for
    # his blastz run.
#   +-----------+-----------+-------------+------------+------------+------------+
#   | sum(a)    | sum(b)    | sum(abBoth) | sum(aOnly) | sum(bOnly) | sum(total) |
#   +-----------+-----------+-------------+------------+------------+------------+
#   | 177141183 | 177367102 |   174224057 |    2917126 |    3143045 | 1630713514 |
#   +-----------+-----------+-------------+------------+------------+------------+

    # concatenate into one output file; took about 90 minutes
    ssh kksilo
    cd /cluster/data/xenTro1
    mkdir repeats
    /bin/tcsh concatRM.tcsh
    cd repeats
    # get the header (first 2 lines) to make hgLoadOut happy
    head -2 repeats.all > repeats.out
    # filter out the "no repetitive sequence found ..." message lines
    fgrep scaffold_ repeats.all | grep -v 'no repetitive' >> repeats.out

    ssh hgwdev
    cd /cluster/data/xenTro1/repeats
    hgLoadOut -nosplit xenTro1 repeats.out
    hgsql xenTro1
    rename table repeats_rmsk to rmsk;
    # select count(*) from rmsk;
    # 1140362
    # select count(*) from rmsk where repName like "TE_ORF%";
    # 164354

# SIMPLE REPEATS (DONE 10/22/04, Fan)
    # put the results throughout the scaffold 0/0/0 directories,
    # same as RepeatMasker, to avoid too many files in the same directory
    ssh kksilo
    cd /cluster/data/xenTro1
    mkdir bed
    cd bed
    mkdir simpleRepeat
    cd simpleRepeat
    # /bin/csh makeJoblist-trf.csh
    # do the run; took about 12 hours
    tcsh trf-run.csh > trf.log
    # concatenate into one output file; took about an hour
    /bin/tcsh concatTRF.csh

    # load
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/simpleRepeat
    /cluster/bin/i386/hgLoadBed xenTro1 simpleRepeat trf.bed \
      -sqlTable=/cluster/home/fanhsu/src/hg/lib/simpleRepeat.sql
    # Reading trf.all
    # Loaded 505308 elements of size 16
    # Sorted
    # Saving bed.tab
    # Loading xenTro1
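    # The makeJoblist-trf.csh / trf-run.csh / concatTRF.csh scripts are not reproduced
    # in this doc.  A minimal sketch of what the concatenation step presumably does
    # (hypothetical, assuming each scaffold directory ends up with a per-scaffold TRF
    # .bed file next to its .fa, analogous to the RepeatMasker .out files):
cat << '_EOF_' > concatTRF.example.csh
#!/bin/csh -fe
cd /cluster/data/xenTro1
# gather the per-scaffold TRF bed files into one genome-wide bed
find scaffolds -name '*.bed' -print | xargs cat > bed/simpleRepeat/trf.bed
'_EOF_'
    # << this line keeps emacs coloring happy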
# CREATE MASKED FA USING REPEATMASKER AND FILTERED TRF FILES (DONE 10/22/04, Fan)
    ssh kksilo
    cd /cluster/data/xenTro1
    /cluster/bin/i386/maskOutFa -soft downloads/xenopus.041015.fasta repeats/repeats.out xenTro1.softmask.fa
    # 1 warning about a negative rEnd:
    # WARNING: negative rEnd: -18 scaffold_69:2353795-2354532 XL1723L
    # matches: select count(*) from rmsk where repEnd < 0;
    /cluster/bin/i386/maskOutFa -softAdd xenTro1.softmask.fa bed/simpleRepeat/trf.bed xenTro1.softmask2.fa
    # hard masking (Ns instead of lower case) for download files

# REGENERATE 2BIT NIB (DONE, 10/22/04, Fan)
    ssh kksilo
    cd /cluster/data/xenTro1
    /cluster/bin/i386/faToTwoBit xenTro1.softmask2.fa xenTro1.softmask.2bit
    /cluster/bin/i386/twoBitToFa xenTro1.softmask.2bit check.softmask.fa
    diff -q xenTro1.softmask2.fa check.softmask.fa
    # They are the same.
    mv xenTro1.2bit xenTro1.2bit.unmasked
    mv xenTro1.softmask.2bit xenTro1.2bit

# MAKE 11.OOC FILE FOR BLAT (DONE, 10/22/04, Fan)
    ssh kkr1u00
    mkdir /cluster/data/xenTro1/bed/ooc
    cd /cluster/data/xenTro1/bed/ooc
    ls -1 /cluster/data/xenTro1/xenTro1.2bit > nib.lst
    mkdir /cluster/bluearc/xenTro1
    /cluster/bin/i386/blat nib.lst /dev/null /dev/null -tileSize=11 \
      -makeOoc=/cluster/bluearc/xenTro1/11.ooc -repMatch=540
    # Wrote 26600 overused 11-mers to /cluster/bluearc/xenTro1/11.ooc
    cp -p /cluster/bluearc/xenTro1/11.ooc /iscratch/i/xenTro1/
    iSync

# GENBANK (DONE, 10/26/04, Fan)
    ssh hgwdev
    cd /cluster/home/fanhsu/kent/src/hg/makeDb/genbank
    # check for missing commits
    diff /cluster/data/genbank/etc/genbank.conf etc/genbank.conf
    # edit etc/genbank.conf and add these lines, starting with the comment:

# xenTro1 (X. tropicalis)
# could get rid of the nib in this path, 2bit isn't a nib (4bit) ???
# xenTro1 (X. tropicalis) 27064 scaffolds!
# This uses the mondo 2bit file functionality, where multiple
# sequences from a two bit are aligned from a single job.
# Genbank X. tropicalis
#   5365 mRNAs
# 423107 ESTs
# partition 2bit into 5000 parts for initial build
xenTro1.genome = /iscratch/i/xenTro1/nib/xenTro1.2bit
xenTro1.mondoTwoBitParts = 5000
xenTro1.lift = no
xenTro1.refseq.mrna.native.load = no
xenTro1.genbank.mrna.xeno.load = no
xenTro1.genbank.est.xeno.load = no
xenTro1.downloadDir = xenTro1
xenTro1.perChromTables = no

    cvs update etc/genbank.conf
    cvs commit etc/genbank.conf
    # revision: 1.64
    # edit src/lib/gbGenome.c
    make
    cvs commit -m "added frog" src/lib/gbGenome.c
    # revision 1.21
    # edit src/align/gbBlat
    make
    cvs commit -m "added frog" src/align/gbBlat
    # revision 1.33
    make install-server

    ssh eieio
    cd /cluster/data/genbank
    bin/gbAlignStep -srcDb=genbank -type=mrna -initial -verbose=1 xenTro1 &
    # logged to
    # /cluster/data/genbank/var/build/logs/YYMMDDHHMMSS.xenTro1.initalign.log

    # load
    ssh hgwdev
    cd /cluster/data/genbank
    nice bin/gbDbLoadStep -verbose=1 -drop -initialLoad xenTro1
    # logged to /cluster/data/genbank/var/dbload/hgwdev/logs/YYMMDDHHMMSS.dbload.log

    # ESTs required modification to the build process to deal with the huge
    # number of scaffolds.  Once completed, the following line was
    # added to genbank.conf to enable its use:
    #   xenTro1.mondoTwoBitParts = 5000
    ssh eieio
    cd /cluster/data/genbank
    bin/gbAlignStep -initial -srcDb=genbank -type=est xenTro1

    # load the ESTs, reloading mRNAs as well
    ssh hgwdev
    cd /cluster/data/genbank
    nice bin/gbDbLoadStep -drop -initialLoad xenTro1

    # add xenTro1 to the list of databases to align in
    # ~/src/hg/makeDb/genbank/etc/align-genbank
    # CVS update and check in the file.
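    # (Optional sanity check, not part of the original log: confirm the native mRNA
    # and EST alignment tables were populated by the loads above.)
    hgsql xenTro1 -e 'select count(*) from all_mrna'
    hgsql xenTro1 -e 'select count(*) from all_est'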
# GC5BASE - (DONE 2004-10-27 - Hiram)
    ssh kksilo
    mkdir /cluster/data/xenTro1/bed/gc5Base
    cd /cluster/data/xenTro1/bed/gc5Base
    hgGcPercent -wigOut -doGaps -file=stdout -win=5 xenTro1 \
      /cluster/data/xenTro1 | wigBedToBinary stdin gc5Base.wig gc5Base.wib
    # Calculating gcPercent with window size 5
    # Using twoBit: /cluster/data/xenTro1/xenTro1.2bit
    # File stdout created
    # Converted stdin, upper limit 100.00, lower limit 0.00
    # real    332m59.680s
    # user    150m45.460s
    # sys     157m28.750s

    # Now load those results into the database
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/gc5Base
    mkdir /gbdb/xenTro1/wib
    ln -s `pwd`/gc5Base.wib /gbdb/xenTro1/wib
    hgLoadWiggle -pathPrefix=/gbdb/xenTro1/wib xenTro1 gc5Base gc5Base.wig

# PRODUCE X. LAEVIS MRNA ALIGNMENTS (IN PROGRESS 11/6/05 JK)
    # Make working directory and get Laevis RNA
    ssh hgwdev
    mkdir /san/sanvol1/xenTro1/xenLaeRna
    cd /san/sanvol1/xenTro1/xenLaeRna
    hgSpeciesRna hg17 Xenopus laevis xenLaeRna.fa
    cp /cluster/data/xenTro1/xenTro1.2bit /san/sanvol1/scratch/xenTro1

    # Split RNA into 4
    mkdir split
    faSplit sequence xenLaeRna.fa 4 split/xl

    # Do blatz run on pk.
    mkdir /san/sanvol1/xenTro1/xenLaeRna
    mkdir run1
    cd run1
    mkdir psl
    awk '{print $1;}' /cluster/data/xenTro1/chrom.sizes > genome.lst
    ls -1 ../split/*.fa > mrna.lst
cat << '_EOF_' > gsub
#LOOP
blatz -rna -minScore=6000 /san/sanvol1/scratch/xenTro1/xenTro1.2bit:$(file1) $(path2) -out=psl {check out line psl/$(root1)_$(root2).psl}
#ENDLOOP
'_EOF_'
    # << this line keeps emacs coloring happy
    gensub2 genome.lst mrna.lst gsub spec
    para create spec
    # Then do usual para try/para push/para time
#Completed: 108256 of 108256 jobs
#CPU time in finished jobs: 1933198s 32219.96m 537.00h 22.37d 0.061 y
#IO & Wait Time: 275698s 4594.97m 76.58h 3.19d 0.009 y
#Average job time: 20s 0.34m 0.01h 0.00d
#Longest running job: 0s 0.00m 0.00h 0.00d
#Longest finished job: 986s 16.43m 0.27h 0.01d
#Submission to last job: 24061s 401.02m 6.68h 0.28d

    # Do sort and near-besting
    catDir psl | pslFilter -minScore=100 -minAli=255 -noHead stdin stdout | sort -k 10 > ../filtered.psl
    cd ..
    pslReps filtered.psl nearBest.psl /dev/null -nohead -minAli=0.5 -nearTop=0.01 -minCover=0.255
    sort -k 14 nearBest.psl > xenLaeRna.psl

    # Clean up
    rm -r run1/psl

    # Load into database
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/xenLaeRna
    ln -s /cluster/data/xenTro1/bed/xenLaeRna/xenLaeRna.fa /gbdb/xenTro1/xenLaeRna.fa
    hgLoadSeq xenTro1 /gbdb/xenTro1/xenLaeRna.fa
    hgLoadPsl xenTro1 xenLaeRna.psl
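    # (Optional sanity check, not part of the original log: validate the near-best
    # PSL and see how much of the assembly it covers once loaded.)
    pslCheck xenLaeRna.psl
    featureBits xenTro1 xenLaeRna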
# PRODUCE X. LAEVIS MRNA ALIGNMENTS (DONE 1/14/05 JK)
    # Make working directory and get Laevis RNA and DNA
    ssh hgwdev
    mkdir -p /cluster/data/xenTro1/bed/xenLaeRna
    cd /cluster/data/xenTro1/bed/xenLaeRna
    hgSpeciesRna hg17 Xenopus laevis xenLaeRna.fa

    # Split RNA into 3 and load on temp device
    ssh kkr1u00
    cd /iscratch/i/xenTro1
    mkdir xenLaeRna
    cd xenLaeRna
    faSplit sequence /cluster/data/xenTro1/bed/xenLaeRna/xenLaeRna.fa 3 xl
    iSync

    # Do blatz cluster run
    ssh kk
    cd /cluster/data/xenTro1/bed/xenLaeRna
    mkdir run1
    cd run1
    mkdir psl
    awk '{print $1;}' /cluster/data/xenTro1/chrom.sizes > genome.lst
    ls -1 /iscratch/i/xenTro1/xenLaeRna/*.fa > mrna.lst
cat << '_EOF_' > gsub
#LOOP
blatz -rna -minScore=6000 /iscratch/i/xenTro1/nib/xenTro1.2bit:$(file1) $(path2) -out=psl {check out line psl/$(root1)_$(root2).psl}
#ENDLOOP
'_EOF_'
    # << this line keeps emacs coloring happy
    gensub2 genome.lst mrna.lst gsub spec
    para create spec
    # Then do usual para try/para push/para time
#CPU time in finished jobs: 3896883s 64948.04m 1082.47h 45.10d 0.124 y
#IO & Wait Time: 1031103s 17185.06m 286.42h 11.93d 0.033 y
#Average job time: 61s 1.01m 0.02h 0.00d
#Longest job: 3276s 54.60m 0.91h 0.04d
#Submission to last job: 5989s 99.82m 1.66h 0.07d

    # Do sort and near-besting on file server
    ssh kksilo
    cd /cluster/data/xenTro1/bed/xenLaeRna/run1
    catDir psl | pslFilter -minScore=100 -minAli=255 -noHead stdin stdout | sort -k 10 > ../filtered.psl
    cd ..
    pslReps filtered.psl nearBest.psl /dev/null -nohead -minAli=0.5 -nearTop=0.01 -minCover=0.255
    sort -k 14 nearBest.psl > xenLaeRna.psl

    # Clean up
    rm -r run1/psl

    # Load into database
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/xenLaeRna
    ln -s /cluster/data/xenTro1/bed/xenLaeRna/xenLaeRna.fa /gbdb/xenTro1/xenLaeRna.fa
    hgLoadSeq xenTro1 /gbdb/xenTro1/xenLaeRna.fa
    hgLoadPsl xenTro1 xenLaeRna.psl

# PRODUCING GENSCAN PREDICTIONS (DONE 10/27/04 Fan)
# GENSCAN RELOADED WITHOUT -genePredExt (DONE 01-06-05 angie)
# MOVE TIMESTAMP ON GENSCANPEP TO SATISFY ALL.JOINER (DONE 03-03-05 alisq, kuhn)
    ssh hgwdev
    mkdir /cluster/data/xenTro1/bed/genscan
    cd /cluster/data/xenTro1/bed/genscan
    # Check out hg3rdParty/genscanlinux to get latest genscan:
    cvs co hg3rdParty/genscanlinux

    # create hard-masked .fa files
    ssh kksilo
    cd /cluster/store8/xenTro1
    maskOutFa xenTro1.softmask2.fa hard xenTro1.hardmask2.fa
    /cluster/bin/i386/faSplit about xenTro1.hardmask2.fa 2000000 hardMasked2/

    # Run on small cluster (more mem than big cluster).
    ssh kki
    cd /cluster/data/xenTro1/bed/genscan
    # Make 3 subdirectories for genscan to put its output files in
    mkdir gtf pep subopt
    # Generate a list file, genome.list, of all the hard-masked contigs that
    # *do not* consist of all-N's (which would cause genscan to blow up)
    rm -f genome.list
    touch genome.list
    foreach f ( `ls -1S /cluster/data/xenTro1/hardMasked2/*` )
      egrep '[ACGT]' $f > /dev/null
      if ($status == 0) echo $f >> genome.list
    end
    wc -l genome.list

    # Create template file, gsub, for gensub2.  For example (3-line file):
cat << '_EOF_' > gsub
#LOOP
/cluster/bin/x86_64/gsBig {check in line+ $(path1)} {check out line gtf/$(root1).gtf} -trans={check out line pep/$(root1).pep} -subopt={check out line subopt/$(root1).bed} -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=2400000
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy
    gensub2 genome.list single gsub jobList
    para create jobList
    para try, check, push, check, ...
# Completed: 645 of 648 jobs
# Crashed: 3 jobs
# CPU time in finished jobs: 43701s 728.35m 12.14h 0.51d 0.001 y
# IO & Wait Time: 4362s 72.70m 1.21h 0.05d 0.000 y
# Average job time: 75s 1.24m 0.02h 0.00d
# Longest job: 435s 7.25m 0.12h 0.01d
# Submission to last job: 3641s 60.68m 1.01h 0.04d

    # If there are crashes, diagnose with "para problems".
    # If a job crashes due to genscan running out of memory, re-run it
    # manually with "-window=1200000" instead of "-window=2400000".
    ssh kkr7u00
    cd /cluster/data/xenTro1/bed/genscan
    /cluster/bin/x86_64/gsBig /cluster/data/xenTro1/hardMasked2/05.fa gtf/05.gtf -trans=pep/05.pep \
      -subopt=subopt/05.bed -exe=hg3rdParty/genscanlinux/genscan \
      -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=1200000
    /cluster/bin/x86_64/gsBig /cluster/data/xenTro1/hardMasked2/76.fa gtf/76.gtf -trans=pep/76.pep \
      -subopt=subopt/76.bed -exe=hg3rdParty/genscanlinux/genscan \
      -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=1200000
    /cluster/bin/x86_64/gsBig /cluster/data/xenTro1/hardMasked2/116.fa gtf/116.gtf -trans=pep/116.pep \
      -subopt=subopt/116.bed -exe=hg3rdParty/genscanlinux/genscan \
      -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=1200000
    ls -1 gtf | wc -l
    # 648
    endsInLf gtf/*

    # Concatenate these into genome-level files as so:
    ssh kksilo
    cd /cluster/data/xenTro1/bed/genscan
    cat gtf/*.gtf > genscan.gtf
    cat pep/*.pep > genscan.pep
    cat subopt/*.bed > genscanSubopt.bed

    # Load into the database as so:
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/genscan
    # Reloaded without -genePredExt 1/6/05:
    ldHgGene -gtf xenTro1 genscan genscan.gtf
    hgPepPred xenTro1 generic genscanPep genscan.pep
    hgLoadBed xenTro1 genscanSubopt genscanSubopt.bed
    nice featureBits xenTro1 genscan
    # 63766483 bases of 1381238994 (4.617%) in intersection

# MAKE Human Proteins track (hg17) (DONE 2004-11-04 braney)
    ssh kksilo
    mkdir -p /cluster/data/xenTro1/blastDb
    cd /cluster/data/xenTro1/blastDb
    faSplit sequence ../downloads/xen*fasta 500 x
    for i in x*
    do
	formatdb -i $i -p F
    done
    rm *.log *.fa

    ssh kkr1u00
    mkdir -p /iscratch/i/xenTro1/blastDb
    cp /cluster/data/xenTro1/blastDb/* /iscratch/i/xenTro1/blastDb
    (iSync) > sync.out
    exit

    # back to kksilo
    mkdir -p /cluster/data/xenTro1/bed/tblastn.hg17KG
    cd /cluster/data/xenTro1/bed/tblastn.hg17KG
    ls -1S /iscratch/i/xenTro1/blastDb/*.nsq | sed "s/\.nsq//" > frog.lst
    mkdir kgfa
    calc `wc /cluster/data/hg16/bed/blat.hg16KG/hg16KG.psl | awk "{print \\\$1}"`/\(150000/`wc frog.lst | awk "{print \\\$1}"`\)
    # 41117/(150000/498) = 136.508440
    mkdir -p /cluster/bluearc/xenTro1/bed/tblastn.hg17KG/kgfa
    split -l 137 /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl /cluster/bluearc/xenTro1/bed/tblastn.hg17KG/kgfa/kg
    ln -s /cluster/bluearc/xenTro1/bed/tblastn.hg17KG/kgfa kgfa
    cd kgfa
    for i in *; do pslxToFa $i $i.fa; rm $i; done
    cd ..
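    # (Optional sanity check, not part of the original log: confirm the query set was
    # split into fasta chunks of roughly 137 alignments each.)
    ls -1 kgfa/*.fa | wc -l
    grep -c '^>' kgfa/*.fa | head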
    ls -1S kgfa/*.fa > kg.lst
    mkdir blastOut
    for i in `cat kg.lst`; do mkdir blastOut/`basename $i .fa`; done
    tcsh
cat << '_EOF_' > blastGsub
#LOOP
blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl }
#ENDLOOP
'_EOF_'
cat << '_EOF_' > blastSome
#!/bin/sh
BLASTMAT=/iscratch/i/blast/data
export BLASTMAT
g=`basename $2`
f=/tmp/`basename $3`.$g
for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
    if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
    then
	mv $f.8 $f.1
	break;
    fi
done
if test -f $f.1
then
    if /cluster/bin/i386/blastToPsl $f.1 $f.2
    then
	liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg17/bed/blat.hg17KG/protein.lft warn $f.2
	if pslCheck -prot $3.tmp
	then
	    mv $3.tmp $3
	    rm -f $f.1 $f.2
	fi
	exit 0
    fi
fi
rm -f $f.1 $f.2 $3.tmp $f.8
exit 1
'_EOF_'
    chmod +x blastSome
    gensub2 frog.lst kg.lst blastGsub blastSpec

    ssh kk
    cd /cluster/data/xenTro1/bed/tblastn.hg17KG
    para create blastSpec
    para push
# Completed: 146412 of 146412 jobs
# CPU time in finished jobs: 43595624s 726593.73m 12109.90h 504.58d 1.382 y
# IO & Wait Time: 1997243s 33287.38m 554.79h 23.12d 0.063 y
# Average job time: 311s 5.19m 0.09h 0.00d
# Longest job: 1860s 31.00m 0.52h 0.02d
# Submission to last job: 54876s 914.60m 15.24h 0.64d

cat << '_EOF_' > chainGsub
#LOOP
chainSome $(path1)
#ENDLOOP
'_EOF_'
cat << '_EOF_' > chainSome
(cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=50000 stdin ../c.`basename $1`.psl)
'_EOF_'
    chmod +x chainSome
    ls -1dS `pwd`/blastOut/kg?? > chain.lst
    gensub2 chain.lst single chainGsub chainSpec

    ssh kki
    cd /cluster/data/xenTro1/bed/tblastn.hg17KG
    para create chainSpec
    para push
# Completed: 294 of 294 jobs
# CPU time in finished jobs: 893s 14.88m 0.25h 0.01d 0.000 y
# IO & Wait Time: 20958s 349.30m 5.82h 0.24d 0.001 y
# Average job time: 74s 1.24m 0.02h 0.00d
# Longest job: 156s 2.60m 0.04h 0.00d
# Submission to last job: 1727s 28.78m 0.48h 0.02d
    exit
    # back to kksilo
    cd /cluster/data/xenTro1/bed/tblastn.hg17KG/blastOut
    for i in kg??
    do
	awk "(\$13 - \$12)/\$11 > 0.6 {print}" c.$i.psl > c60.$i.psl
	sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
	awk "((\$1 / \$11) ) > 0.60 { print }" c60.$i.psl > m60.$i.psl
	echo $i
    done
    cat u.*.psl m60* | sort -T /tmp -k 14,14 -k 17,17n -k 17,17n | uniq > ../blastHg17KG.psl
    cd ..

    ssh hgwdev
    cd /cluster/data/xenTro1/bed/tblastn.hg17KG
    hgLoadPsl xenTro1 blastHg17KG.psl

    # back to kksilo
    rm -rf blastOut
# End tblastn

# PRODUCING FUGU BLAT ALIGNMENTS (DONE 10/27/04 Fan)
    ssh kksilo
    cd /cluster/data/xenTro1
    mkdir softMasked2
    cd softMasked2
    /cluster/bin/i386/faSplit about ../xenTro1.softmask2.fa 2000000 ./

    ssh kkr1u00
    rm -rf /iscratch/i/xenTro1/softMasked2
    mkdir /iscratch/i/xenTro1/softMasked2
    foreach d (/cluster/data/xenTro1/softMasked2/*)
      cp $d /iscratch/i/xenTro1/softMasked2
    end
    iSync

    ssh kk
    mkdir /cluster/data/xenTro1/bed/blatFr1
    cd /cluster/data/xenTro1/bed/blatFr1
    ls -1S /cluster/bluearc/fugu/fr1/trfFa/*.fa > fugu.lst
    ls -1S /iscratch/i/xenTro1/softMasked2/*.fa > frog.lst
cat << '_EOF_' > gsub
#LOOP
blat -mask=lower -q=dnax -t=dnax {check in exists $(path1)} {check in line+ $(path2)} {check out line+ psl/$(root1)_$(root2).psl}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy
    mkdir psl
    gensub2 frog.lst fugu.lst gsub spec
    para create spec
    para try, check, push, check, ...
# Completed: 4536 of 4536 jobs
# CPU time in finished jobs: 3466126s 57768.77m 962.81h 40.12d 0.110 y
# IO & Wait Time: 458981s 7649.68m 127.49h 5.31d 0.015 y
# Average job time: 865s 14.42m 0.24h 0.01d
# Longest job: 3763s 62.72m 1.05h 0.04d
# Submission to last job: 7688s 128.13m 2.14h 0.09d

    # Sort alignments:
    ssh kksilo
    cd /cluster/data/xenTro1/bed/blatFr1
    # added the liftUp step, and reloaded blatFr1 table.  11/16/04 Fan.
    pslCat -dir psl \
      | liftUp -type=.psl -pslQ stdout /cluster/data/fr1/jkStuff/liftAll.lft \
        warn stdin \
      | pslSortAcc nohead chrom /tmp stdin

    # load psl into database:
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/blatFr1/chrom
    foreach d (/cluster/data/xenTro1/bed/blatFr1/chrom/*)
      cat $d >> blatFr1_all.psl
    end
    # Special case: fr1 has only chrUn, so no need to index tName!
    hgLoadPsl -noTNameIx -fastLoad -table=blatFr1 xenTro1 blatFr1_all.psl
    # load of blatFr1 did not go as planned: 1360912 record(s), 0 row(s) skipped, 1 warning(s) loading psl.tab
    featureBits xenTro1 blatFr1
    # 27096524 bases of 1381238994 (1.962%) in intersection
    featureBits hg17 blatFr1
    # 21977081 bases of 2866216770 (0.767%) in intersection

# MAKE HGCENTRALTEST BLATSERVERS ENTRY FOR xenTro1 (DONE 10/28/04 Fan)
    ssh hgwdev
    echo 'insert into blatServers values("xenTro1", "blat13", "17780", 1, 0); \
          insert into blatServers values("xenTro1", "blat13", "17781", 0, 1);' \
      | hgsql -h genome-testdb hgcentraltest

# ALIGN TO CHICKEN (BLASTZ/CHAIN/NET DONE 10/28/04 JK)
    ssh kk
    mkdir -p /cluster/data/xenTro1/bed/zb.galGal2
    cd /cluster/data/xenTro1/bed/zb.galGal2
    # Create DEF file for run of scaffolds vs. chicken chromosomes
cat << '_EOF_' > DEF
# chicken vs. xenopus
export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/bin/i386:/cluster/home/angie/schwartzbin

ALIGN=blastz-run
BLASTZ=blastz

# Set up blastz parameters using parameters between chicken and fish,
# but not abridging repeats since can't do that with scaffolds, and
# it's not very relevant at this evolutionary distance.
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=8000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q
BLASTZ_ABRIDGE_REPEATS=0

# TARGET: Chicken
SEQ1_DIR=/iscratch/i/galGal2/nib
SEQ1_RMSK=
SEQ1_FLAG=
SEQ1_SMSK=
SEQ1_IN_CONTIGS=0
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: Xenopus
SEQ2_DIR=/scratch/hg/xenTro1/maskedPieces
SEQ2_RMSK=
SEQ2_FLAG=
SEQ2_SMSK=
SEQ2_IN_CONTIGS=1
SEQ2_CHUNK=10000000
SEQ2_LAP=0

BASE=/cluster/data/xenTro1/bed/zb.galGal2

DEF=$BASE/DEF
RAW=$BASE/raw
CDBDIR=$BASE
SEQ1_LEN=$BASE/S1.len
SEQ2_LEN=$BASE/S2.len
'_EOF_'
    # << this line keeps emacs coloring happy
    bash
    source DEF
    mkdir $RAW run.0
    cp /cluster/data/galGal2/chrom.sizes $BASE/S1.len
    cp /cluster/data/xenTro1/chrom.sizes $BASE/S2.len
    /cluster/home/angie/hummus/make-joblist $DEF > $BASE/run.0/j
    # (Next time use /cluster/bin/scripts/blastz-make-joblist instead)
    sh ./xdir.sh
    cd run.0
    sed -e 's@^blastz-run@/cluster/bin/penn/blastz-run@' j > jobList
    para create jobList
    para try, push check, para push, para check....
# Completed: 52397 of 52397 jobs
# CPU time in finished jobs: 19113995s 318566.59m 5309.44h 221.23d 0.606 y
# IO & Wait Time: 212544s 3542.40m 59.04h 2.46d 0.007 y
# Average job time: 369s 6.15m 0.10h 0.00d
# Longest job: 3764s 62.73m 1.05h 0.04d
# Submission to last job: 39454s 657.57m 10.96h 0.46d

    # second cluster run: lift raw alignments -> lav dir
    ssh kki
    cd /cluster/data/xenTro1/bed/zb.galGal2
    bash	# if a csh/tcsh user
    source DEF
    mkdir run.1 lav
    /cluster/bin/scripts/blastz-make-out2lav $DEF $BASE > $BASE/run.1/jobList
    cd run.1
    para create jobList
    para try, push check, para push, para check....
# Completed: 151 of 151 jobs
# CPU time in finished jobs: 1583s 26.38m 0.44h 0.02d 0.000 y
# IO & Wait Time: 2086s 34.77m 0.58h 0.02d 0.000 y
# Average job time: 24s 0.40m 0.01h 0.00d
# Longest job: 115s 1.92m 0.03h 0.00d
# Submission to last job: 703s 11.72m 0.20h 0.01d

    # Convert lav to axt.  This takes 5 or 10 minutes
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.galGal2
    mkdir axtTemp
    cd lav
    foreach i (*)
      catDir $i \
        | lavToAxt stdin /cluster/data/galGal2/nib \
          /cluster/data/xenTro1/xenTro1.2bit ../axtTemp/$i.axt
      echo done $i
    end

# CHAIN CHICKEN BLASTZ
    # Run axtChain on little cluster
    # Note, for this one might as well do it serially on the file server too.
    # The longest job (chr19) takes up 90% of the time.
    ssh kki
    cd /cluster/data/xenTro1/bed/zb.galGal2
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chainRaw
    ls -1S /cluster/data/xenTro1/bed/zb.galGal2/axtTemp/*.axt \
      > input.lst
cat << '_EOF_' > gsub
#LOOP
doChain {check in exists $(path1)} {check out line+ chainRaw/$(root1).chain} {check out line+ out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy

    # Reuse gap penalties from chicken run.
cat << '_EOF_' > temp.gap
tablesize 11
smallSize 111
position 1 2 3 11 111 2111 12111 32111 72111 152111 252111
qGap 325 360 400 450 600 1100 3600 7600 15600 31600 56600
tGap 325 360 400 450 600 1100 3600 7600 15600 31600 56600
bothGap 625 660 700 750 900 1400 4000 8000 16000 32000 57000
'_EOF_'
    # << this line makes emacs coloring happy
    sed 's/ */\t/g' temp.gap > ../../xenopusHumanTuned.gap
    rm -f temp.gap
cat << '_EOF_' > doChain
#!/bin/csh
axtChain -scoreScheme=/cluster/data/blastz/HoxD55.q \
    -linearGap=../../xenopusHumanTuned.gap \
    -minScore=5000 $1 \
    /iscratch/i/galGal2/nib \
    /iscratch/i/xenTro1/xenTro1.2bit $2 > $3
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    gensub2 input.lst single gsub jobList
    para create jobList
    para try
    para check
    para push
# Completed: 52 of 54 jobs
# Crashed: 2 jobs
# CPU time in finished jobs: 495s 8.25m 0.14h 0.01d 0.000 y
# IO & Wait Time: 281s 4.68m 0.08h 0.00d 0.000 y
# Average job time: 15s 0.25m 0.00h 0.00d
# Longest job: 215s 3.58m 0.06h 0.00d
# Submission to last job: 215s 3.58m 0.06h 0.00d
#
# Note, the two crashes were from chr6_random and chr8_random, which simply
# had no alignments and thus produced empty files, so no problem.

    # On the cluster server weed the chains for repeats
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.galGal2/axtChain
    mkdir chain
    cd chainRaw
    foreach i (*.chain)
      chainAntiRepeat /cluster/data/galGal2/nib /cluster/data/xenTro1/xenTro1.2bit $i ../chain/$i
    end

    # now on the cluster server, sort chains.  The chainMergeSort and chainSplit
    # steps both take about 10 minutes
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.galGal2/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    rm run1/chain/*.chain
    rm -r ../axtTemp
    chainSplit chain all.chain
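    # (Optional spot-check, not part of the original log: count chains per chicken
    # chromosome using the tName field of the chain header lines in all.chain.)
    awk '/^chain/ {print $3}' all.chain | sort | uniq -c | sort -rn | head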
    # Load chains into database.  This takes an hour
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/zb.galGal2/axtChain/chain
    foreach i (*.chain)
      set c = $i:r
      echo loading $c
      hgLoadChain galGal2 ${c}_chainXenTro1 $i
    end
    featureBits galGal2 chainXenTro1Link

# NET XENOPUS BLASTZ
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.galGal2/axtChain
    chainPreNet all.chain ../S1.len ../S2.len stdout \
      | chainNet stdin -minSpace=1 ../S1.len ../S2.len stdout /dev/null \
      | netSyntenic stdin noClass.net

    # Create axt files for the net as so:
    netSplit noClass.net noClass
    cd noClass
    mkdir ../../axtNet
    foreach i (*.net)
      netToAxt $i ../chain/$i:r.chain /cluster/data/galGal2/nib /cluster/data/xenTro1/xenTro1.2bit ../../axtNet/$i:r.axt
    end
    cd ..
    rm -r noClass

    # Add classification info using db tables: (Stopped here, need gap tables)
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/zb.galGal2/axtChain
    netClass -noAr noClass.net galGal2 xenTro1 galGal2.net
    rm noClass.net

    # Load the nets into database
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/zb.galGal2/axtChain
    netFilter -minGap=10 galGal2.net | hgLoadNet galGal2 netXenTro1 stdin
    # index had NULL cardinality; analyze table to fix (DONE 1/18/2005 Heather)
    hgsql hg17
    analyze table netXenTro1;

# SWAP EVERYTHING AND REMAKE NET FROM XENOPUS REFERENCE
    ssh kksilo
    cd /cluster/data/xenTro1/bed
    mkdir -p bz.galGal2/axtChain
    chainSwap zb.galGal2/axtChain/all.chain bz.galGal2/axtChain/all.chain
    cd bz.galGal2/axtChain
    chainPreNet all.chain ../../zb.galGal2/S2.len ../../zb.galGal2/S1.len stdout \
      | chainNet stdin -minSpace=1 ../../zb.galGal2/S2.len ../../zb.galGal2/S1.len stdout /dev/null \
      | netSyntenic stdin noClass.net
    mkdir ../axtNet
    netToAxt noClass.net all.chain /cluster/data/xenTro1/xenTro1.2bit /cluster/data/galGal2/nib \
      ../axtNet/all.axt

    # Classify net and load into database
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/bz.galGal2/axtChain
    netClass -noAr noClass.net xenTro1 galGal2 xenTro1.net
    rm noClass.net
    netFilter -minGap=10 xenTro1.net | hgLoadNet xenTro1 netGalGal2 stdin

    # Load chains into database (takes about 2 hours).
    hgLoadChain -tIndex xenTro1 chainGalGal2 all.chain
    rm chain.tab link.tab

# ALIGN TO HUMAN (BLASTZ/CHAIN/NET DONE 10/28/04 JK)
    ssh kk
    mkdir -p /cluster/data/xenTro1/bed/zb.hg17
    cd /cluster/data/xenTro1/bed/zb.hg17
    # Create DEF file for run of scaffolds vs. human chromosomes
cat << '_EOF_' > DEF
# human vs. xenopus
export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/bin/i386:/cluster/home/angie/schwartzbin

ALIGN=blastz-run
BLASTZ=blastz

# Set up blastz parameters using parameters between chicken and fish,
# but not abridging repeats since can't do that with scaffolds, and
# it's not very relevant at this evolutionary distance.
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=8000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q
BLASTZ_ABRIDGE_REPEATS=0

# TARGET: Human
SEQ1_DIR=/scratch/hg/gs.18/build35/bothMaskedNibs
SEQ1_RMSK=
SEQ1_FLAG=
SEQ1_SMSK=
SEQ1_IN_CONTIGS=0
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: Xenopus
SEQ2_DIR=/scratch/hg/xenTro1/maskedPieces
SEQ2_RMSK=
SEQ2_FLAG=
SEQ2_SMSK=
SEQ2_IN_CONTIGS=1
SEQ2_CHUNK=10000000
SEQ2_LAP=0

BASE=/cluster/data/xenTro1/bed/zb.hg17

DEF=$BASE/DEF
RAW=$BASE/raw
CDBDIR=$BASE
SEQ1_LEN=$BASE/S1.len
SEQ2_LEN=$BASE/S2.len
'_EOF_'
    # << this line keeps emacs coloring happy
    bash
    source DEF
    mkdir $RAW run.0
    cp /cluster/data/hg17/chrom.sizes $BASE/S1.len
    cp /cluster/data/xenTro1/chrom.sizes $BASE/S2.len
    /cluster/home/angie/hummus/make-joblist $DEF > $BASE/run.0/j
    sh ./xdir.sh
    cd run.0
    sed -e 's@^blastz-run@/cluster/bin/penn/blastz-run@' j > jobList
    para create jobList
    para try, push check, para push, para check....
# Completed: 118327 of 118327 jobs
# CPU time in finished jobs: 28921705s 482028.42m 8033.81h 334.74d 0.917 y
# IO & Wait Time: 400616s 6676.93m 111.28h 4.64d 0.013 y
# Average job time: 248s 4.13m 0.07h 0.00d
# Longest job: 5415s 90.25m 1.50h 0.06d
# Submission to last job: 35773s 596.22m 9.94h 0.41d

    # second cluster run: lift raw alignments -> lav dir
    ssh kki
    cd /cluster/data/xenTro1/bed/zb.hg17
    bash	# if a csh/tcsh user
    source DEF
    mkdir run.1 lav
    /cluster/bin/scripts/blastz-make-out2lav $DEF $BASE > $BASE/run.1/jobList
    cd run.1
    wc -l jobList
    para create jobList
    para try, push check, para push, para check....
# Completed: 341 of 341 jobs
# CPU time in finished jobs: 4477s 74.62m 1.24h 0.05d 0.000 y
# IO & Wait Time: 12372s 206.19m 3.44h 0.14d 0.000 y
# Average job time: 49s 0.82m 0.01h 0.00d
# Longest job: 305s 5.08m 0.08h 0.00d
# Submission to last job: 1113s 18.55m 0.31h 0.01d

    # third run: lav -> axt
    # (Note - this seems to be i/o bound even on a single CPU.  Next time
    # I'd suggest just running it serially on the file server -jk )
    ssh kki
    cd /cluster/data/xenTro1/bed/zb.hg17
    mkdir axtChrom run.2
    cd run.2
cat << 'EOF' > do.csh
#!/bin/csh -ef
cd $1
set chr = $1:t
cat `ls -1 *.lav | sort -g` \
  | lavToAxt stdin \
      /iscratch/i/hg17/bothMaskedNibs /iscratch/i/xenTro1/xenTro1.2bit ../../axtChrom/$chr.axt
'EOF'
    # << this line keeps emacs coloring happy
    chmod a+x do.csh
    cp /dev/null jobList
    foreach d (../lav/chr*)
      echo "do.csh $d" >> jobList
    end
    para create jobList
    # 46 jobs
    para try
    para check
    para push

# CHAIN HUMAN BLASTZ
    # Run axtChain on little cluster
    # Note, for this one might as well do it serially on the file server too.
    # The longest job (chr19) takes up 90% of the time.
    ssh kki
    cd /cluster/data/xenTro1/bed/zb.hg17
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chainRaw
    ls -1S /cluster/data/xenTro1/bed/zb.hg17/axtChrom/*.axt \
      > input.lst
cat << '_EOF_' > gsub
#LOOP
doChain {check in exists $(path1)} {check out line+ chainRaw/$(root1).chain} {check out line+ out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy

    # Reuse gap penalties from chicken run.
cat << '_EOF_' > temp.gap
tablesize 11
smallSize 111
position 1 2 3 11 111 2111 12111 32111 72111 152111 252111
qGap 325 360 400 450 600 1100 3600 7600 15600 31600 56600
tGap 325 360 400 450 600 1100 3600 7600 15600 31600 56600
bothGap 625 660 700 750 900 1400 4000 8000 16000 32000 57000
'_EOF_'
    # << this line makes emacs coloring happy
    sed 's/ */\t/g' temp.gap > ../../xenopusHumanTuned.gap
    rm -f temp.gap
cat << '_EOF_' > doChain
#!/bin/csh
axtChain -scoreScheme=/cluster/data/blastz/HoxD55.q \
    -linearGap=../../xenopusHumanTuned.gap \
    -minScore=5000 $1 \
    /iscratch/i/hg17/bothMaskedNibs \
    /iscratch/i/xenTro1/xenTro1.2bit $2 > $3
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    gensub2 input.lst single gsub jobList
    para create jobList
    para try
    para check
    para push
# Completed: 46 of 46 jobs
# CPU time in finished jobs: 2396s 39.94m 0.67h 0.03d 0.000 y
# IO & Wait Time: 7221s 120.34m 2.01h 0.08d 0.000 y
# Average job time: 209s 3.48m 0.06h 0.00d
# Longest job: 2042s 34.03m 0.57h 0.02d
# Submission to last job: 2042s 34.03m 0.57h 0.02d

    # On the cluster server weed the chains for repeats
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.hg17/axtChain
    mkdir chain
    cd chainRaw
    foreach i (*.chain)
      chainAntiRepeat /cluster/data/hg17/nib /cluster/data/xenTro1/xenTro1.2bit $i ../chain/$i
    end

    # now on the cluster server, sort chains.  The chainMergeSort and chainSplit
    # steps both take about 10 minutes
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.hg17/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chain all.chain
    rm run1/chain/*.chain

    # Load chains into database.  This takes an hour
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/zb.hg17/axtChain/chain
    foreach i (*.chain)
      set c = $i:r
      echo loading $c
      hgLoadChain hg17 ${c}_chainXenTro1 $i
    end
    featureBits hg17 chainXenTro1Link

# NET XENOPUS BLASTZ
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.hg17/axtChain
    chainPreNet all.chain ../S1.len ../S2.len stdout \
      | chainNet stdin -minSpace=1 ../S1.len ../S2.len stdout /dev/null \
      | netSyntenic stdin noClass.net

    # Create axt files for the net as so:
    netSplit noClass.net noClass
    cd noClass
    mkdir ../../axtNet
    foreach i (*.net)
      netToAxt $i ../chain/$i:r.chain /cluster/data/hg17/nib /cluster/data/xenTro1/xenTro1.2bit ../../axtNet/$i:r.axt
    end
    cd ..
    rm -r noClass

    # Add classification info using db tables:
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/zb.hg17/axtChain
    netClass -noAr noClass.net hg17 xenTro1 human.net
    rm noClass.net

    # Load the nets into database
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/zb.hg17/axtChain
    netFilter -minGap=10 human.net | hgLoadNet hg17 netXenTro1 stdin

# MAKE NET MAFS FOR MULTIZ (2004-12-15 kate)
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.hg17/axtNet
    mkdir -p ../mafNet
    cat *.axt | axtSort stdin stdout | axtToMaf -tSplit stdin \
      -tPrefix=hg17. /cluster/data/hg17/chrom.sizes \
      -qPrefix=xenTro1. /cluster/data/xenTro1/chrom.sizes ../mafNet

    # redo axt's and maf's to remove overlaps (2006-04-07 kate)
    ssh kkstore04
    cd /cluster/data/xenTro1/bed/zb.hg17
    mv axtNet axtNet.old
    mv mafNet mafNet.old
    mkdir -p axtNet mafNet
    cd axtChain
    netSplit human.net net/
    chainSplit chain/ all.chain
cat > fix.csh << 'EOF'
date
foreach f (net/chr*.net)
    set c = $f:t:r
    echo $c
    netToAxt net/$c.net chain/$c.chain \
        /cluster/data/hg17/nib /cluster/data/xenTro1/xenTro1.2bit stdout | \
        axtSort stdin ../axtNet/$c.axt
    echo "Complete: $c.net -> $c.axt"
    axtToMaf ../axtNet/$c.axt \
        /cluster/data/hg17/chrom.sizes /cluster/data/xenTro1/chrom.sizes \
        ../mafNet/$c.maf -tPrefix=hg17. -qPrefix=xenTro1.
end
date
'EOF'
    csh fix.csh >&! fix.log &
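    # (Optional sanity check, not part of the original log: once fix.csh finishes,
    # confirm the redone net mafs contain alignment blocks before copying to the san.)
    grep -c '^a ' ../mafNet/chr1.maf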
    cd /san/sanvol1/scratch/hg17/mafNet
    rm -fr xenTro1
    cp -rp /cluster/data/xenTro1/bed/zb.hg17/mafNet xenTro1

# SWAP EVERYTHING AND REMAKE NET FROM XENOPUS REFERENCE
    ssh kksilo
    cd /cluster/data/xenTro1/bed
    mkdir -p bz.hg17/axtChain
    chainSwap zb.hg17/axtChain/all.chain bz.hg17/axtChain/all.chain
    cd bz.hg17/axtChain
    chainPreNet all.chain ../../zb.hg17/S2.len ../../zb.hg17/S1.len stdout \
      | chainNet stdin -minSpace=1 ../../zb.hg17/S2.len ../../zb.hg17/S1.len stdout /dev/null \
      | netSyntenic stdin noClass.net
    mkdir ../axtNet
    netToAxt noClass.net all.chain /cluster/data/xenTro1/xenTro1.2bit /cluster/data/hg17/nib \
      ../axtNet/all.axt

    # Classify net and load into database
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/bz.hg17/axtChain
    netClass -noAr noClass.net xenTro1 hg17 xenTro1.net
    rm noClass.net
    netFilter -minGap=10 xenTro1.net | hgLoadNet xenTro1 netHg17 stdin

    # Load chains into database (takes about 2 hours).
    hgLoadChain -tIndex xenTro1 chainHg17 all.chain
    rm chain.tab link.tab

    # compressed and created symlinks to make consistent with newer builds
    # 2006-04-09 markd
    ln -s zb.hg17 /cluster/data/xenTro1/bed/blastz.hg17
    cd /cluster/data/xenTro1/bed/zb.hg17/axtChain
    gzip all.chain human.net
    ln -s all.chain.gz xenTro1.hg17.all.chain.gz
    ln -s human.net.gz xenTro1.hg17.net.gz

# ALIGN TO MOUSE (BLASTZ/CHAIN/NET DONE 10/24/04 JK)
    ssh kk
    mkdir -p /cluster/data/xenTro1/bed/zb.mm5
    cd /cluster/data/xenTro1/bed/zb.mm5
    # Create DEF file for run of scaffolds vs. mouse chromosomes
cat << '_EOF_' > DEF
# mouse vs. xenopus
export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/bin/i386:/cluster/home/angie/schwartzbin

ALIGN=blastz-run
BLASTZ=blastz

# Set up blastz parameters using parameters between chicken and fish,
# but not abridging repeats since can't do that with scaffolds, and
# it's not very relevant at this evolutionary distance.
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=8000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q
BLASTZ_ABRIDGE_REPEATS=0

# TARGET: Mouse
SEQ1_DIR=/scratch/mus/mm5/softNib
SEQ1_RMSK=
SEQ1_FLAG=
SEQ1_SMSK=
SEQ1_IN_CONTIGS=0
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: Xenopus
SEQ2_DIR=/scratch/hg/xenTro1/maskedPieces
SEQ2_RMSK=
SEQ2_FLAG=
SEQ2_SMSK=
SEQ2_IN_CONTIGS=1
SEQ2_CHUNK=10000000
SEQ2_LAP=0

BASE=/cluster/data/xenTro1/bed/zb.mm5

DEF=$BASE/DEF
RAW=$BASE/raw
CDBDIR=$BASE
SEQ1_LEN=$BASE/S1.len
SEQ2_LEN=$BASE/S2.len
'_EOF_'
    # << this line keeps emacs coloring happy
    bash
    source DEF
    mkdir $RAW run.0
    cp /cluster/data/mm5/chrom.sizes $BASE/S1.len
    cp /cluster/data/xenTro1/chrom.sizes $BASE/S2.len
    /cluster/home/angie/hummus/make-joblist $DEF > $BASE/run.0/j
    sh ./xdir.sh
    cd run.0
    sed -e 's@^blastz-run@/cluster/bin/penn/blastz-run@' j > jobList
    para create jobList
    para try, push check, para push, para check....
# Completed: 118327 of 118327 jobs
# CPU time in finished jobs: 26625995s 443766.59m 7396.11h 308.17d 0.844 y
# IO & Wait Time: 407224s 6787.06m 113.12h 4.71d 0.013 y
# Average job time: 228s 3.81m 0.06h 0.00d
# Longest job: 2642s 44.03m 0.73h 0.03d
# Submission to last job: 47229s 787.15m 13.12h 0.55d

    # second cluster run: lift raw alignments -> lav dir
    ssh kki
    cd /cluster/data/xenTro1/bed/zb.mm5
    bash	# if a csh/tcsh user
    source DEF
    mkdir run.1 lav
    /cluster/bin/scripts/blastz-make-out2lav $DEF $BASE > $BASE/run.1/jobList
    cd run.1
    wc -l jobList
    para create jobList
    para try, push check, para push, para check....
# Completed: 341 of 341 jobs
# CPU time in finished jobs: 3787s 63.12m 1.05h 0.04d 0.000 y
# IO & Wait Time: 6585s 109.75m 1.83h 0.08d 0.000 y
# Average job time: 30s 0.51m 0.01h 0.00d
# Longest job: 163s 2.72m 0.05h 0.00d
# Submission to last job: 752s 12.53m 0.21h 0.01d

    # Convert lav to axt.  This takes 30 minutes
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.mm5
    mkdir axtTemp
    cd lav
    foreach i (*)
      catDir $i \
        | lavToAxt stdin /cluster/data/mm5/nib \
          /cluster/data/xenTro1/xenTro1.2bit ../axtTemp/$i.axt
      echo done $i
    end

# CHAIN MOUSE BLASTZ
    # Run axtChain on little cluster
    # The longest job (chr19) takes up 90% of the time.
    ssh kki
    cd /cluster/data/xenTro1/bed/zb.mm5
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chainRaw
    ls -1S /cluster/data/xenTro1/bed/zb.mm5/axtTemp/*.axt \
      > input.lst
cat << '_EOF_' > gsub
#LOOP
doChain {check in exists $(path1)} {check out line+ chainRaw/$(root1).chain} {check out line+ out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy

    # Reuse gap penalties from chicken run.
cat << '_EOF_' > temp.gap
tablesize 11
smallSize 111
position 1 2 3 11 111 2111 12111 32111 72111 152111 252111
qGap 325 360 400 450 600 1100 3600 7600 15600 31600 56600
tGap 325 360 400 450 600 1100 3600 7600 15600 31600 56600
bothGap 625 660 700 750 900 1400 4000 8000 16000 32000 57000
'_EOF_'
    # << this line makes emacs coloring happy
    sed 's/ */\t/g' temp.gap > ../../xenopusHumanTuned.gap
    rm -f temp.gap
cat << '_EOF_' > doChain
#!/bin/csh
axtChain -scoreScheme=/cluster/data/blastz/HoxD55.q \
    -linearGap=../../xenopusHumanTuned.gap \
    -minScore=5000 $1 \
    /scratch/mus/mm5/softNib \
    /iscratch/i/xenTro1/xenTro1.2bit $2 > $3
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    gensub2 input.lst single gsub jobList
    para create jobList
    para try
    para check
    para push
# Completed: 46 of 46 jobs
# CPU time in finished jobs: 2396s 39.94m 0.67h 0.03d 0.000 y
# IO & Wait Time: 7221s 120.34m 2.01h 0.08d 0.000 y
# Average job time: 209s 3.48m 0.06h 0.00d
# Longest job: 2042s 34.03m 0.57h 0.02d
# Submission to last job: 2042s 34.03m 0.57h 0.02d

    # On the cluster server weed the chains for repeats
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.mm5/axtChain
    mkdir chain
    cd chainRaw
    foreach i (*.chain)
      chainAntiRepeat /cluster/data/mm5/nib /cluster/data/xenTro1/xenTro1.2bit $i ../chain/$i
    end

    # now on the cluster server, sort chains.  The chainMergeSort and chainSplit
    # steps both take about 10 minutes
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.mm5/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chain all.chain
    rm run1/chain/*.chain

    # Load chains into database.  This takes an hour
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/zb.mm5/axtChain/chain
    foreach i (*.chain)
      set c = $i:r
      echo loading $c
      hgLoadChain mm5 ${c}_chainXenTro1 $i
    end
    featureBits mm5 chainXenTro1Link

# NET XENOPUS BLASTZ
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.mm5/axtChain
    chainPreNet all.chain ../S1.len ../S2.len stdout \
      | chainNet stdin -minSpace=1 ../S1.len ../S2.len stdout /dev/null \
      | netSyntenic stdin noClass.net

    # Create axt files for the net as so:
    netSplit noClass.net noClass
    cd noClass
    mkdir ../../axtNet
    foreach i (*.net)
      netToAxt $i ../chain/$i:r.chain /cluster/data/mm5/nib /cluster/data/xenTro1/xenTro1.2bit ../../axtNet/$i:r.axt
    end
    cd ..
    rm -r noClass

    # Add classification info using db tables: (Stopped here, need gap tables)
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/zb.mm5/axtChain
    netClass -noAr noClass.net mm5 xenTro1 mm5.net
    rm noClass.net

    # Load the nets into database
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/zb.mm5/axtChain
    netFilter -minGap=10 mm5.net | hgLoadNet mm5 netXenTro1 stdin

# SWAP EVERYTHING AND REMAKE NET FROM XENOPUS REFERENCE
    ssh kksilo
    cd /cluster/data/xenTro1/bed
    mkdir -p bz.mm5/axtChain
    chainSwap zb.mm5/axtChain/all.chain bz.mm5/axtChain/all.chain
    cd bz.mm5/axtChain
    chainPreNet all.chain ../../zb.mm5/S2.len ../../zb.mm5/S1.len stdout \
      | chainNet stdin -minSpace=1 ../../zb.mm5/S2.len ../../zb.mm5/S1.len stdout /dev/null \
      | netSyntenic stdin noClass.net
    mkdir ../axtNet
    netToAxt noClass.net all.chain /cluster/data/xenTro1/xenTro1.2bit /cluster/data/mm5/nib \
      ../axtNet/all.axt

    # Classify net and load into database
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/bz.mm5/axtChain
    netClass -noAr noClass.net xenTro1 mm5 xenTro1.net
    rm noClass.net
    netFilter -minGap=10 xenTro1.net | hgLoadNet xenTro1 netMm5 stdin

    # Load chains into database (takes about 2 hours).
    hgLoadChain -tIndex xenTro1 chainMm5 all.chain
    rm chain.tab link.tab

# ALIGN TO ZEBRAFISH (BLASTZ/CHAIN/NET DONE 10/24/04 JK)
    ssh kk
    mkdir -p /cluster/data/xenTro1/bed/zb.danRer1
    cd /cluster/data/xenTro1/bed/zb.danRer1
    # Create DEF file for run of scaffolds vs. zebrafish chromosomes
cat << '_EOF_' > DEF
# zebrafish vs. xenopus
export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/bin/i386:/cluster/home/angie/schwartzbin

ALIGN=blastz-run
BLASTZ=blastz

# Set up blastz parameters using parameters between chicken and fish,
# but not abridging repeats since can't do that with scaffolds, and
# it's not very relevant at this evolutionary distance.
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q
BLASTZ_ABRIDGE_REPEATS=0

# TARGET: Zebrafish
SEQ1_DIR=/iscratch/i/danRer1/nib
SEQ1_RMSK=
SEQ1_FLAG=
SEQ1_SMSK=
SEQ1_IN_CONTIGS=0
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: Xenopus
SEQ2_DIR=/scratch/hg/xenTro1/maskedPieces
SEQ2_RMSK=
SEQ2_FLAG=
SEQ2_SMSK=
SEQ2_IN_CONTIGS=1
SEQ2_CHUNK=10000000
SEQ2_LAP=0

BASE=/cluster/data/xenTro1/bed/zb.danRer1

DEF=$BASE/DEF
RAW=$BASE/raw
CDBDIR=$BASE
SEQ1_LEN=$BASE/S1.len
SEQ2_LEN=$BASE/S2.len
'_EOF_'
    # << this line keeps emacs coloring happy
    bash
    source DEF
    mkdir $RAW run.0
    cp /cluster/data/danRer1/chrom.sizes $BASE/S1.len
    cp /cluster/data/xenTro1/chrom.sizes $BASE/S2.len
    /cluster/home/angie/hummus/make-joblist $DEF > $BASE/run.0/j
    sh ./xdir.sh
    cd run.0
    sed -e 's@^blastz-run@/cluster/bin/penn/blastz-run@' j > jobList
    para create jobList
    para try, push check, para push, para check....
# Completed: 58990 of 58990 jobs
# CPU time in finished jobs: 10946601s 182443.35m 3040.72h 126.70d 0.347 y
# IO & Wait Time: 389169s 6486.15m 108.10h 4.50d 0.012 y
# Average job time: 192s 3.20m 0.05h 0.00d
# Longest job: 5028s 83.80m 1.40h 0.06d
# Submission to last job: 138773s 2312.88m 38.55h 1.61d

    # second cluster run: lift raw alignments -> lav dir
    ssh kki
    cd /cluster/data/xenTro1/bed/zb.danRer1
    bash	# if a csh/tcsh user
    source DEF
    mkdir run.1 lav
    /cluster/bin/scripts/blastz-make-out2lav $DEF $BASE > $BASE/run.1/jobList
    cd run.1
    para create jobList
# Completed: 170 of 170 jobs
# CPU time in finished jobs: 5013s 83.55m 1.39h 0.06d 0.000 y
# IO & Wait Time: 14173s 236.22m 3.94h 0.16d 0.000 y
# Average job time: 113s 1.88m 0.03h 0.00d
# Longest job: 333s 5.55m 0.09h 0.00d
# Submission to last job: 2837s 47.28m 0.79h 0.03d

    # Convert lav to axt.  This takes about 2 hours
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.danRer1
    mkdir axtTemp
    cd lav
    foreach i (*)
      catDir $i \
        | lavToAxt stdin /cluster/data/danRer1/nib \
          /cluster/data/xenTro1/xenTro1.2bit ../axtTemp/$i.axt
      echo done $i
    end

# CHAIN ZEBRAFISH BLASTZ
    # Run axtChain on little cluster
    # Note, for this one might as well do it serially on the file server too.
    # The longest job (chr19) takes up 90% of the time.
    ssh kki
    cd /cluster/data/xenTro1/bed/zb.danRer1
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chainRaw
    ls -1S /cluster/data/xenTro1/bed/zb.danRer1/axtTemp/*.axt \
      > input.lst
cat << '_EOF_' > gsub
#LOOP
doChain {check in exists $(path1)} {check out line+ chainRaw/$(root1).chain} {check out line+ out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy

    # Reuse gap penalties from chicken run.
cat << '_EOF_' > temp.gap
tablesize 11
smallSize 111
position 1 2 3 11 111 2111 12111 32111 72111 152111 252111
qGap 325 360 400 450 600 1100 3600 7600 15600 31600 56600
tGap 325 360 400 450 600 1100 3600 7600 15600 31600 56600
bothGap 625 660 700 750 900 1400 4000 8000 16000 32000 57000
'_EOF_'
    # << this line makes emacs coloring happy
    sed 's/ */\t/g' temp.gap > ../../xenopusHumanTuned.gap
    rm -f temp.gap
cat << '_EOF_' > doChain
#!/bin/csh
axtChain -scoreScheme=/cluster/data/blastz/HoxD55.q \
    -linearGap=../../xenopusHumanTuned.gap \
    -minScore=5000 $1 \
    /iscratch/i/danRer1/nib \
    /iscratch/i/xenTro1/xenTro1.2bit $2 > $3
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    gensub2 input.lst single gsub jobList
    para create jobList
    para try
    para check
    para push
# Completed: 52 of 54 jobs
# Crashed: 2 jobs
# CPU time in finished jobs: 495s 8.25m 0.14h 0.01d 0.000 y
# IO & Wait Time: 281s 4.68m 0.08h 0.00d 0.000 y
# Average job time: 15s 0.25m 0.00h 0.00d
# Longest job: 215s 3.58m 0.06h 0.00d
# Submission to last job: 215s 3.58m 0.06h 0.00d
#
# Note, the two crashes were from chr6_random and chr8_random, which simply
# had no alignments and thus produced empty files, so no problem.

    # On the cluster server weed the chains for repeats
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.danRer1/axtChain
    mkdir chain
    cd chainRaw
    foreach i (*.chain)
      chainAntiRepeat /cluster/data/danRer1/nib /cluster/data/xenTro1/xenTro1.2bit $i ../chain/$i
    end

    # now on the cluster server, sort chains.  The chainMergeSort and chainSplit
    # steps both take about 10 minutes
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.danRer1/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chain all.chain
    rm run1/chain/*.chain
    rm -r ../axtTemp
    # Load chains into database.  This takes an hour or three
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/zb.danRer1/axtChain/chain
    foreach i (*.chain)
      set c = $i:r
      echo loading $c
      hgLoadChain danRer1 ${c}_chainXenTro1 $i
    end
    featureBits danRer1 chainXenTro1Link

# NET XENOPUS/ZEBRAFISH BLASTZ
    ssh kksilo
    cd /cluster/data/xenTro1/bed/zb.danRer1/axtChain
    chainPreNet all.chain ../S1.len ../S2.len stdout \
      | chainNet stdin -minSpace=1 ../S1.len ../S2.len stdout /dev/null \
      | netSyntenic stdin noClass.net

    # Create axt files for the net as so:
    netSplit noClass.net noClass
    cd noClass
    mkdir ../../axtNet
    foreach i (*.net)
      netToAxt $i ../chain/$i:r.chain /cluster/data/danRer1/nib /cluster/data/xenTro1/xenTro1.2bit ../../axtNet/$i:r.axt
    end
    cd ..
    rm -r noClass

    # Add classification info using db tables:
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/zb.danRer1/axtChain
    netClass -noAr noClass.net danRer1 xenTro1 danRer1.net
    rm noClass.net

    # Load the nets into database
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/zb.danRer1/axtChain
    netFilter -minGap=10 danRer1.net | hgLoadNet danRer1 netXenTro1 stdin

# SWAP EVERYTHING AND REMAKE NET FROM XENOPUS REFERENCE
    ssh kksilo
    cd /cluster/data/xenTro1/bed
    mkdir -p bz.danRer1/axtChain
    chainSwap zb.danRer1/axtChain/all.chain bz.danRer1/axtChain/all.chain
    cd bz.danRer1/axtChain
    chainPreNet all.chain ../../zb.danRer1/S2.len ../../zb.danRer1/S1.len stdout \
      | chainNet stdin -minSpace=1 ../../zb.danRer1/S2.len ../../zb.danRer1/S1.len stdout /dev/null \
      | netSyntenic stdin noClass.net
    mkdir ../axtNet
    netToAxt noClass.net all.chain /cluster/data/xenTro1/xenTro1.2bit /cluster/data/danRer1/nib \
      ../axtNet/all.axt

    # Classify net and load into database
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/bz.danRer1/axtChain
    netClass -noAr noClass.net xenTro1 danRer1 xenTro1.net
    rm noClass.net
    netFilter -minGap=10 xenTro1.net | hgLoadNet xenTro1 netDanRer1 stdin

    # Load chains into database (takes about 8 hours).  These are so large
    # we have to split them first.
    ssh kksilo
    cd /cluster/store8/xenTro1/bed/bz.danRer1/axtChain
    chainSplit -lump=10 lumps all.chain

    ssh hgwdev
    cd /cluster/store8/xenTro1/bed/bz.danRer1/axtChain/lumps
    hgLoadChain -tIndex xenTro1 chainDanRer1 000.chain
    rm 000.chain
    foreach i (*.chain)
      echo loading $i
      hgLoadChain -tIndex -oldTable xenTro1 chainDanRer1 $i
    end
    rm chain.tab link.tab

# CREATE MULTIPLE ALIGNMENTS WITH MULTIZ (DONE 10/24/04 JK)
    ssh kksilo
    cd /cluster/data/xenTro1/bed
    mkdir -p multiz
    axtSort bz.galGal2/axtNet/all.axt stdout | axtToMaf -tSplit stdin \
      -tPrefix=xenTro1. /cluster/data/xenTro1/chrom.sizes \
      -qPrefix=galGal2. /cluster/data/galGal2/chrom.sizes \
      multiz/galGal2
    axtSort bz.hg17/axtNet/all.axt stdout | axtToMaf -tSplit stdin \
      -tPrefix=xenTro1. /cluster/data/xenTro1/chrom.sizes \
      -qPrefix=hg17. /cluster/data/hg17/chrom.sizes \
      multiz/hg17
    axtSort bz.mm5/axtNet/all.axt stdout | axtToMaf -tSplit stdin \
      -tPrefix=xenTro1. /cluster/data/xenTro1/chrom.sizes \
      -qPrefix=mm5. /cluster/data/mm5/chrom.sizes \
      multiz/mm5
    axtSort bz.danRer1/axtNet/all.axt stdout | axtToMaf -tSplit stdin \
      -tPrefix=xenTro1. /cluster/data/xenTro1/chrom.sizes \
      -qPrefix=danRer1. /cluster/data/danRer1/chrom.sizes \
      multiz/danRer1
    # Do multiz in a tizzy (takes about an hour)
    ssh kksilo
    cd /cluster/data/xenTro1/bed/multiz
    echo galGal2 hg17 mm5 danRer1 > dirList
    awk '{print $1;}' /cluster/data/xenTro1/chrom.sizes > scaffoldList
    multizzy dirList scaffoldList mtz
    catDir mtz | mafFilter stdin -minScore=500 > multiz5way.maf

    # Load into database
    ssh hgwdev
    cd /cluster/data/xenTro1/bed/multiz
    mkdir -p /gbdb/xenTro1/multiz5way
    ln -s /cluster/data/xenTro1/bed/multiz/multiz5way.maf /gbdb/xenTro1/multiz5way
    hgLoadMaf xenTro1 multiz5way

    # Create pairwise maf files (no longer split across scaffolds)
    ssh kksilo
    cd /cluster/data/xenTro1/bed/multiz
    mkdir -p pairwise
    catDir galGal2 > pairwise/galGal2.maf
    catDir hg17 > pairwise/hg17.maf
    catDir mm5 > pairwise/mm5.maf
    catDir danRer1 > pairwise/danRer1.maf

    # Load pairwise mafs into database
    ssh hgwdev
    set h = /cluster/data/xenTro1/bed/multiz/pairwise
    cd $h
    foreach i (*.maf)
      set o = $i:r
      set t = ${o}_netBlastz
      mkdir -p /gbdb/xenTro1/multiz5way/$t
      ln -s $h/$i /gbdb/xenTro1/multiz5way/$t
      hgLoadMaf xenTro1 $t -pathPrefix=/gbdb/xenTro1/multiz5way/$t
    end
    mkdir -p /gbdb/xenTro1/multiz5way/galGal2_netBlastz
    ln -s galGal2.maf /gbdb/xenTro1/multiz5way/galGal2_netBlastz
    hgLoadMaf xenTro1 galGal2_netBlastz -pathPrefix=/gbdb/xenTro1/multiz5way/galGal2_netBlastz
    mkdir -p /gbdb/xenTro1/multiz5way/hg17_netBlastz
    ln -s hg17.maf /gbdb/xenTro1/multiz5way/hg17_netBlastz
    hgLoadMaf xenTro1 hg17_netBlastz -pathPrefix=/gbdb/xenTro1/multiz5way/hg17_netBlastz
    mkdir -p /gbdb/xenTro1/multiz5way/mm5_netBlastz
    ln -s mm5.maf /gbdb/xenTro1/multiz5way/mm5_netBlastz
    hgLoadMaf xenTro1 mm5_netBlastz -pathPrefix=/gbdb/xenTro1/multiz5way/mm5_netBlastz
    mkdir -p /gbdb/xenTro1/multiz5way/danRer1_netBlastz
    ln -s danRer1.maf /gbdb/xenTro1/multiz5way/danRer1_netBlastz
    hgLoadMaf xenTro1 danRer1_netBlastz -pathPrefix=/gbdb/xenTro1/multiz5way/danRer1_netBlastz

# CREATE CONSERVATION WIGGLE WITH PHASTCONS (DONE 10/25/04 JK)
    # Estimate phastCons parameters
    ssh kksilo
    cd /cluster/data/xenTro1/bed/multiz
    mkdir cons
    cd cons
    # Create a starting-tree.mod based on scaffold_1.
    twoBitToFa ../../xenTro1.2bit -seq=scaffold_1 scaffold_1.fa
    /cluster/bin/phast/msa_split ../mtz/scaffold_1.maf --refseq scaffold_1.fa --in-format MAF --windows 100000000,1000 --out-format SS --between-blocks 5000 --out-root s1
    /cluster/bin/phast/phyloFit -i SS s1.*.ss --tree "((((hg17,mm5),galGal2),xenTro1),danRer1)" --out-root starting-tree.mod
    rm scaffold_1.fa s1.*.ss

    # Create directory full of sequence in individual scaffold files on bluearc.
    ssh kksilo
    mkdir -p /cluster/bluearc/xenTro1/scaffoldFa
    cd /cluster/bluearc/xenTro1/scaffoldFa
    twoBitToFa /cluster/data/xenTro1/xenTro1.2bit stdout | faSplit byname stdin foo

    # Create big bad bloated SS files in bluearc (takes ~45 minutes)
    mkdir -p /cluster/bluearc/xenTro1/cons/ss
    cd /cluster/bluearc/xenTro1/cons/ss
    foreach i (`awk '{print $1}' /cluster/data/xenTro1/chrom.sizes`)
      if (-e /cluster/data/xenTro1/bed/multiz/mtz/$i.maf) then
        echo msa_split $i
        /cluster/bin/phast/msa_split /cluster/data/xenTro1/bed/multiz/mtz/$i.maf \
          --refseq /cluster/bluearc/xenTro1/scaffoldFa/$i.fa \
          --in-format MAF --windows 1000000,0 --between-blocks 5000 \
          --out-format SS --out-root $i
      endif
    end

    # Create a random list of 50 1 mb regions
    ls -l | awk '$5 > 4000000 {print $9;}' | randomLines stdin 50 ../randomSs

    # Set up parasol directory to calculate trees on these 50 regions
    ssh kk9
    cd /cluster/bluearc/xenTro1/cons
    mkdir treeRun1
    cd treeRun1
    mkdir tree log

    # Create little script that calls phastCons with right arguments
cat > makeTree << '_EOF_'
/cluster/bin/phast/phastCons ../ss/$1.ss \
    /cluster/data/xenTro1/bed/multiz/cons/starting-tree.mod \
    --gc 0.410 --nrates 1,1 --no-post-probs --ignore-missing \
    --expected-lengths 12 --target-coverage 0.35 \
    --quiet --log log/$1 --estimate-trees tree/$1
'_EOF_'
    chmod a+x makeTree

    # Create gensub file
cat > gsub << '_EOF_'
#LOOP
makeTree $(root1)
#ENDLOOP
'_EOF_'

    # Make cluster job and run it
    gensub2 ../randomSs single gsub jobList
    para create jobList
    para try/push/check/etc
# Completed: 50 of 50 jobs
# CPU time in finished jobs: 2004s 33.40m 0.56h 0.02d 0.000 y
# IO & Wait Time: 257s 4.28m 0.07h 0.00d 0.000 y
# Average job time: 45s 0.75m 0.01h 0.00d
# Longest job: 91s 1.52m 0.03h 0.00d
# Submission to last job: 92s 1.53m 0.03h 0.00d

    # Now combine parameter estimates.  We can average the .mod files
    # using phyloBoot.  This must be done separately for the conserved
    # and nonconserved models
    ls tree/*.cons.mod > cons.txt
    /cluster/bin/phast/phyloBoot --read-mods '*cons.txt' --output-average ../ave.cons.mod > cons_summary.txt
    ls tree/*.noncons.mod > noncons.txt
    /cluster/bin/phast/phyloBoot --read-mods '*noncons.txt' --output-average ../ave.noncons.mod > noncons_summary.txt
    cd ..
    cp ave.*.mod /cluster/data/xenTro1/bed/multiz/cons

    # Create cluster dir to do main phastCons run
    mkdir consRun1
    cd consRun1
    mkdir ppRaw bed

    # Create script to run phastCons with right parameters
cat > doPhast << '_EOF_'
mkdir -p ppRaw/$2
/cluster/bin/phast/phastCons ../ss/$1.ss ../ave.cons.mod,../ave.noncons.mod \
    --expected-lengths 12 --target-coverage 0.35 --quiet --seqname $2 \
    --idpref $2 --viterbi bed/$1.bed --score --require-informative 0 \
    ppRaw/$2/$1.pp
'_EOF_'
    chmod a+x doPhast

    # Create gsub file
cat > gsub << '_EOF_'
#LOOP
doPhast $(file1) $(root1)
#ENDLOOP
'_EOF_'

    # Create parasol batch and run it
    ls -1 ../ss | sed 's/.ss//' > in.lst
    gensub2 in.lst single gsub jobList
    para create jobList
    # para try/check/push/etc.
# Completed: 12869 of 12869 jobs
# CPU time in finished jobs:      11470s     191.17m     3.19h    0.13d  0.000 y
# IO & Wait Time:                 40795s     679.91m    11.33h    0.47d  0.001 y
# Average job time:                   4s       0.07m     0.00h    0.00d
# Longest job:                       20s       0.33m     0.01h    0.00d
# Submission to last job:           675s      11.25m     0.19h    0.01d

# combine predictions and transform scores to be in 0-1000 interval
catDir bed | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", $1, $2, $3, $5, $5;}' | \
    /cluster/bin/scripts/lodToBedScore /dev/stdin > \
    /cluster/data/xenTro1/bed/multiz/mostConserved.bed

# Figure out how much is actually covered by the bed files as so:
awk '{print $3 - $2;}' /cluster/data/xenTro1/bed/multiz/mostConserved.bed | addCols stdin
# If the result of this divided by the non-N genome size (1.5G) isn't around 4%,
# do it again, adjusting the target-coverage phastCons parameter.  Actually when I
# took the target-coverage up to 0.61, it started putting negative numbers into the
# score column, so left it at 0.35, which results in 0.3% coverage.

# Load most conserved track into database
ssh hgwdev
cd /cluster/data/xenTro1/bed/multiz
hgLoadBed xenTro1 mostConserved mostConserved.bed

# Create merged posterior probability file and wiggle track data files
ssh kksilo
cd /cluster/bluearc/xenTro1/cons/consRun1
wigAsciiCrunch -dirDir ppRaw /cluster/data/xenTro1/bed/multiz/cons/phastCons5.pp
cd /cluster/data/xenTro1/bed/multiz/cons
wigAsciiToBinary -wibFile=phastCons5 phastCons5.pp

# Load gbdb and database with wiggle.
ssh hgwdev
ln -s /cluster/data/xenTro1/bed/multiz/cons/phastCons5.wib /gbdb/xenTro1/wib/phastCons5.wib
cd /cluster/data/xenTro1/bed/multiz/cons
hgLoadWiggle xenTro1 phastCons5 phastCons5.wig
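# (Not part of the original log.)  A minimal sanity check after loading: row
# counts for the two new conservation tables should be non-zero, and the
# mostConserved count should match the line count of mostConserved.bed.
hgsql xenTro1 -e 'select count(*) from mostConserved'
hgsql xenTro1 -e 'select count(*) from phastCons5'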
# RE-INDEX 12 TABLES TO ACCOMMODATE LONG SCAFFOLD NAMES
hgsql xenTro1 -e 'drop index qName on blatFr1'
hgsql xenTro1 -e 'create index qName on blatFr1(qName(16))'
hgsql xenTro1 -e 'drop index tName on all_est'
hgsql xenTro1 -e 'create index tName on all_est(tName(16),bin)'
hgsql xenTro1 -e 'drop index tName_2 on all_est'
hgsql xenTro1 -e 'create index tName_2 on all_est(tName(16),tStart)'
hgsql xenTro1 -e 'drop index tName_3 on all_est'
hgsql xenTro1 -e 'create index tName_3 on all_est(tName(16),tEnd)'
hgsql xenTro1 -e 'drop index chrom on estOrientInfo'
hgsql xenTro1 -e 'create index chrom on estOrientInfo(chrom(16),bin)'
hgsql xenTro1 -e 'drop index chrom_2 on estOrientInfo'
hgsql xenTro1 -e 'create index chrom_2 on estOrientInfo(chrom(16),chromStart)'
hgsql xenTro1 -e 'drop index chrom_3 on estOrientInfo'
hgsql xenTro1 -e 'create index chrom_3 on estOrientInfo(chrom(16),chromEnd)'
hgsql xenTro1 -e 'drop index chrom on gc5Base'
hgsql xenTro1 -e 'create index chrom on gc5Base(chrom(16))'
hgsql xenTro1 -e 'drop index name on genscan'
hgsql xenTro1 -e 'create index name on genscan(name(16))'
hgsql xenTro1 -e 'drop index name2 on genscan'
hgsql xenTro1 -e 'create index name2 on genscan(name2(16))'
hgsql xenTro1 -e 'drop index chrom on genscan'
hgsql xenTro1 -e 'create index chrom on genscan(chrom(16),txStart)'
hgsql xenTro1 -e 'drop index chrom_2 on genscan'
hgsql xenTro1 -e 'create index chrom_2 on genscan(chrom(16),txEnd)'
hgsql xenTro1 -e 'drop index chrom on genscanSubopt'
hgsql xenTro1 -e 'create index chrom on genscanSubopt(chrom(16),bin)'
hgsql xenTro1 -e 'drop index chrom_2 on genscanSubopt'
hgsql xenTro1 -e 'create index chrom_2 on genscanSubopt(chrom(16),chromStart)'
hgsql xenTro1 -e 'drop index chrom_3 on genscanSubopt'
hgsql xenTro1 -e 'create index chrom_3 on genscanSubopt(chrom(16),chromEnd)'
hgsql xenTro1 -e 'drop index tName on intronEst'
hgsql xenTro1 -e 'create index tName on intronEst(tName(16),bin)'
hgsql xenTro1 -e 'drop index tName_2 on intronEst'
hgsql xenTro1 -e 'create index tName_2 on intronEst(tName(16),tStart)'
hgsql xenTro1 -e 'drop index tName_3 on intronEst'
hgsql xenTro1 -e 'create index tName_3 on intronEst(tName(16),tEnd)'
hgsql xenTro1 -e 'drop index chrom on mostConserved'
hgsql xenTro1 -e 'create index chrom on mostConserved(chrom(16),bin)'
hgsql xenTro1 -e 'drop index chrom_2 on mostConserved'
hgsql xenTro1 -e 'create index chrom_2 on mostConserved(chrom(16),chromStart)'
hgsql xenTro1 -e 'drop index chrom_3 on mostConserved'
hgsql xenTro1 -e 'create index chrom_3 on mostConserved(chrom(16),chromEnd)'
hgsql xenTro1 -e 'drop index tName on all_mrna'
hgsql xenTro1 -e 'create index tName on all_mrna(tName(16),bin)'
hgsql xenTro1 -e 'drop index tName_2 on all_mrna'
hgsql xenTro1 -e 'create index tName_2 on all_mrna(tName(16),tStart)'
hgsql xenTro1 -e 'drop index tName_3 on all_mrna'
hgsql xenTro1 -e 'create index tName_3 on all_mrna(tName(16),tEnd)'
hgsql xenTro1 -e 'drop index chrom on mrnaOrientInfo'
hgsql xenTro1 -e 'create index chrom on mrnaOrientInfo(chrom(16),bin)'
hgsql xenTro1 -e 'drop index chrom_2 on mrnaOrientInfo'
hgsql xenTro1 -e 'create index chrom_2 on mrnaOrientInfo(chrom(16),chromStart)'
hgsql xenTro1 -e 'drop index chrom_3 on mrnaOrientInfo'
hgsql xenTro1 -e 'create index chrom_3 on mrnaOrientInfo(chrom(16),chromEnd)'
hgsql xenTro1 -e 'drop index chrom on multiz5way'
hgsql xenTro1 -e 'create index chrom on multiz5way(chrom(16),bin)'
hgsql xenTro1 -e 'drop index chrom_2 on multiz5way'
hgsql xenTro1 -e 'create index chrom_2 on multiz5way(chrom(16),chromStart)'
hgsql xenTro1 -e 'drop index chrom_3 on multiz5way'
hgsql xenTro1 -e 'create index chrom_3 on multiz5way(chrom(16),chromEnd)'
hgsql xenTro1 -e 'drop index chrom on phastCons5'
hgsql xenTro1 -e 'create index chrom on phastCons5(chrom(16))'

# MAKE DOWNLOADABLE SEQUENCE FILES (DONE, 2004-11-04, Fan)
ssh kksilo
cd /cluster/data/xenTro1
#- Build the .zip files
rm -rf zip
mkdir zip
# scaffolds
zip -j zip/scaffolds.zip downloads/xenopus.041015.fasta &
# RepeatMasker library
zip -j zip/RmLib.zip downloads/xt3.lib1.fasta &
# soft masked scaffold fasta
zip -j zip/softmask2.zip xenTro1.softmask2.fa &
# RepeatMasker out files
zip -j zip/repeats.zip repeats/repeats.out &
# hard masked scaffold fasta
zip -j zip/hardmask2.zip xenTro1.hardmask2.fa &
# TRF output files
cd bed/simpleRepeat
zip ../../zip/trf.zip trf.bed &
cd ../..
# get GenBank native mRNAs
cd /cluster/data/genbank
./bin/i386/gbGetSeqs -db=xenTro1 -native GenBank mrna \
    /cluster/data/xenTro1/zip/mrna.fa
# get GenBank xeno mRNAs
./bin/i386/gbGetSeqs -db=xenTro1 -xeno GenBank mrna \
    /cluster/data/xenTro1/zip/xenoMrna.fa &
# get native GenBank ESTs
./bin/i386/gbGetSeqs -db=xenTro1 -native GenBank est \
    /cluster/data/xenTro1/zip/est.fa &
cd /cluster/data/xenTro1/zip
# zip GenBank native and xeno mRNAs and native ESTs,
# after the above gbGetSeqs jobs are done.
zip -j mrna.zip mrna.fa
zip -j xenoMrna.zip xenoMrna.fa
zip -j est.zip est.fa

ssh hgwdev
cd /cluster/data/xenTro1/zip
set gp = /usr/local/apache/htdocs/goldenPath/xenTro1
mkdir -p $gp/bigZips
cp -p *.zip $gp/bigZips
cd $gp/bigZips
md5sum *.zip > md5sum.txt
# Take a look at bigZips, update README.txt.
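# (Not part of the original log.)  A quick integrity pass on the copied
# archives: md5sum -c re-verifies the checksums just written, and unzip -t
# exercises each zip (its last line reports "No errors detected").
cd /usr/local/apache/htdocs/goldenPath/xenTro1/bigZips
md5sum -c md5sum.txt
foreach z (*.zip)
    unzip -t $z | tail -1
end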
# MAKE VSHG17 DOWNLOADABLES (DONE 11/06/04 Fan)
ssh kksilo
cd /cluster/data/xenTro1/bed/bz.hg17/axtChain
gzip -c all.chain > /cluster/data/xenTro1/zip/human.chain.gz
gzip -c xenTro1.net > /cluster/data/xenTro1/zip/human.net.gz
mkdir /cluster/data/xenTro1/zip/axtNet
gzip -c ../axtNet/all.axt > /cluster/data/xenTro1/zip/axtNet/all.axt.gz
ssh hgwdev
mkdir /usr/local/apache/htdocs/goldenPath/xenTro1/vsHg17
cd /usr/local/apache/htdocs/goldenPath/xenTro1/vsHg17
mv /cluster/data/xenTro1/zip/human*.gz .
mv /cluster/data/xenTro1/zip/axtNet .
md5sum *.gz > md5sum.txt
# Copy over & edit README.txt w/pointers to chain, net formats.

# MAKE VSMM5 DOWNLOADABLES (DONE 11/06/04 Fan)
ssh kksilo
cd /cluster/data/xenTro1/bed/bz.mm5/axtChain
gzip -c all.chain > /cluster/data/xenTro1/zip/mouse.chain.gz
gzip -c xenTro1.net > /cluster/data/xenTro1/zip/mouse.net.gz
mkdir /cluster/data/xenTro1/zip/axtNet
gzip -c ../axtNet/all.axt > /cluster/data/xenTro1/zip/axtNet/all.axt.gz
ssh hgwdev
mkdir /usr/local/apache/htdocs/goldenPath/xenTro1/vsMm5
cd /usr/local/apache/htdocs/goldenPath/xenTro1/vsMm5
mv /cluster/data/xenTro1/zip/mouse*.gz .
mv /cluster/data/xenTro1/zip/axtNet .
md5sum *.gz > md5sum.txt
# Copy over & edit README.txt w/pointers to chain, net formats.

# MAKE VSGALGAL2 DOWNLOADABLES (DONE 11/06/04 Fan)
ssh kksilo
cd /cluster/data/xenTro1/bed/bz.galGal2/axtChain
gzip -c all.chain > /cluster/data/xenTro1/zip/chicken.chain.gz
gzip -c xenTro1.net > /cluster/data/xenTro1/zip/chicken.net.gz
mkdir /cluster/data/xenTro1/zip/axtNet
gzip -c ../axtNet/all.axt > /cluster/data/xenTro1/zip/axtNet/all.axt.gz
ssh hgwdev
mkdir /usr/local/apache/htdocs/goldenPath/xenTro1/vsGalGal2
cd /usr/local/apache/htdocs/goldenPath/xenTro1/vsGalGal2
mv /cluster/data/xenTro1/zip/chicken*.gz .
mv /cluster/data/xenTro1/zip/axtNet .
md5sum *.gz > md5sum.txt
# Copy over & edit README.txt w/pointers to chain, net formats.

# MAKE VSDANRER1 DOWNLOADABLES (DONE 11/06/04 Fan)
ssh kksilo
cd /cluster/data/xenTro1/bed/bz.danRer1/axtChain
gzip -c all.chain > /cluster/data/xenTro1/zip/zebrafish.chain.gz
gzip -c xenTro1.net > /cluster/data/xenTro1/zip/zebrafish.net.gz
mkdir /cluster/data/xenTro1/zip/axtNet
gzip -c ../axtNet/all.axt > /cluster/data/xenTro1/zip/axtNet/all.axt.gz
ssh hgwdev
mkdir /usr/local/apache/htdocs/goldenPath/xenTro1/vsDanRer1
cd /usr/local/apache/htdocs/goldenPath/xenTro1/vsDanRer1
mv /cluster/data/xenTro1/zip/zebrafish*.gz .
mv /cluster/data/xenTro1/zip/axtNet .
md5sum *.gz > md5sum.txt
# Copy over & edit README.txt w/pointers to chain, net formats.
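# (Not part of the original log.)  A quick check across the four pairwise
# download directories -- each should hold a chain, a net, an axtNet
# subdirectory, and md5sum.txt, and the gzip files should test clean:
cd /usr/local/apache/htdocs/goldenPath/xenTro1
foreach d (vsHg17 vsMm5 vsGalGal2 vsDanRer1)
    echo $d
    ls $d
    gzip -t $d/*.gz $d/axtNet/*.gz && echo "    gzip OK"
end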
# LOAD CPG ISLANDS (DONE 10/28/04 Fan)
ssh hgwdev
mkdir -p /cluster/data/xenTro1/bed/cpgIsland
cd /cluster/data/xenTro1/bed/cpgIsland
# Build software from Asif Chinwalla (achinwal@watson.wustl.edu)
cvs co hg3rdParty/cpgIslands
cd hg3rdParty/cpgIslands
make
mv cpglh.exe /cluster/data/xenTro1/bed/cpgIsland/

ssh kksilo
cd /cluster/data/xenTro1/bed/cpgIsland
foreach f (../../softMasked2/*.fa)
    set fout=$f:t:r:r.cpg
    echo running cpglh on $f to $fout
    ./cpglh.exe $f > $fout
end

# Transform cpglh output to bed +
cat << '_EOF_' > filter.awk
/* Input columns: */
/* chrom, start, end, len, CpG: cpgNum, perGc, cpg:gpc, observed:expected */
/* chr1\t 41776\t 42129\t 259\t CpG: 34\t 65.8\t 0.92\t 0.94 */
/* Output columns: */
/* chrom, start, end, name, length, cpgNum, gcNum, perCpg, perGc, obsExp */
/* chr1\t41775\t42129\tCpG: 34\t354\t34\t233\t19.2\t65.8\t0.94 */
{
    $2 = $2 - 1;
    width = $3 - $2;
    printf("%s\t%d\t%s\t%s %s\t%s\t%s\t%0.0f\t%0.1f\t%s\t%s\n",
           $1, $2, $3, $5, $6, width,
           $6, width*$7*0.01, 100.0*2*$6/width,
           $7, $9);
}
'_EOF_'
# << this line makes emacs coloring happy
awk -f filter.awk *.cpg > cpgIsland.bed

# load into database:
ssh hgwdev
cd /cluster/data/xenTro1/bed/cpgIsland
hgLoadBed xenTro1 cpgIslandExt -tab -noBin \
    -sqlTable=$HOME/kent/src/hg/lib/cpgIslandExt.sql cpgIsland.bed
wc -l cpgIsland.bed
# 44091 cpgIsland.bed
featureBits xenTro1 cpgIslandExt
# 19279778 bases of 1381238994 (1.396%) in intersection

# MAKE Human Proteins track (hg17 DONE 2005-1-15 braney)
ssh kksilo
mkdir -p /cluster/data/xenTro1/blastDb
cd /cluster/data/xenTro1/blastDb
for i in `cat ../chrom.lst`; do ls -1 ../$i/*/*.fa ; done > list
for i in `cat list`
do
    ln -s $i .
    formatdb -i `basename $i` -p F
done
rm *.log *.fa list

ssh kkr1u00
mkdir -p /iscratch/i/xenTro1/blastDb
cd /cluster/data/xenTro1/blastDb
for i in nhr nin nsq; do cp *.$i /iscratch/i/xenTro1/blastDb ; echo $i; done
cd
iSync > sync.out

mkdir -p /cluster/data/xenTro1/bed/tblastn.hg17KG
cd /cluster/data/xenTro1/bed/tblastn.hg17KG
echo /iscratch/i/xenTro1/blastDb/*.nsq | xargs ls -S | sed "s/\.nsq//" > query.lst

# back to kksilo
exit
cd /cluster/data/xenTro1/bed/tblastn.hg17KG
calc `wc /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl | awk "{print \\\$1}"`/\(300000/`wc query.lst | awk "{print \\\$1}"`\)
# 42156/(300000/498) = 69.978960
mkdir -p /cluster/bluearc/xenTro1/bed/tblastn.hg17KG/kgfa
split -l 70 /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl /cluster/bluearc/xenTro1/bed/tblastn.hg17KG/kgfa/kg
ln -s /cluster/bluearc/xenTro1/bed/tblastn.hg17KG/kgfa kgfa
cd kgfa
for i in *; do pslxToFa $i $i.fa; rm $i; done
cd ..
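# (Not part of the original log.)  Quick cross-check of the split: 42156
# entries at 70 per chunk should give roughly 603 query files, which also
# sets the chain job count further below.
ls kgfa/*.fa | wc -l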
ls -1S kgfa/*.fa > kg.lst
mkdir -p /cluster/bluearc/xenTro1/bed/tblastn.hg17KG/blastOut
ln -s /cluster/bluearc/xenTro1/bed/tblastn.hg17KG/blastOut
for i in `cat kg.lst`; do mkdir blastOut/`basename $i .fa`; done
tcsh
cat << '_EOF_' > blastGsub
#LOOP
blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl }
#ENDLOOP
'_EOF_'
cat << '_EOF_' > blastSome
#!/bin/sh
BLASTMAT=/iscratch/i/blast/data
export BLASTMAT
g=`basename $2`
f=/tmp/`basename $3`.$g
for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
    if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
    then
        mv $f.8 $f.1
        break;
    fi
done
if test -f $f.1
then
    if /cluster/bin/i386/blastToPsl $f.1 $f.2
    then
        liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg17/bed/blat.hg17KG/protein.lft warn $f.2
        if pslCheck -prot $3.tmp
        then
            mv $3.tmp $3
            rm -f $f.1 $f.2
        fi
        exit 0
    fi
fi
rm -f $f.1 $f.2 $3.tmp $f.8
exit 1
'_EOF_'
chmod +x blastSome
gensub2 query.lst kg.lst blastGsub blastSpec

ssh kk
cd /cluster/data/xenTro1/bed/tblastn.hg17KG
para create blastSpec
para push
# Completed: 253435 of 253435 jobs
# CPU time in finished jobs:   58003988s  966733.13m  16112.22h  671.34d  1.839 y
# IO & Wait Time:              13196517s  219941.96m   3665.70h  152.74d  0.418 y
# Average job time:                 281s       4.68m      0.08h    0.00d
# Longest job:                     6238s     103.97m      1.73h    0.07d
# Submission to last job:        174503s    2908.38m     48.47h    2.02d

cat << '_EOF_' > chainGsub
#LOOP
chainSome $(path1)
#ENDLOOP
'_EOF_'
cat << '_EOF_' > chainSome
(cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=100000 stdin ../c.`basename $1`.psl)
'_EOF_'
chmod +x chainSome
ls -1dS `pwd`/blastOut/kg?? > chain.lst
gensub2 chain.lst single chainGsub chainSpec

ssh kki
cd /cluster/data/xenTro1/bed/tblastn.hg17KG
para create chainSpec
para push
# Completed: 603 of 603 jobs
# CPU time in finished jobs:        883s      14.72m     0.25h    0.01d  0.000 y
# IO & Wait Time:                 75640s    1260.66m    21.01h    0.88d  0.002 y
# Average job time:                 127s       2.12m     0.04h    0.00d
# Longest job:                      342s       5.70m     0.10h    0.00d
# Submission to last job:          5747s      95.78m     1.60h    0.07d

exit
# back to kksilo
cd /cluster/data/xenTro1/bed/tblastn.hg17KG/blastOut
for i in kg??
do
    cat c.$i.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl
    sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
    awk "((\$1 / \$11) ) > 0.60 { print }" c60.$i.psl > m60.$i.psl
    echo $i
done
cat u.*.psl m60* | sort -T /tmp -k 14,14 -k 16,16n -k 17,17n | uniq > /cluster/data/xenTro1/bed/tblastn.hg17KG/blastHg17KG.psl
cd ..

ssh hgwdev
cd /cluster/data/xenTro1/bed/tblastn.hg17KG
hgLoadPsl xenTro1 blastHg17KG.psl

# back to kksilo
rm -rf blastOut
# End tblastn

# MAKE JGI FILTERED GENES (DONE 3/16/05 kent)
#
# Copy email attachment from Igor Grigoriev (IVGrigoriev@lbl.gov)
# to /cluster/data/xenTro1/bed/jgiFilteredModels/xenopus.FilteredModels
# and then
ssh hgwdev
cd /cluster/data/xenTro1/bed/jgiFilteredModels
hgJgiFilteredModels xenopus.FilteredModels jgiFilteredModels.gff
ldHgGene xenTro1 jgiFilteredModels jgiFilteredModels.gff
# Then edit trackDb to put in the track.
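# (Not part of the original log.)  Row-count sanity check on the two tracks
# just loaded:
hgsql xenTro1 -e 'select count(*) from blastHg17KG'
hgsql xenTro1 -e 'select count(*) from jgiFilteredModels'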
# MAKE NIBB PICS (DONE 3/17/05 kent)
#
# Load image data from NIBB (contact Naoto Ueno nueno@nibb.ac.jp)
# from CD's into /cluster/store11/visiGene/offLine/nibbFrog

# Get sequence of assembled ESTs from NIBB
ssh hgwdev
mkdir -p /cluster/data/xenTro1/bed/nibbPics
cd /cluster/data/xenTro1/bed/nibbPics
wget -O contigs.fa.gz http://xenopus.nibb.ac.jp/data/contigs.gz
wget -O f.fa.gz http://xenopus.nibb.ac.jp/data/f.seq.gz
wget -O r.fa.gz http://xenopus.nibb.ac.jp/data/r.seq.gz
wget -O i.fa.gz http://xenopus.nibb.ac.jp/data/i.seq.gz
gunzip contigs.fa.gz
gunzip f.fa.gz
gunzip r.fa.gz
gunzip i.fa.gz

# Figure out sequences used to probe images.
nibbImageProbes /cluster/store11/visiGene/offline/nibbFrog i.fa f.fa r.fa \
    nibbImageProbes.fa nibbImageProbes.tab

# Go to pk, create directory for cluster job on SAN, and set up sequence.
ssh pk
cd /san/sanvol1/scratch/xenTro1/
mkdir nibbPics
cd nibbPics
cp /cluster/data/xenTro1/bed/nibbPics/nibbImageProbes.fa .

# Set up blatz list files
mkdir run
cd run
mkdir psl
awk '{print $1;}' /cluster/data/xenTro1/chrom.sizes > genome.lst
ls -1 ../nibbImageProbes.fa > mrna.lst

# Make a little wrapper script that runs blatz and pslFilter
cat << '_EOF_' > doBlatz
#!/bin/tcsh -ef
mkdir -p /tmp/nibPic
blatz -rna -minScore=6000 /san/sanvol1/scratch/xenTro1/xenTro1.2bit:$1 ../$2.fa -out=psl stdout | pslFilter -minScore=100 -noHead stdin /tmp/nibPic/$1_$2.psl
mv /tmp/nibPic/$1_$2.psl $3
'_EOF_'
# << this line keeps emacs coloring happy
chmod a+x doBlatz

# Make gensub2 script
cat << '_EOF_' > gsub
#LOOP
doBlatz $(root1) $(root2) {check out exists psl/$(root1)_$(root2).psl}
#ENDLOOP
'_EOF_'
# << this line keeps emacs coloring happy

# Create parasol batch
gensub2 genome.lst mrna.lst gsub spec
para create spec
# Then do usual para try/para push/para time
# Completed: 27064 of 27064 jobs
# CPU time in finished jobs:     119714s    1995.23m    33.25h    1.39d  0.004 y
# IO & Wait Time:                 83315s    1388.59m    23.14h    0.96d  0.003 y
# Average job time:                   8s       0.13m     0.00h    0.00d
# Longest running job:                0s       0.00m     0.00h    0.00d
# Longest finished job:             254s       4.23m     0.07h    0.00d
# Submission to last job:          2338s      38.97m     0.65h    0.03d

# Do sorting and filtering
catDir psl | sort -k 10 | pslReps stdin nearBest.psl /dev/null -nohead \
    -minAli=0.75 -nearTop=0.001 -minCover=0.25 -minNearTopSize=80
sort -k 14 nearBest.psl > /cluster/data/xenTro1/bed/nibbPics/nibbImageProbes.psl

# MAKE NIBB EST MAPPINGS

# Go to pk, create directory for cluster job on SAN, and set up sequence.
ssh pk
cd /san/sanvol1/scratch/xenTro1/
mkdir nibPics
cd nibPics
mkdir split
faSplit sequence /cluster/data/xenTro1/bed/nibbPics/contigs.fa 16 split/xl
# Note things would go smoother using 4 instead of 16 in the faSplit above.
# I was worried about hippos, but there were none.
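# (Not part of the original log.)  Confirm the split really produced about 16
# pieces; with 27064 scaffolds that predicts the 27064 x 16 = 433024 cluster
# jobs reported below.
ls split/*.fa | wc -l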
# Set up blatz list files
mkdir run2
cd run2
mkdir psl
awk '{print $1;}' /cluster/data/xenTro1/chrom.sizes > genome.lst
ls -1 ../split/*.fa > mrna.lst

# Make a little wrapper script that runs blatz and pslFilter
cat << '_EOF_' > doBlatz
#!/bin/tcsh -ef
mkdir -p /tmp/nibPic
blatz -rna -minScore=6000 /san/sanvol1/scratch/xenTro1/xenTro1.2bit:$1 ../split/$2.fa -out=psl stdout | pslFilter -minScore=100 -noHead stdin /tmp/nibPic/$1_$2.psl
mv /tmp/nibPic/$1_$2.psl $3
'_EOF_'
# << this line keeps emacs coloring happy
chmod a+x doBlatz

# Make gensub2 script
cat << '_EOF_' > gsub
#LOOP
doBlatz $(root1) $(root2) {check out exists psl/$(root1)_$(root2).psl}
#ENDLOOP
'_EOF_'
# << this line keeps emacs coloring happy

# Create parasol batch
gensub2 genome.lst mrna.lst gsub spec
para create spec
# Then do usual para try/para push/para time
# Completed: 433024 of 433024 jobs
# CPU time in finished jobs:    5489117s   91485.29m  1524.75h   63.53d  0.174 y
# IO & Wait Time:               1109930s   18498.83m   308.31h   12.85d  0.035 y
# Average job time:                  15s       0.25m     0.00h    0.00d
# Longest running job:                0s       0.00m     0.00h    0.00d
# Longest finished job:             959s      15.98m     0.27h    0.01d
# Submission to last job:         23019s     383.65m     6.39h    0.27d

# Merge results into one file.  This took at least an hour (left it going overnight).
catDir psl > ../raw.psl

# Sort and do additional filtering.
cd ..
sort -k 10 raw.psl | pslReps stdin nearBest.psl /dev/null -nohead \
    -minAli=0.80 -nearTop=0.001 -minCover=0.25 -minNearTopSize=100
sort -k 14 nearBest.psl > contigs.psl

# Convert names
nibbNameFix contigs.fa contigs.psl nibbEstContigs.fa nibbEstContigs.psl
# Still TODO - massage names a little to get rid of NCBIisms in a few places
# and to decide how to link to NIBB.

# Clean up.  This also takes a good while.
rm -r run2/psl split
rm raw.psl nearBest.psl

#########################################################################
# MOUSE NET/CHAINS MM6 - Info contained in makeMm6.doc (200503 Hiram)

############################################################################
## BLASTZ swap from mm8 alignments (DONE - 2006-02-21 - Hiram)
ssh kk
cd /cluster/data/mm8/bed/blastzXenTro1.2006-02-21
time /cluster/bin/scripts/doBlastzChainNet.pl -verbose=2 \
    -bigClusterHub=kk -chainMinScore=5000 -chainLinearGap=loose \
    -swap `pwd`/DEF > swap.out 2>&1 &

time nice -n +19 featureBits xenTro1 chainMm8Link
# 59307185 bases of 1381238994 (4.294%) in intersection

##########################################################################
# GenBank gbMiscDiff table (markd 2007-01-10)
# Supports `NCBI Clone Validation' section of mgcGenes details page
# genbank release 157.0 now contains misc_diff fields for MGC clones
# reloading mRNAs results in gbMiscDiff table being created.
./bin/gbDbLoadStep -reload -srcDb=genbank -type=mrna xenTro1
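# (Not part of the original log.)  Confirm the reload actually created the
# table:
hgsql xenTro1 -e 'show tables like "gbMiscDiff"'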