# for emacs: -*- mode: sh; -*-


# Tetraodon Nigirividis from Genoscope, version v6 (released 5/6/02)
#    Project website:
#    http://www.genoscope.cns.fr/externe/tetraodon/sequencing.html

#  NOTE:  this doc may have genePred loads that fail to include
#  the bin column.  Please correct that for the next build by adding
#  a bin column when you make any of these tables:
#
#  mysql> SELECT tableName, type FROM trackDb WHERE type LIKE "%Pred%";
#  +------------+--------------------+
#  | tableName  | type               |
#  +------------+--------------------+
#  | gaze       | genePred           |
#  | geneid     | genePred geneidPep |
#  | genscan    | genePred           |
#  | hoxGenes   | genePred           |
#  | cytokines  | genePred           |
#  | ecoresHg17 | genePred           |
#  +------------+--------------------+


# DOWNLOAD SEQUENCE 

    ssh kksilo
    mkdir /cluster/store7/tetNig1
    ln -s /cluster/store7/tetNig1 /cluster/data
    cd /cluster/data/tetNig1
    wget http://www.genoscope.cns.fr/externe/tetraodon/Data/Reads/tetraodon6.gz

# DOWNLOAD SEQUENCE BY CHROMOSOME, GFF AND AGP FILES (DONE, 2004-08-13, hartera)
    # Genoscope provided the url, username and password  - this is V7
    # download agp, gff and accompanying FASTA files by chromosome
    ssh kksilo
    cd /cluster/data/tetNig1
    wget --timestamp --http-user=   --http-passwd= \
         http://www.genoscope.cns.fr/nda/tetraodon/GFF/tetraodon.all.gff.*
    wget --timestamp --http-user=   --http-passwd=  \
         http://www.genoscope.cns.fr/nda/tetraodon/GP/chr.agp
    wget --timestamp --http-user=   --http-passwd=   \
         http://www.genoscope.cns.fr/nda/tetraodon/GP/chr.agp.info
    wget --timestamp -r -l1 --http-user=   --http-passwd=   \
         http://www.genoscope.cns.fr/nda/tetraodon/GP/unmasked/
    mv ./www.genoscope.cns.fr/secure-nda/tetraodon/GP/unmasked/* .
    rm -r www.genoscope.cns.fr
    rm index*
    # unzip FASTA files
    bzip2 -d chr*.bz2
    bzip2 -d tetraodon.all.gff.bz2
    # no complete mitochondrial sequence in Genbank

# Create list of chromosomes (DONE, 2004-08-13, hartera)
    ssh hgwdev
    cd /cluster/data/tetNig1
    foreach f (*.fa)
       set chr = `echo $f:h | sed -e 's/^chr//' | sed -e 's/\.fa$//'` 
      echo $chr >> chrom
    end
    sort -n chrom > chrom.lst
    rm chrom

# SPLIT AGP FILES BY CHROMOSOME (DONE, 2004-08-16, hartera)

   ssh kksilo
   cd /cluster/data/tetNig1
   foreach c (`cat chrom.lst`)
      mkdir $c
      mv chr${c}.fa ./$c
      perl -we "while(<>){if (/^chr$c\t/) {print;}}" \
        ./chr.agp > $c/chr$c.agp
   end
   # agp has either gaps (N) or scaffolds (S), change S to W for WGS
   # agp files are not standard formats so need to reformat for
   # processing programs to run - missing field 4 which is just line number
   # and need to reformat gap line to:
   # chr chr_start chr_end line type length gap type linkage
   foreach c (`cat chrom.lst`)
      cp $c/chr${c}.agp $c/chr${c}.agp.bak
      echo "Processing chr${c}.agp ..."
      awk 'BEGIN {OFS="\t"} \
         { if ($4 == "S") print $1, $2, $3, NR, "W", $5, $6, $7, $8, $9; \
           if ($4 == "N" && $5 == "GAP_IS") \
              print $1, $2, $3, NR, $4, $9, "fragment", "yes"; \
           if ($4 == "N" && $5 == "GAP_UC") \
              print $1, $2, $3, NR, $4, $9, "contig", "yes"; \
           if ($4 == "N" && $5 == "GAP_UN") \
              print $1, $2, $3, NR, $4, $9, "contig", "no"; \
           if ($4 == "N" && $5 == "CENT") \
              print $1, $2, $3, NR, $4, $9, "centromere", "no" }' \
              $c/chr${c}.agp >> $c/chr${c}.new.agp
      mv $c/chr${c}.new.agp $c/chr{$c}.agp
   end
   # check agp files and then remove backup files
   foreach c (`cat chrom.lst`)
      rm $c/chr${c}.agp.bak
   end

# CHECK FASTA AND AGP FILES (DONE, 2004-08-16, hartera)
   ssh kksilo
   cd /cluster/data/tetNig1
   foreach c (`cat chrom.lst`)
      foreach f ( $c/chr$c.agp) 
         set agpLen = `tail -1 $f | awk '{print $3;}'`
         set h = $f:r
         set g = $h:r
         echo "Getting size of $g.fa"
         set faLen = `faSize $g.fa | awk '{print $1;}'`
         if ($agpLen == $faLen) then
           echo "   OK: $f length = $g length = $faLen"
         else
           echo "ERROR:  $f length = $agpLen, but $g length = $faLen"
         endif
      end
   end
   # all are OK so FASTA files sizes are in agreement with agp files

# BREAK UP SEQUENCE INTO 5MB CHUNKS AT CONTIGS/GAPS FOR CLUSTER RUNS
# (DONE, 2004-08-16, hartera)

   ssh kksilo
   cd /cluster/data/tetNig1
    
   foreach c (`cat chrom.lst`)
      foreach agp ($c/chr$c.agp)
        if (-e $agp) then
          set fa = $c/chr$c.fa
          echo splitting $agp and $fa
          cp -p $agp $agp.bak
          cp -p $fa $fa.bak
          splitFaIntoContigs $agp $fa . -nSize=5000000
        endif
      end
   end
 
# MAKE JKSTUFF AND BED DIRECTORIES (DONE, 2004-08-16, hartera)
    # This used to hold scripts -- better to keep them inline here
    # Now it should just hold lift file(s) and
    # temporary scripts made by copy-paste from this file.
    ssh hgwdev
    mkdir /cluster/data/tetNig1/jkStuff
    # This is where most tracks will be built:
    mkdir /cluster/data/tetNig1/bed

# CREATING DATABASE (DONE, 2004-08-16, hartera)
    # Create the database.
    # next machine
    ssh hgwdev
    echo 'create database tetNig1' | hgsql ''
    # if you need to delete that database:  !!! WILL DELETE EVERYTHING !!!
    echo 'drop database tetNig1' | hgsql tetNig1
    # Use df to make sure there is at least 5 gig free on
    df -h /var/lib/mysql
# Before loading data:
# Filesystem            Size  Used Avail Use% Mounted on
# /dev/sdc1             1.8T  525G  1.2T  32% /var/lib/mysql

# CREATING GRP TABLE FOR TRACK GROUPING (DONE, 2004-08-16, hartera)
    # next machine
    ssh hgwdev
    #  the following command copies all the data from the table
    #  grp in the database galGal2 to our new database ce2
    echo "create table grp (PRIMARY KEY(NAME)) select * from galGal2.grp" \
      | hgsql tetNig1
    # if you need to delete that table:   !!! WILL DELETE ALL grp data !!!
    echo 'drop table grp;' | hgsql tetNig1

# REPEAT MASKING - Run RepeatMasker on chroms (DONE, 2004-08-17, hartera)
#- Split contigs into 500kb chunks, at gaps if possible:
    ssh kksilo
    cd /cluster/data/tetNig1
    foreach c (`cat chrom.lst`)
      foreach d ($c/chr${c}*_?{,?})
        cd $d
        echo "splitting $d"
        set contig = $d:t
        ~/bin/i386/faSplit gap $contig.fa 500000 ${contig}_ -lift=$contig.lft \
            -minGapSize=100
        cd ../..
      end
    end

#- Make the run directory and job list:
    cd /cluster/data/tetNig1
    cat << '_EOF_' > jkStuff/RMTetraodon
#!/bin/csh -fe
                                                                                
cd $1
pushd .
/bin/mkdir -p /tmp/tetNig1/$2
/bin/cp $2 /tmp/tetNig1/$2/
cd /tmp/tetNig1/$2
/cluster/bluearc/RepeatMasker/RepeatMasker -ali -s $2
popd
/bin/cp /tmp/tetNig1/$2/$2.out ./
if (-e /tmp/tetNig1/$2/$2.align) /bin/cp /tmp/tetNig1/$2/$2.align ./
if (-e /tmp/tetNig1/$2/$2.tbl) /bin/cp /tmp/tetNig1/$2/$2.tbl ./
if (-e /tmp/tetNig1/$2/$2.cat) /bin/cp /tmp/tetNig1/$2/$2.cat ./
/bin/rm -fr /tmp/tetNig1/$2/*
/bin/rmdir --ignore-fail-on-non-empty /tmp/tetNig1/$2
/bin/rmdir --ignore-fail-on-non-empty /tmp/tetNig1
'_EOF_'
    # << this line makes emacs coloring happy
    chmod +x jkStuff/RMTetraodon
    mkdir RMRun
    cp /dev/null RMRun/RMJobs
    foreach c (`cat chrom.lst`)
      foreach d ($c/chr${c}_?{,?})
          set ctg = $d:t
          foreach f ( $d/${ctg}_?{,?}.fa )
            set f = $f:t
            echo /cluster/data/tetNig1/jkStuff/RMTetraodon \
                 /cluster/data/tetNig1/$d $f \
               '{'check out line+ /cluster/data/tetNig1/$d/$f.out'}' \
              >> RMRun/RMJobs
          end
      end
    end

    #- Do the run
    ssh kk
    cd /cluster/data/tetNig1/RMRun
    para create RMJobs
    para try, para check, para check, para push, para check,...
# para time
# Completed: 940 of 940 jobs
# CPU time in finished jobs:    6654614s  110910.24m  1848.50h   77.02d  0.211 y
# IO & Wait Time:                 54843s     914.05m    15.23h    0.63d  0.002 y
# Average job time:                7138s     118.96m     1.98h    0.08d
# Longest job:                    11024s     183.73m     3.06h    0.13d
# Submission to last job:         34509s     575.15m     9.59h    0.40d

    #- Lift up the 500KB chunk .out's to 5MB ("pseudo-contig") level
    ssh kksilo
    cd /cluster/data/tetNig1
    foreach d (*/chr*_?{,?})
      set contig = $d:t
      echo $contig
      liftUp $d/$contig.fa.out $d/$contig.lft warn $d/${contig}_*.fa.out \
        > /dev/null
    end

    #- Lift pseudo-contigs to chromosome level
    foreach c (`cat chrom.lst`)
      echo lifting $c
      cd $c
      if (-e lift/ordered.lft && ! -z lift/ordered.lft) then
        liftUp chr$c.fa.out lift/ordered.lft warn `cat lift/oOut.lst` \
        > /dev/null
      endif
      cd ..
    end

    #- Load the .out files into the database with:
    ssh hgwdev
    cd /cluster/data/tetNig1
    hgLoadOut tetNig1 */chr*.fa.out

# MAKE LIFTALL.LFT (DONE, 2004-08-16, hartera)
    ssh kksilo
    cd /cluster/data/tetNig1
    cat */lift/ordered.lft > jkStuff/liftAll.lft

# SIMPLE REPEATS TRACK (DONE, 2004-08-17, hartera)
    # TRF runs pretty quickly now... it takes a few hours total runtime,
    # so instead of binrsyncing and para-running, just do this on the
    # local fileserver
    ssh kksilo
    mkdir -p /cluster/data/tetNig1/bed/simpleRepeat
    cd /cluster/data/tetNig1/bed/simpleRepeat
    mkdir trf
    cp /dev/null jobs.csh
    foreach d (/cluster/data/tetNig1/*/chr*_?{,?})
      set ctg = $d:t
      foreach f ($d/${ctg}.fa)
        set fout = $f:t:r.bed
        echo $fout
        echo "/cluster/bin/i386/trfBig -trf=/cluster/bin/i386/trf $f /dev/null -bedAt=trf/$fout -tempDir=/tmp" \
        >> jobs.csh
      end
    end

    chmod a+x jobs.csh
    csh -ef jobs.csh >&! jobs.log &
    # check on this with
    tail -f jobs.log
    wc -l jobs.csh
    ls -1 trf | wc -l
    endsInLf trf/*

    liftUp simpleRepeat.bed /cluster/data/tetNig1/jkStuff/liftAll.lft warn \
           trf/*.bed
    
    # Load into the database:
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/simpleRepeat
    hgLoadBed tetNig1 simpleRepeat \
      /cluster/data/tetNig1/bed/simpleRepeat/simpleRepeat.bed \
      -sqlTable=$HOME/src/hg/lib/simpleRepeat.sql

# PROCESS SIMPLE REPEATS INTO MASK (DONE, 2004-08-17, hartera)
    # After the simpleRepeats track has been built, make a filtered version
    # of the trf output: keep trf's with period <= 12:
    ssh kksilo
    cd /cluster/data/tetNig1/bed/simpleRepeat
    mkdir -p trfMask
    foreach f (trf/chr*.bed)
      awk '{if ($5 <= 12) print;}' $f > trfMask/$f:t
    end
    # Lift up filtered trf output to chrom coords as well:
    cd /cluster/data/tetNig1
    mkdir bed/simpleRepeat/trfMaskChrom
    foreach c (`cat chrom.lst`)
      if (-e $c/lift/ordered.lst) then
        perl -wpe 's@(\S+)@bed/simpleRepeat/trfMask/$1.bed@' \
          $c/lift/ordered.lst > $c/lift/oTrf.lst
        liftUp bed/simpleRepeat/trfMaskChrom/chr$c.bed \
          jkStuff/liftAll.lft warn `cat $c/lift/oTrf.lst`
      endif
      if (-e $c/lift/random.lst) then
        perl -wpe 's@(\S+)@bed/simpleRepeat/trfMask/$1.bed@' \
           $c/lift/random.lst > $c/lift/rTrf.lst
        liftUp bed/simpleRepeat/trfMaskChrom/chr${c}_random.bed \
          jkStuff/liftAll.lft warn `cat $c/lift/rTrf.lst`
      endif
    end

# MASK SEQUENCE WITH REPEATMASKER AND SIMPLE REPEAT/TRF 
# (DONE, 2004-08-17, hartera)
    ssh kksilo
    cd /cluster/data/tetNig1
    # Soft-mask (lower-case) the contig and chr .fa's,
    # then make hard-masked versions from the soft-masked.
    set trfCtg=bed/simpleRepeat/trfMask
    set trfChr=bed/simpleRepeat/trfMaskChrom
    foreach f (*/chr*.fa)
      echo "repeat- and trf-masking $f"
      maskOutFa -soft $f $f.out $f
      set chr = $f:t:r
      maskOutFa -softAdd $f $trfChr/$chr.bed $f
      echo "hard-masking $f"
      maskOutFa $f hard $f.masked
    end

    foreach c (`cat chrom.lst`)
      echo "repeat- and trf-masking contigs of chr$c"
      foreach d ($c/chr*_?{,?})
        set ctg=$d:t
        set f=$d/$ctg.fa
        maskOutFa -soft $f $f.out $f
        maskOutFa -softAdd $f $trfCtg/$ctg.bed $f
        maskOutFa $f hard $f.masked
      end
    end

    # Build nib files, using the soft masking in the fa
    mkdir nib
    foreach f (*/chr*.fa)
      faToNib -softMask $f nib/$f:t:r.nib
    end

# STORING O+O SEQUENCE AND ASSEMBLY INFORMATION  (DONE, 2004-08-17, hartera)
    # Make symbolic links from /gbdb/tetNig1/nib to the real nibs.
    ssh hgwdev
    cd /cluster/data/tetNig1
    mkdir -p /gbdb/tetNig1/nib
    foreach f (/cluster/data/tetNig1/nib/chr*.nib)
      ln -s $f /gbdb/tetNig1/nib
    end
    # Load /gbdb/tetNig1/nib paths into database and save size info
    # hgNibSeq creates chromInfo table
    hgNibSeq -preMadeNib tetNig1 /gbdb/tetNig1/nib */chr*.fa
    echo "select chrom,size from chromInfo" | hgsql -N tetNig1 > chrom.sizes
    # take a look at chrom.sizes, should be 27 lines
    wc chrom.sizes

    # Make one big 2bit file as well, and make a link to it in
    # /gbdb/tetNig1/nib because hgBlat looks there:
    faToTwoBit */chr*.fa tetNig1.2bit
    ln -s /cluster/data/tetNig1/tetNig1.2bit /gbdb/tetNig1/nib/

# MAKE GOLD AND GAP TRACKS (DONE, 2004-08-17, hartera)
    ssh hgwdev
    cd /cluster/data/tetNig1
    # the gold and gap tracks are created from the chrN.agp file 
    hgGoldGapGl -noGl -chromLst=chrom.lst tetNig1 /cluster/data/tetNig1 .

# MAKE HGCENTRALTEST ENTRY AND TRACKDB TABLE FOR TETNIG1
# (DONE, 2004-06-18, hartera)
# UPDATED SOURCE AND ASSEMBLY DATE IN dbDb (DONE, 2004-09-09, hartera)
                                                                         
    # Make trackDb table so browser knows what tracks to expect:
    ssh hgwdev
    mkdir -p ~/kent/src/hg/makeDb/trackDb/tetraodon/tetNig1
                                                                                
    cd $HOME/kent/src/hg/makeDb/trackDb
    cvs up -d -P
    # Edit that makefile to add tetNig1 in all the right places and do
    make update
    make alpha
    cvs commit -m "Added tetNig1." makefile
                                                                                
    # Add dbDb and defaultDb entries:
    echo 'insert into dbDb (name, description, nibPath, organism,  \
          defaultPos, active, orderKey, genome, scientificName,  \
          htmlPath, hgNearOk, hgPbOk, sourceName)  \
          values("tetNig1", "May 2002", \
          "/gbdb/tetNig1/nib", "Tetraodon", "chr1:1000-4000", 1, \
          39, "Tetraodon", "Tetraodon nigroviridis", \
          "/gbdb/tetNig1/html/description.html", 0, 0, \
          "Genoscope and Whitehead Institute, V6");' \
    | hgsql -h genome-testdb hgcentraltest
    echo 'insert into defaultDb (genome, name) \
          values ("Tetraodon", "tetNig1");' \
          | hgsql -h genome-testdb hgcentraltest
    # add correct assembly date and source (2004-09-08, hartera)
    echo 'update dbDb set description = "Feb 2004" where name = "tetNig1";' \
          | hgsql -h genome-testdb hgcentraltest
    echo 'update dbDb set sourceName = "Genoscope and Broad Institute, V7" \
          where name = "tetNig1";' | hgsql -h genome-testdb hgcentraltest
    # correction to assembly date format (2004-09-09)
    echo 'update dbDb set description = "Feb. 2004" where name = "tetNig1";' \
          | hgsql -h genome-testdb hgcentraltest

# MAKE DESCRIPTION/SAMPLE POSITION HTML PAGE (DONE, 2004-08-23, hartera)
    ssh hgwdev
    mkdir /cluster/data/tetNig1/html
    cd /cluster/data/tetNig1/html
  # make a symbolic link from /gbdb/tetNig1/html to /cluster/data/tetNig1/html
    mkdir /gbdb/tetNig1
    ln -s /cluster/data/tetNig1/html /gbdb/tetNig1/html
    # Add a description page for zebrafish
    cd /cluster/data/tetNig1/html
    cp /cluster/data/fr1/html/*.html .
    # Edit this for zebrafish
    # create a description.html page here
    mkdir -p ~/kent/src/hg/makeDb/trackDb/tetraodon/tetNig1
    cd ~/kent/src/hg/makeDb/trackDb/
    cvs add tetraodon
    cd tetraodon
    cvs add tetNig1
    # Add description page here too
    cp /cluster/data/tetNig1/html/description.html \
       $HOME/kent/src/hg/makeDb/trackDb/tetraodon/tetNig1/
    chmod a+r \
          $HOME/kent/src/hg/makeDb/trackDb/tetraodon/tetNig1/description.html
    cd $HOME/kent/src/hg/makeDb/trackDb/tetraodon/tetNig1/
    # Check it in and copy (ideally using "make alpha" in trackDb) to
    # /gbdb/tetNig1/html
    cvs add description.html
    cvs commit description.html

# PUT MASKED SEQUENCE OUT FOR CLUSTER RUNS (DONE, 2004-08-18, hartera)
    ssh kkr1u00
    # Chrom-level mixed nibs that have been repeat- and trf-masked:
    rm -rf /iscratch/i/tetNig1/nib
    mkdir -p /iscratch/i/tetNig1/nib
    cp -p /cluster/data/tetNig1/nib/chr*.nib /iscratch/i/tetNig1/nib
    # Pseudo-contig fa that have been repeat- and trf-masked:
    rm -rf /iscratch/i/tetNig1/trfFa
    mkdir /iscratch/i/tetNig1/trfFa
    foreach d (/cluster/data/tetNig1/*/chr*_?{,?})
      cp $d/$d:t.fa /iscratch/i/tetNig1/trfFa
    end
    rm -rf /iscratch/i/tetNig1/rmsk
    mkdir -p /iscratch/i/tetNig1/rmsk
    cp -p /cluster/data/tetNig1/*/chr*.fa.out /iscratch/i/tetNig1/rmsk
    cp -p /cluster/data/tetNig1/tetNig1.2bit /iscratch/i/tetNig1/
    iSync

    # add to /cluster/bluearc too
    ssh kksilo

    # masked contigs
    rm -fr /cluster/bluearc/scratch/tetra/tetNig1/trfFa
    mkdir -p /cluster/bluearc/scratch/tetra/tetNig1/trfFa
    foreach d (/cluster/data/tetNig1/*/chr*_?{,?})
      cp $d/$d:t.fa /cluster/bluearc/scratch/tetra/tetNig1/trfFa
    end

    # masked chrom nibs
    rm -fr /cluster/bluearc/scratch/tetra/tetNig1/nib
    mkdir -p /cluster/bluearc/scratch/tetra/tetNig1/nib
    cp -p /cluster/data/tetNig1/nib/chr*.nib /cluster/bluearc/scratch/tetra/tetNig1/nib

    # fasta files  - CHECK FOR RANDOMS
    cd /cluster/data/tetNig1
    rm -fr /cluster/bluearc/scratch/tetra/tetNig1/fasta
    mkdir -p /cluster/bluearc/scratch/tetra/tetNig1/fasta
    cp -p */*.fa /cluster/bluearc/scratch/tetra/tetNig1/fasta

    # RepeatMasker *.out files - CHECK FOR RANDOMS
    rm -rf /cluster/bluearc/scratch/tetra/tetNig1/rmsk
    mkdir -p /cluster/bluearc/scratch/tetra/tetNig1/rmsk
    cp -p ./*/chr*.fa.out /cluster/bluearc/scratch/tetra/tetNig1/rmsk

# CREATE gc5Base wiggle TRACK (DONE, 2004-08-19, hartera)
    ssh kki
    mkdir /cluster/data/tetNig1/bed/gc5Base
    cd /cluster/data/tetNig1/bed/gc5Base
    #   in the script below, the 'grep -w GC' selects the lines of
    #   output from hgGcPercent that are real data and not just some
    #   information from hgGcPercent.  The awk computes the number
    #   of bases that hgGcPercent claimed it measured, which is not
    #   necessarily always 5 if it ran into gaps, and then the division
    #   by 10.0 scales down the numbers from hgGcPercent to the range
    #   [0-100].  Two columns come out of the awk print statement:
    #   <position> and <value> which are fed into wigAsciiToBinary through
    #   the pipe.  It is set at a dataSpan of 5 because each value
    #   represents the measurement over five bases beginning with
    #   <position>.  The result files end up in ./wigData5.
    #   A new script is used (from makeHg17.doc) which gets around the
    #   problem that wigAsciiToBinary was calculating chromEnd to be
    #   beyond the real chromosome end

    mkdir wigData5 dataLimits5
    cat << '_EOF_' > kkRun.sh
#!/bin/sh
NIB=$1
                                                                                
chr=${NIB/.nib/}
chrom=${chr#chr}
                                                                                
hgGcPercent -chr=${chr} -doGaps -file=stdout -win=5 tetNig1 \
        /iscratch/i/tetNig1/nib | \
    grep -w GC | \
    awk '{if (($3-$2) >= 5) {printf "%d\t%.1f\n", $2+1, $5/10.0} }' | \
    wigAsciiToBinary -dataSpan=5 -chrom=${chr} \
        -wibFile=wigData5/gc5Base_${chrom} \
            -name=${chrom} stdin 2> dataLimits5/${chr}
'_EOF_'
    # << this line makes emacs coloring happy
    chmod +x kkRun.sh
                                                                                
    ls /iscratch/i/tetNig1/nib  > nibList
    cat << '_EOF_' > gsub
#LOOP
./kkRun.sh $(path1)
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy

    gensub2 nibList single gsub jobList
    para create jobList
    para try, check, push ... etc
# para time
# Completed: 27 of 27 jobs
# CPU time in finished jobs:        587s       9.79m     0.16h    0.01d  0.000 y
# IO & Wait Time:                    16s       0.26m     0.00h    0.00d  0.000 y
# Average job time:                  22s       0.37m     0.01h    0.00d
# Longest job:                      187s       3.12m     0.05h    0.00d
# Submission to last job:           623s      10.38m     0.17h    0.01d

    # load the .wig files back on hgwdev:
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/gc5Base
    # up to here
    hgLoadWiggle -pathPrefix=/gbdb/tetNig1/wib/gc5Base \
                 tetNig1 gc5Base wigData5/*.wig
    # and symlink the .wib files into /gbdb
    mkdir -p /gbdb/tetNig1/wib/gc5Base
    ln -s `pwd`/wigData5/*.wib /gbdb/tetNig1/wib/gc5Base

    # And then the zoomed data view
    ssh kki
    cd /cluster/data/tetNig1/bed/gc5Base
    mkdir wigData5_1K dataLimits5_1K
                                                                                
    cat << '_EOF_' > kkRunZoom.sh
#!/bin/sh
NIB=$1
                                                                                
chr=${NIB/.nib/}
chrom=${chr#chr}
                                                                                
hgGcPercent -chr=${chr} -doGaps -file=stdout -win=5 tetNig1 \
        /iscratch/i/tetNig1/nib | \
    grep -w GC | \
    awk '{if (($3-$2) >= 5) {printf "%d\t%.1f\n", $2+1, $5/10.0} }' | \
    wigZoom -dataSpan=1000 stdin | wigAsciiToBinary -dataSpan=1000 \
        -chrom=${chr} -wibFile=wigData5_1K/gc5Base_${chrom}_1K \
            -name=${chrom} stdin 2> dataLimits5_1K/${chr}
'_EOF_'
    # << this line makes emacs coloring happy
    chmod +x kkRunZoom.sh
                                                                                
    cat << '_EOF_' > gsubZoom
#LOOP
./kkRunZoom.sh $(path1)
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy
    gensub2 nibList single gsubZoom jobListZoom
    para create jobListZoom
    para try, check, push, ... etc.
# para time
# Completed: 27 of 27 jobs
# CPU time in finished jobs:        571s       9.51m     0.16h    0.01d  0.000 y
# IO & Wait Time:                    40s       0.67m     0.01h    0.00d  0.000 y
# Average job time:                  23s       0.38m     0.01h    0.00d
# Longest job:                      223s       3.72m     0.06h    0.00d
# Submission to last job:           986s      16.43m     0.27h    0.01d

    #   Then load these .wig files into the same database as above
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/gc5Base
    hgLoadWiggle -pathPrefix=/gbdb/tetNig1/wib/gc5Base \
        -oldTable tetNig1 gc5Base wigData5_1K/*.wig
    # and symlink these .wib files into /gbdb
    mkdir -p /gbdb/tetNig1/wib/gc5Base
    ln -s `pwd`/wigData5_1K/*.wib /gbdb/tetNig1/wib/gc5Base

# CREATE 10.OOC, 11.OOC FILES FOR BLAT (DONE, 2004-08-20, hartera)
    # Use -repMatch=130 (based on size - for human 1024 was used, Tetraodon 
    # is about 380 Mb in length and it is 12.7% the size of the human genome) 
    ssh kkr1u00
    mkdir /cluster/data/tetNig1/bed/ooc
    cd /cluster/data/tetNig1/bed/ooc
    mkdir -p /cluster/bluearc/tetNig1
    ls -1 /cluster/data/tetNig1/nib/chr*.nib > nib.lst
    blat nib.lst /dev/null /dev/null -tileSize=11 \
      -makeOoc=/cluster/bluearc/tetNig1/11.ooc -repMatch=130
    # Wrote 8776 overused 11-mers to /cluster/bluearc/tetNig1/11.ooc
    # For 10.ooc, repMatch = 4096 for human, so use 520
    blat nib.lst /dev/null /dev/null -tileSize=10 \
      -makeOoc=/cluster/bluearc/tetNig1/10.ooc -repMatch=520
    # Wrote 1976 overused 11-mers to /cluster/bluearc/tetNig1/11.ooc
    # keep copies of ooc files in this directory and copy to iscratch
    cp /cluster/bluearc/tetNig1/*.ooc .
    cp -p /cluster/bluearc/tetNig1/*.ooc /iscratch/i/tetNig1/
    iSync
                                                                       
# AUTO UPDATE GENBANK MRNA AND EST RUN  (DONE, 2004-08-24, hartera and markd)
    ssh eieio
    cd /cluster/data/genbank
    # This is a new organism, edit the etc/genbank.conf file and add:
# tetNig1 (Tetraodon)
tetNig1.genome = /iscratch/i/tetNig1/nib/chr*.nib
tetNig1.lift = /cluster/data/tetNig1/jkStuff/liftAll.lft
tetNig1.downloadDir = tetNig1

    # Default includes native genbank mRNAs and ESTs,
    # genbank xeno mRNAs but no xenoESTs, native RefSeq mRNAs but
    # not xeno RefSeq
    cvs commit -m "Added Tetraodon." etc/genbank.conf
    # Edit src/align/gbBlat to add /iscratch/i/tetNig1/11.ooc
    cd ~/kent/src/hg/makeDb/genbank
    # Add line: TETNIG_OOC=/iscratch/i/tetNig1/11.ooc
    # Add line:  tetNig*) oocOpt="-ooc=${TETNIG_OOC}" ;;
    cvs diff src/align/gbBlat
    make
    cvs update src/align/gbBlat
    cvs commit -m "Added 11.ooc for tetNig1." src/align/gbBlat
    # Edit src/lib/gbGenome.c to add new genome
    # ADD: static char *tetNigNames[] = {"Tetraodon nigroviridis", NULL};
    # ADD: {"tetNig", tetNigNames, NULL},
    # to static struct dbToSpecies dbToSpeciesMap[]
    cvs diff src/lib/gbGenome.c
    make
    cvs update src/lib/gbGenome.c
    cvs commit -m "Added Tetraodon, tetNig." src/lib/gbGenome.c
    # Install to /cluster/data/genbank:
    make install-server

    ssh eieio
    cd /cluster/data/genbank
    # This is an -initial run, RefSeq mRNA only:
    nice bin/gbAlignStep -srcDb=refseq -type=mrna -verbose=1 -initial tetNig1
    # Load results for RefSeq mRNAs:
    ssh hgwdev
    cd /cluster/data/genbank
    # There are no RefSeq mRNAs for Tetraodon

    # -initial for GenBank mRNAs
    nice ./bin/gbAlignStep -initial -srcDb=genbank -type=mrna tetNig1

    # Load results for GenBank mRNAs
    ssh hgwdev
    cd /cluster/data/genbank
    nice bin/gbDbLoadStep -verbose=1 tetNig1
    cd /cluster/data/genbank/work
    mv initial.tetNig1 initial.tetNig1.genbank.mrna

    # -initial for ESTs
    ssh eieio
    cd /cluster/data/genbank
    nice bin/gbAlignStep -srcDb=genbank -type=est -verbose=1 -initial tetNig1

    # Load results for GenBank ESTs
    ssh hgwdev
    cd /cluster/data/genbank
    nice bin/gbDbLoadStep -verbose=1 tetNig1
    cd /cluster/data/genbank/work
    mv initial.tetNig1 initial.tetNig1.genbank.est

    # Everything loaded ok, so clean up
    rm -r /cluster/data/genbank/work/initial.tetNig1*

# BLASTZ SWAP FOR hg17 vs tetNig1 BLASTZ TO CREATE tetNig1 vs hg17 BLASTZ
# (DONE, 2004-08-26, hartera)
    ssh kolossus
    mkdir -p /cluster/data/tetNig1/bed/blastz.hg17.swap
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap
    # use axtChrom from blastzTetNig1 on hg17
    set aliDir = /cluster/data/hg17/bed/blastz.tetNig1
    cp $aliDir/S1.len S2.len
    cp $aliDir/S2.len S1.len
    mkdir unsorted axtChrom
    cat $aliDir/axtChrom/chr*.axt \
    | axtSwap stdin $aliDir/S1.len $aliDir/S2.len stdout \
    | axtSplitByTarget stdin unsorted
    # Sort the shuffled .axt files.
    foreach f (unsorted/*.axt)
      echo sorting $f:t:r
      axtSort $f axtChrom/$f:t
    end
    du -sh $aliDir/axtChrom unsorted axtChrom
    # 709M    /cluster/data/hg17/bed/blastz.tetNig1/axtChrom
    # 710M    unsorted
    # 709M    axtChrom
    rm -r unsorted
    # translate sorted axt files into psl
    ssh kolossus
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap
    mkdir -p pslChrom
    set tbl = "blastzHg17"
    foreach f (axtChrom/chr*.axt)
      set c=$f:t:r
      echo "Processing chr $c"
      /cluster/bin/i386/axtToPsl $f S1.len S2.len pslChrom/${c}_${tbl}.psl
    end
    # Load database tables
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap/pslChrom
    foreach f (./*.psl)
      /cluster/bin/i386/hgLoadPsl tetNig1 $f
      echo "$f Done"
    end

# CHAIN HUMAN (hg17) BLASTZ (DONE, 2004-08-27, hartera)
    # Run axtChain on little cluster
    ssh kki
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chain
    ls -1S /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChrom/*.axt \
                    > input.lst
    cat << '_EOF_' > gsub
#LOOP
doChain {check in exists $(path1)} {check out line+ chain/$(root1).chain} {check out line+ out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy
                                                                                
    # Make our own linear gap file with reduced gap penalties,
    # in hopes of getting longer chains - works well for species at
    # chicken-human distance or greater
    cat << '_EOF_' > ../../chickenHumanTuned.gap
tablesize^V     11
smallSize^V     111
position^V      1^V     2^V     3^V     11^V    111^V   2111^V  12111^V 32111^V
72111^V 152111^V        252111
qGap^V  325^V   360^V   400^V   450^V   600^V   1100^V  3600^V  7600^V  15600^V
31600^V 56600
tGap^V  325^V   360^V   400^V   450^V   600^V   1100^V  3600^V  7600^V  15600^V
31600^V 56600
bothGap^V       625^V   660^V   700^V   750^V   900^V   1400^V  4000^V  8000^V
16000^V 32000^V 57000
'_EOF_'
# << this line makes emacs coloring happy
                                                                                
cat << '_EOF_' > doChain
#!/bin/csh
axtChain -linearGap=../../chickenHumanTuned.gap $1 \
    /iscratch/i/tetNig1/nib \
    /iscratch/i/gs.18/build35/bothMaskedNibs $2 >& $3 
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    gensub2 input.lst single gsub jobList
    para create jobList
    para try, check, push, check...
# para time
# Completed: 27 of 27 jobs
# CPU time in finished jobs:       2207s      36.78m     0.61h    0.03d  0.000 y
# IO & Wait Time:                   124s       2.07m     0.03h    0.00d  0.000 y
# Average job time:                  86s       1.44m     0.02h    0.00d
# Longest job:                      160s       2.67m     0.04h    0.00d
# Submission to last job:           334s       5.57m     0.09h    0.00d

   # now on the cluster server, sort chains
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chain all.chain
                                                                                
    # take a look at score distr's
    foreach f (chain/*.chain)
      grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
      echo $f:t:r >> hist5000.out
      textHistogram -binSize=5000 /tmp/score.$f:t:r >> hist5000.out
      echo ""
    end
    # apart from chrUn_random, not too many chains with score < 5000
    # load chr 1 into table
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain/chain
    hgLoadChain tetNig1 chr1_chainHg17 chr1.chain
# no RefSeqs for Tetraodon so use mrna table
# featureBits -chrom=chr1 tetNig1 all_mrna chainHg17Link -enrichment
# all_mrna 2.637%, chainHg17Link 9.262%, both 0.772%, cover 29.28%, enrich 3.16x
    # try filtering with minScore < 5000
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain
    mv all.chain all.chain.unfiltered
    chainFilter -minScore=5000 all.chain.unfiltered > all.chain
    chainSplit chainFilt5k all.chain
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain/chainFilt5k
    hgLoadChain tetNig1 chr1_chainHg17Filt5k chr1.chain
# featureBits -chrom=chr1 tetNig1 all_mrna chainHg17Filt5kLink -enrichment
# all_mrna 2.637%, chainHg17Filt5kLink 8.746%, both 0.746%, cover 28.29%, 
# enrich 3.23x

# add all chains for minScore=5000 filtered chains
# remove test chain tables for chr1
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain
    hgsql -e "drop table chr1_chainHg17;" tetNig1
    hgsql -e "drop table chr1_chainHg17Link;" tetNig1
    hgsql -e "drop table chr1_chainHg17Filt5k;" tetNig1
    hgsql -e "drop table chr1_chainHg17Filt5kLink;" tetNig1
    mv chainFilt5k chain

    cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain/chain
    foreach i (*.chain)
        set c = $i:r
        hgLoadChain tetNig1 ${c}_chainHg17 $i
        echo done $c
    end

# NET HUMAN (hg17) BLASTZ (DONE, 2004-08-27, hartera)
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain
    mkdir preNet
    cd chain
    foreach i (*.chain)
       echo preNetting $i
       /cluster/bin/i386/chainPreNet $i ../../S1.len ../../S2.len \
                                     ../preNet/$i
    end
    cd ..
    mkdir n1
    cd preNet
    foreach i (*.chain)
      set n = $i:r.net
      echo primary netting $i
      /cluster/bin/i386/chainNet $i -minSpace=1 ../../S1.len ../../S2.len \
                                 ../n1/$n /dev/null
    end
    cd ..
    cat n1/*.net | /cluster/bin/i386/netSyntenic stdin noClass.net
    # memory usage 55447552, utime 259 s/100, stime 33

    # Add classification info using db tables:
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain
    # netClass looks for ancient repeats in one of the databases
    # hg17 has this table - hand-curated by Arian but this is for
    # human-rodent comparisons so do not use here, use -noAr option
    mkdir -p /cluster/bluearc/hg17/linSpecRep.notInTetraodon
    mkdir -p /cluster/bluearc/tetNig1/linSpecRep.notInHuman
    cp -p /iscratch/i/hg17/linSpecRep.notInTetraodon/* \
          /cluster/bluearc/hg17/linSpecRep.notInTetraodon
    cp -p /iscratch/i/tetNig1/linSpecRep.notInHuman/* \
          /cluster/bluearc/tetNig1/linSpecRep.notInHuman

    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain
    time netClass noClass.net tetNig1 hg17 hg17.net \
          -tNewR=/cluster/bluearc/tetNig1/linSpecRep.notInHuman \
          -qNewR=/cluster/bluearc/hg17/linSpecRep.notInTetraodon -noAr
    # 47.430u 31.160s 2:15.46 58.0%   0+0k 0+0io 197pf+0w
    netFilter -minGap=10 hg17.net |  hgLoadNet tetNig1 netHg17 stdin
    # featureBits tetNig1 all_mrna netHg17 -enrichment
    # all_mrna 3.204%, netHg17 42.716%, both 1.698%, cover 53.00%, enrich 1.24x

    # create symlinks to make consistent with newer builds 2006-04-09 markd
    ln -s all.chain.gz  /cluster/data/tetNig1/bed/blastz.hg17/axtChain/tetNig1.hg17.all.chain.gz
    ln -s hg17.net.gz  /cluster/data/tetNig1/bed/blastz.hg17/axtChain/tetNig1.hg17.net.gz

# BLASTZ FOR FR1 (FUGU) (DONE, 2004-08-31, hartera)

    ssh kkr1u00
    # blastz requires lineage-specific repeats but there are none
    # available between these two fish species to make empty files
    mkdir -p /iscratch/i/tetNig1/linSpecRep.notInFugu
    mkdir -p /iscratch/i/fugu/linSpecRep.notInZebrafish
    cd /cluster/data/tetNig1
    # for Tetraodon
    foreach c (`cat chrom.lst`)
      cp /dev/null /iscratch/i/tetNig1/linSpecRep.notInFugu/chr${c}.out.spec
    end
    # for Fugu
    cp /dev/null /iscratch/i/fugu/linSpecRep.notInZebrafish/chrUn.out.spec
    iSync

    ssh kk
    mkdir -p /cluster/data/tetNig1/bed/blastz.fr1.2004-08-31
    ln -s /cluster/data/tetNig1/bed/blastz.fr1.2004-08-31 \
          /cluster/data/tetNig1/bed/blastz.fr1
    cd /cluster/data/tetNig1/bed/blastz.fr1

    cat << '_EOF_' > DEF
# Tetraodon (tetNig1) vs. Fugu (fr1)
export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/bin/i386:/cluster/home/angie/schwartzbin
                                                                                
ALIGN=blastz-run
BLASTZ=blastz
BLASTZ_L=8000
BLASTZ_H=2200
#BLASTZ_ABRIDGE_REPEATS=1 if SMSK is specified
BLASTZ_ABRIDGE_REPEATS=0
                                                                                
# TARGET - Tetraodon (tetNig1)
SEQ1_DIR=/iscratch/i/tetNig1/nib
SEQ1_RMSK=
# lineage-specific repeats
# we don't have that information for these species so these files are empty
SEQ1_SMSK=/iscratch/i/tetNig1/linSpecRep.notInFugu
SEQ1_FLAG=
SEQ1_IN_CONTIGS=0
# 10 MB chunk for target
SEQ1_CHUNK=10000000
SEQ1_LAP=10000
                                                                                
# QUERY - Fugu (fr1)
# soft-masked chrom nib
SEQ2_DIR=/cluster/bluearc/fugu/fr1/chromNib
SEQ2_RMSK=
SEQ2_SMSK=/iscratch/i/fugu/linSpecRep.notInZebrafish
SEQ2_FLAG=
SEQ2_IN_CONTIGS=0
# 10 Mbase for query
SEQ2_CHUNK=10000000
SEQ2_LAP=0
                                                                                
BASE=/cluster/data/tetNig1/bed/blastz.fr1
                                                                                
DEF=$BASE/DEF
RAW=$BASE/raw
CDBDIR=$BASE
SEQ1_LEN=$BASE/S1.len
SEQ2_LEN=$BASE/S2.len
                                                                                
#DEBUG=1
'_EOF_'
    # << this line keeps emacs coloring happy
    # save the DEF file in the current standard place
    chmod +x DEF
    cp DEF ~angie/hummus/DEF.tetNig1-fr1.2004-08-31
    # setup cluster run
    # source the DEF file
    bash
    . ./DEF
    /cluster/data/tetNig1/jkStuff/BlastZ_run0.sh
    cd run.0
    # check batch looks ok then
    para try, check, push, check, ....
# para time
# Completed: 1995 of 1995 jobs
# CPU time in finished jobs:     502856s    8380.93m   139.68h    5.82d  0.016 y
# IO & Wait Time:               1190285s   19838.08m   330.63h   13.78d  0.038 y
# Average job time:                 849s      14.14m     0.24h    0.01d
# Longest job:                    19130s     318.83m     5.31h    0.22d
# Submission to last job:         19415s     323.58m     5.39h    0.22d

    # second cluster run: lift raw alignments -> lav dir
    cd /cluster/data/tetNig1/bed/blastz.fr1
    bash # if a csh/tcsh user
    . ./DEF
    /cluster/data/tetNig1/jkStuff/BlastZ_run1.sh
    cd run.1
    para try, check, push, check etc.
# para time
# Completed: 57 of 57 jobs
# IO & Wait Time:                   511s       8.52m     0.14h    0.01d  0.000 y
# Average job time:                  29s       0.48m     0.01h    0.00d
# Longest job:                       58s       0.97m     0.02h    0.00d
# Submission to last job:           853s      14.22m     0.24h    0.01d

    # third run: lav -> axt
    ssh kki
    cd /cluster/data/tetNig1/bed/blastz.fr1
    mkdir axtChrom run.2
    cd run.2
    cat << '_EOF_' > do.csh
#!/bin/csh -ef
cd $1
cat `ls -1 *.lav | sort -g` \
| lavToAxt stdin /iscratch/i/tetNig1/nib \
/cluster/bluearc/fugu/fr1/chromNib stdout \
| axtSort stdin $2
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x do.csh
    cat << '_EOF_' > gsub
#LOOP
./do.csh {check in exists $(path1)} {check out line+ /cluster/data/tetNig1/bed/blastz.fr1/axtChrom/$(root1).axt}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy
    \ls -1Sd ../lav/chr* > chrom.list
    gensub2 chrom.list single gsub jobList
    wc -l jobList
    head jobList
    para create jobList
    para try, check, push, check,...
# para time
# Completed: 27 of 27 jobs
# CPU time in finished jobs:         73s       1.22m     0.02h    0.00d  0.000 y
# IO & Wait Time:                   495s       8.25m     0.14h    0.01d  0.000 y
# Average job time:                  21s       0.35m     0.01h    0.00d
# Longest job:                       77s       1.28m     0.02h    0.00d
# Submission to last job:           318s       5.30m     0.09h    0.00d

   # translate sorted axt files into psl
    ssh kolossus
    cd /cluster/data/tetNig1/bed/blastz.fr1
    mkdir -p pslChrom
    set tbl = "blastzFr1"
    foreach f (axtChrom/chr*.axt)
      set c=$f:t:r
      echo "Processing chr $c"
      /cluster/bin/i386/axtToPsl $f S1.len S2.len pslChrom/${c}_${tbl}.psl
    end

    # Load database tables
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.fr1/pslChrom
    foreach f (./*.psl)
      /cluster/bin/i386/hgLoadPsl -noTNameIx tetNig1 $f
      echo "$f Done"
    end

# default parameters and H=2000
# featureBits -chrom=chr1 tetNig1 all_mrna blastzFr1 -enrichment
# all_mrna 2.637%, blastzFr1 78.364%, both 2.361%, cover 89.53%, enrich 1.14x 
# repeat with H=2000 and L=6000 - blastzL6k
# featureBits -chrom=chr1 tetNig1 all_mrna blastzFr1L6k -enrichment
# all_mrna 2.637%, blastzFr1L6k 77.955%, both 2.348%, cover 89.04%, enrich 1.14x# very little difference
# featureBits -chrom=chr1 danRer1 all_mrna blastzFr1 -enrichment
# all_mrna 0.981%, blastzFr1 5.542%, both 0.592%, cover 60.32%, enrich 10.88x
# blastzFr1L6k
# BLASTZ_H=2000
# BLASTZ_Y=3400
# BLASTZ_L=6000
# BLASTZ_K=2200
# BLASTZ_Q=/cluster/data/blastz/HoxD55.q
# featureBits -chrom=chr1 tetNig1 all_mrna blastzFr1L6k2 -enrichment
# all_mrna 2.637%, blastzFr1L6k2 79.917%, both 2.403%, cover 91.12%, 
# enrich 1.14x
# H=2200, default parameters
# featureBits -chrom=chr1 tetNig1 all_mrna blastzFr1H2200 -enrichment
# all_mrna 2.637%, blastzFr1H2200 78.225%, both 2.359%, cover 89.46%, 
# enrich 1.14x
# rows in chr1_blastzFr1* tables:
# blastzFr1	47079
# blastzFr1L6k	26408
# blastzFr1L6k2	52340
# blastzFr1H2200 44240
# blastzFr1H2200L8k 20502
# blastzFr1H2200L10k 17355
# try increasing L to 8000
# H=2200, L=8000
# featureBits -chrom=chr1 tetNig1 all_mrna blastzFr1H2200L8k -enrichment
# all_mrna 2.637%, blastzFr1H2200L8k 77.714%, both 2.345%, cover 88.94%, 
# enrich 1.14x
# H=2200 L=10000
#  featureBits -chrom=chr1 tetNig1 all_mrna blastzFr1H2200L10k -enrichment
# all_mrna 2.637%, blastzFr1H2200L10k 77.568%, both 2.340%, cover 88.72%, 
# enrich 1.14x
# 6 jobs took a very long time and did not finish
# looking at some alignments in these tracks with the mRNA track on there is not
# much difference between most of them but blastzFr1L6k2 often has more
# alignments and a different strucutre. Use blastzFr1H2200L8k as this has
# fewer alingments with very little loss of coverage and more lower scoring 
# alignments can be removed at the chaining step.

# CHAIN FR1 BLASTZ (DONE, 2004-09-01, hartera)
    # Run axtChain on little cluster
    ssh kki 
    cd /cluster/data/tetNig1/bed/blastz.fr1
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chain
    ls -1S /cluster/data/tetNig1/bed/blastz.fr1/axtChrom/*.axt \
        > input.lst
cat << '_EOF_' > gsub
#LOOP
doChain {check in exists $(path1)} {check out line+ chain/$(root1).chain} {check out line+ out/$(root1).out}
#ENDLOOP
'_EOF_'

cat << '_EOF_' > doChain
#!/bin/csh
axtChain $1 \
    /iscratch/i/tetNig1/nib \
    /cluster/bluearc/fugu/fr1/chromNib $2 >& $3
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    gensub2 input.lst single gsub jobList
    para create jobList
    para try, check, push, check...
# Completed: 27 of 27 jobs
# CPU time in finished jobs:        414s       6.90m     0.11h    0.00d  0.000 y
# IO & Wait Time:                   252s       4.20m     0.07h    0.00d  0.000 y
# Average job time:                  25s       0.41m     0.01h    0.00d
# Longest job:                      118s       1.97m     0.03h    0.00d
# Submission to last job:           972s      16.20m     0.27h    0.01d

    # now on the cluster server, sort chains
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chain all.chain
    # take a look at score distr's,try also with larger bin size.
    foreach f (chain/*.chain)
      grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
      echo $f:t:r >> hist5000.out
      textHistogram -binSize=5000 /tmp/score.$f:t:r >> hist5000.out
      echo ""
    end

    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain/chain
    hgLoadChain tetNig1 chr1_chainFr1 chr1.chain
# no RefSeqs for Tetraodon so use mrna table
# featureBits -chrom=chr1 tetNig1 all_mrna chainFr1Link -enrichment
# all_mrna 2.637%, chainFr1Link 76.099%, both 2.320%, cover 87.97%, enrich 1.16x
  
    # try filtering with minScore < 5000
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain
    mv all.chain all.chain.unfiltered
    chainFilter -minScore=5000 all.chain.unfiltered > all.chain
    chainSplit chainFilt5k all.chain
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain/chainFilt5k
    hgLoadChain tetNig1 chr1_chainFr1Filt5k chr1.chain

# featureBits -chrom=chr1 tetNig1 all_mrna chainFr1Filt5kLink -enrichment
# all_mrna 2.637%, chainFr1Filt5kLink 75.999%, both 2.319%, cover 87.95%, 
# enrich 1.16x
# very little difference in the coverage so use the filtered version
    ssh hgwdev
    rm -r chain
    mv chainFilt5k chain
    cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain/chain
    foreach i (*.chain)
        set c = $i:r
        hgLoadChain tetNig1 ${c}_chainFr1 $i
        echo done $c
    end

# NET FUGU (FR1) BLASTZ (DONE, 2004-09-01, hartera)
# RELOADED NET WITH CORRECT TABLE NAME (2004-09-02, hartera)
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain
    mkdir preNet
    cd chain
    foreach i (*.chain)
       echo preNetting $i
       /cluster/bin/i386/chainPreNet $i ../../S1.len ../../S2.len \
                                     ../preNet/$i
    end
                                                                                
    cd ..
    mkdir n1
    cd preNet
    foreach i (*.chain)
      set n = $i:r.net
      echo primary netting $i
      /cluster/bin/i386/chainNet $i -minSpace=1 ../../S1.len ../../S2.len \
                                 ../n1/$n /dev/null
    end
    cd ..
    cat n1/*.net | /cluster/bin/i386/netSyntenic stdin noClass.net
    # memory usage 300011520, utime 1360 s/100, stime 247

# Add classification info using db tables:
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain
    # netClass looks for ancient repeats in one of the databases
    # hg17 has this table - hand-curated by Arian but this is for
    # human-rodent comparisons so do not use here, use -noAr option
    
    time netClass noClass.net tetNig1 fr1 fuguFr1.net -noAr
    netFilter -minGap=10 fuguFr1.net |  hgLoadNet tetNig1 netFr1 stdin

# MAKE Human Proteins track
    ssh kksilo
    mkdir -p /cluster/data/tetNig1/blastDb
    cd /cluster/data/tetNig1
    cat */*/*.lft > jkStuff/subLift.lft
    for i in */*/*_*_*.fa; do ln -s `pwd`/$i blastDb; done  
    # remove *_random_#.fa (not listed)
    cd blastDb
    for i in *.fa; do formatdb -i $i -p F 2> /dev/null; done
    rm *.fa 

    ssh kkr1u00
    mkdir -p /iscratch/i/tetNig1/blastDb
    cd /cluster/data/tetNig1/blastDb
    cp * /iscratch/i/tetNig1/blastDb
    (~kent/bin/iSync) 2>&1 > sync.out
    
    mkdir -p /cluster/data/tetNig1/bed/tblastn.hg17KG
    cd /cluster/data/tetNig1/bed/tblastn.hg17KG
    ls -1S /iscratch/i/tetNig1/blastDb/*.nsq | sed "s/\.nsq//" > tetra.lst
    exit

    # back to kksilo
    cd /cluster/data/tetNig1/bed/tblastn.hg17KG
    mkdir kgfa
    # calculate a reasonable number of jobs
    calc `wc /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl | awk "{print \\\$1}"`/\(150000/`wc tetra.lst | awk "{print \\\$1}"`\)
    # 40262/(150000/979) = 262.776653
    split -l 263 /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl kgfa/kg
    cd kgfa
    for i in *; do pslxToFa $i $i.fa; rm $i; done
    cd ..
    ls -1S kgfa/*.fa > kg.lst
    mkdir blastOut
    for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
    tcsh
    cat << '_EOF_' > blastGsub
#LOOP
blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } 
#ENDLOOP
'_EOF_'
    cat << '_EOF_' > blastSome
#!/bin/sh
BLASTMAT=/iscratch/i/blast/data
export BLASTMAT
g=`basename $2`
f=/tmp/`basename $3`.$g
for eVal in 0.1 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
then
        mv $f.8 $f.1
        break;
fi
done
if test -f  $f.1
then
if /cluster/bin/i386/blastToPsl $f.1 $f.2
then
        liftUp -nosort -type=".psl" -nohead $f.3 /cluster/data/tetNig1/jkStuff/subLift.lft warn $f.2
        liftUp -nosort -type=".psl" -nohead $f.4 /cluster/data/tetNig1/jkStuff/liftAll.lft warn $f.3
        liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg17/bed/blat.hg17KG/protein.lft warn $f.4
        mv $3.tmp $3
        rm -f $f.1 $f.2 $f.3 $f.4
        exit 0
    fi
fi
rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4
exit 1
'_EOF_'

    chmod +x blastSome
    gensub2 tetra.lst kg.lst blastGsub blastSpec

    # exit from tcsh
    exit 
    ssh kk
    cd /cluster/data/tetNig1/bed/tblastn.hg17KG
    para create blastSpec
    para try, push
# Completed: 144760 of 144760 jobs
# CPU time in finished jobs:   22218348s  370305.81m  6171.76h  257.16d  0.705 y
# IO & Wait Time:               3507367s   58456.11m   974.27h   40.59d  0.111 y
# Average job time:                 178s       2.96m     0.05h    0.00d
# Longest job:                     2810s      46.83m     0.78h    0.03d
# Submission to last job:         41709s     695.15m    11.59h    0.48d

    tcsh
    cat << '_EOF_' > chainGsub
#LOOP
chainSome $(path1)
#ENDLOOP
'_EOF_'

    cat << '_EOF_' > chainSome
(cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=50000 stdin ../c.`basename $1`.psl)
'_EOF_'
    chmod +x chainSome

    ls -1dS `pwd`/blastOut/kg?? > chain.lst
    gensub2 chain.lst single chainGsub chainSpec

    ssh kki
    cd /cluster/data/tetNig1/bed/tblastn.hg17KG
    para create chainSpec
    para push
# Completed: 261 of 261 jobs
# CPU time in finished jobs:        828s      13.81m     0.23h    0.01d  0.000 y
# IO & Wait Time:                 19521s     325.34m     5.42h    0.23d  0.001 y
# Average job time:                  78s       1.30m     0.02h    0.00d
# Longest job:                      270s       4.50m     0.07h    0.00d
# Submission to last job:          3878s      64.63m     1.08h    0.04d

    exit
    # back to kksilo
    cd /cluster/data/tetNig1/bed/tblastn.hg17KG/blastOut
    for i in kg??
    do 
	awk "(\$13 - \$12)/\$11 > 0.6 {print}" c.$i.psl > c60.$i.psl
	sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
	awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
	echo $i
    done

    cat u.*.psl m60* | liftUp -nosort -type=".psl" -nohead stdout /cluster/data/tetNig1/jkStuff/liftAll.lft warn stdin |  \
    	sort -T /tmp -k 14,14 -k 17,17n -k 17,17n  | uniq  > ../blastHg17KG.psl
    cd ..

    ssh hgwdev
    cd /cluster/data/tetNig1/bed/tblastn.hg17KG
    hgLoadPsl tetNig1 blastHg17KG.psl
    exit

    # back to kksilo
    rm -rf blastOut

# End tblastn

# MAKE DOWNLOADABLE SEQUENCE FILES (DONE, 2004-09-08, hartera)
    ssh kksilo
    cd /cluster/data/tetNig1
    #- Build the .zip files
    cat << '_EOF_' > jkStuff/zipAll.csh
rm -rf zip
mkdir zip
# chrom AGP's
zip -j zip/chromAgp.zip [0-9A-Z]*/chr*.agp
# chrom RepeatMasker out files
zip -j zip/chromOut.zip [0-9A-Z]*/chr*.fa.out
# soft masked chrom fasta
zip -j zip/chromFa.zip [0-9A-Z]*/chr*.fa
# hard masked chrom fasta
zip -j zip/chromFaMasked.zip [0-9A-Z]*/chr*.fa.masked
# chrom TRF output files
cd bed/simpleRepeat
zip ../../zip/chromTrf.zip trfMask/chr*.bed
cd ../..
# get GenBank native mRNAs
cd /cluster/data/genbank
./bin/i386/gbGetSeqs -db=tetNig1 -native GenBank mrna \
        /cluster/data/tetNig1/zip/mrna.fa
# get GenBank xeno mRNAs
./bin/i386/gbGetSeqs -db=tetNig1 -xeno GenBank mrna \
        /cluster/data/tetNig1/zip/xenoMrna.fa
# get native GenBank ESTs
./bin/i386/gbGetSeqs -db=tetNig1 -native GenBank est \
/cluster/data/tetNig1/zip/est.fa

cd /cluster/data/tetNig1/zip
# zip GenBank native and xeno mRNAs and native ESTs
zip -j mrna.zip mrna.fa
zip -j xenoMrna.zip xenoMrna.fa
zip -j est.zip est.fa
'_EOF_'
    # << this line makes emacs coloring happy
    nice csh ./jkStuff/zipAll.csh |& tee ./jkStuff/zipAll.log
    cd jkStuff
    #- Look at zipAll.log to make sure all file lists look reasonable.
    # Copy the .zip files to
    # hgwdev:/usr/local/apache/...
    cd /cluster/data/tetNig1/zip
    #- Check zip file integrity:
    foreach f (*.zip)
      unzip -t $f > $f.test
      tail -1 $f.test
    end
    wc -l *.zip.test
# 29 chromAgp.zip.test
# 29 chromFa.zip.test
# 29 chromFaMasked.zip.test
# 29 chromOut.zip.test
# 87 chromTrf.zip.test
#  3 est.zip.test
#  3 mrna.zip.test
#  3 xenoMrna.zip.test
#  212 total

    ssh hgwdev
    /cluster/data/tetNig1/zip
    set gp = /usr/local/apache/htdocs/goldenPath/tetNig1
    mkdir -p $gp/bigZips
    cp -p *.zip $gp/bigZips
    mkdir -p $gp/chromosomes
    foreach f (../*/chr*.fa)
       zip -j $gp/chromosomes/$f:t.zip $f
    end
    cd $gp/bigZips
    md5sum *.zip > md5sum.txt
    cd $gp/chromosomes
    md5sum *.zip > md5sum.txt
    # Take a look at bigZips/* and chromosomes/*, update their README.txt's

# MAKE VSHG17 DOWNLOADABLES (DONE, 2004-09-08, hartera)
    ssh kksilo
    # zip chains and nets
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChain
    cp all.chain hg17.chain
    zip -j /cluster/data/tetNig1/zip/hg17.chain.zip hg17.chain
    rm hg17.chain
    zip -j /cluster/data/tetNig1/zip/hg17.net.zip hg17.net

    ssh hgwdev
    # copy chains and nets to downloads area
    set gp = /usr/local/apache/htdocs/goldenPath/tetNig1
    mkdir -p $gp/vsHg17
    cd $gp/vsHg17
    mv /cluster/data/tetNig1/zip/hg17*.zip .
    md5sum *.zip > md5sum.txt

    # move axt files to downloads area and zip
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap/axtChrom
    mkdir -p $gp/vsHg17/axtChrom
    cp -p *.axt $gp/vsHg17/axtChrom
    cd $gp/vsHg17/axtChrom
    gzip *.axt
    md5sum *.gz > md5sum.txt

    # Copy over & edit README.txt w/pointers to chain, net formats.

# MAKE VSFR1 DOWNLOADABLES (DONE, 2004-09-08, hartera)
    ssh kksilo
    # zip chains and nets
    cd /cluster/data/tetNig1/bed/blastz.fr1/axtChain
    cp all.chain fr1.chain
    zip -j /cluster/data/tetNig1/zip/fr1.chain.zip fr1.chain
    rm fr1.chain
    cp fuguFr1.net fr1.net
    zip -j /cluster/data/tetNig1/zip/fr1.net.zip fr1.net
    rm fr1.net

    ssh hgwdev
    # copy chains and nets to downloads area
    set gp = /usr/local/apache/htdocs/goldenPath/tetNig1
    mkdir -p $gp/vsFr1
    cd $gp/vsFr1
    mv /cluster/data/tetNig1/zip/fr1*.zip .
    md5sum *.zip > md5sum.txt

    # move axt files to downloads area and zip
    cd /cluster/data/tetNig1/bed/blastz.fr1/axtChrom
    mkdir -p $gp/vsFr1/axtChrom
    cp -p *.axt $gp/vsFr1/axtChrom
    cd $gp/vsFr1/axtChrom
    gzip *.axt
    md5sum *.gz > md5sum.txt

    # Copy over & edit README.txt w/pointers to chain, net formats.

# BLASTZ SWAP FOR mm5 vs tetNig1 BLASTZ TO CREATE tetNig1 vs MOUSE mm5 BLASTZ
# (DONE, 2004-09-08, hartera)
    ssh kksilo
    mkdir -p /cluster/store8/tetNig1/blastz.mm5.swap.2004-09-08
    ln -s /cluster/store8/tetNig1/blastz.mm5.swap.2004-09-08 \
          /cluster/data/tetNig1/bed
    ln -s /cluster/data/tetNig1/bed/blastz.mm5.swap.2004-09-08 \
          /cluster/data/tetNig1/bed/blastz.mm5.swap
    ssh kolossus
    cd /cluster/data/tetNig1/bed/blastz.mm5.swap
    # use axtChrom from blastzTetNig1 on mm5
    set aliDir = /cluster/data/mm5/bed/blastz.tetNig1
    cp $aliDir/S1.len S2.len
    cp $aliDir/S2.len S1.len
    mkdir unsorted axtChrom

    cat $aliDir/axtChrom/chr*.axt \
    | axtSwap stdin $aliDir/S1.len $aliDir/S2.len stdout \
    | axtSplitByTarget stdin unsorted
    # Sort the shuffled .axt files.
    foreach f (unsorted/*.axt)
      echo sorting $f:t:r
      axtSort $f axtChrom/$f:t
    end
    du -sh $aliDir/axtChrom unsorted axtChrom
    # 522M    /cluster/data/mm5/bed/blastz.tetNig1/axtChrom
    # 523M    unsorted
    # 522M    axtChrom
    rm -r unsorted
    # translate sorted axt files into psl
    ssh kolossus
    cd /cluster/data/tetNig1/bed/blastz.mm5.swap
    mkdir -p pslChrom
    set tbl = "blastzMm5"
    foreach f (axtChrom/chr*.axt)
      set c=$f:t:r
      echo "Processing chr $c"
      /cluster/bin/i386/axtToPsl $f S1.len S2.len pslChrom/${c}_${tbl}.psl
    end
    # Load database tables
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.mm5.swap/pslChrom
    foreach f (./*.psl)
      /cluster/bin/i386/hgLoadPsl tetNig1 $f
      echo "$f Done"
    end
# featureBits -chrom=chr1 tetNig1 all_mrna blastzMm5 -enrichment
# all_mrna 2.637%, blastzMm5 12.633%, both 0.852%, cover 32.30%, enrich 2.56x

# CHAIN MOUSE (mm5) BLASTZ (DONE, 2004-09-09, hartera)
    # Run axtChain on little cluster
    ssh kki
    cd /cluster/data/tetNig1/bed/blastz.mm5.swap
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chain
    ls -1S /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChrom/*.axt \
                    > input.lst
    cat << '_EOF_' > gsub
#LOOP
doChain {check in exists $(path1)} {check out line+ chain/$(root1).chain} {check out line+ out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy
                                                                                
    # Make our own linear gap file with reduced gap penalties,
    # in hopes of getting longer chains - works well for species at
    # chicken-human distance or greater
    cat << '_EOF_' > ../../chickenHumanTuned.gap
tablesize^V     11
smallSize^V     111
position^V      1^V     2^V     3^V     11^V    111^V   2111^V  12111^V 32111^V
72111^V 152111^V        252111
qGap^V  325^V   360^V   400^V   450^V   600^V   1100^V  3600^V  7600^V  15600^V
31600^V 56600
tGap^V  325^V   360^V   400^V   450^V   600^V   1100^V  3600^V  7600^V  15600^V
31600^V 56600
bothGap^V       625^V   660^V   700^V   750^V   900^V   1400^V  4000^V  8000^V
16000^V 32000^V 57000
'_EOF_'
# << this line makes emacs coloring happy
                                                                                
cat << '_EOF_' > doChain
#!/bin/csh
axtChain -linearGap=../../chickenHumanTuned.gap $1 \
    /iscratch/i/tetNig1/nib \
    /iscratch/i/mus/mm5/softNib $2 >& $3 
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    gensub2 input.lst single gsub jobList
    para create jobList
    para try, check, push, check...
# para time
# Completed: 27 of 27 jobs
# CPU time in finished jobs:       2124s      35.40m     0.59h    0.02d  0.000 y
# IO & Wait Time:                    59s       0.99m     0.02h    0.00d  0.000 y
# Average job time:                  81s       1.35m     0.02h    0.00d
# Longest job:                      123s       2.05m     0.03h    0.00d
# Submission to last job:          3725s      62.08m     1.03h    0.04d

   # now on the cluster server, sort chains
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chain all.chain
                                                                                
    # take a look at score distr's
    foreach f (chain/*.chain)
      grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
      echo $f:t:r >> hist5000.out
      textHistogram -binSize=5000 /tmp/score.$f:t:r >> hist5000.out
      echo ""
    end
    # apart from chrUn_random, not too many chains with score < 5000
    # load chr 1 into table
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain/chain
    hgLoadChain tetNig1 chr1_chainMm5 chr1.chain
# no RefSeqs for Tetraodon so use mrna table
# featureBits -chrom=chr1 tetNig1 all_mrna chainMm5Link -enrichment
# all_mrna 2.637%, chainMm5Link 11.552%, both 0.823%, cover 31.19%, enrich 2.70x
    # try filtering with minScore < 5000
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain
    mv all.chain all.chain.unfiltered
    chainFilter -minScore=5000 all.chain.unfiltered > all.chain
    chainSplit chainFilt5k all.chain
    
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain/chainFilt5k
    hgLoadChain tetNig1 chr1_chainMm5Filt5k chr1.chain
# featureBits -chrom=chr1 tetNig1 all_mrna chainMm5Filt5kLink -enrichment
# all_mrna 2.637%, chainMm5Filt5kLink 10.492%, both 0.780%, cover 29.58%, 
# enrich 2.82x
# use chains with minScore=5000 filtering
# remove test chain tables for chr1
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain
    hgsql -e "drop table chr1_chainMm5;" tetNig1
    hgsql -e "drop table chr1_chainMm5Link;" tetNig1
    hgsql -e "drop table chr1_chainMm5Filt5k;" tetNig1
    hgsql -e "drop table chr1_chainMm5Filt5kLink;" tetNig1
    rm -r chain
    mv chainFilt5k chain

    cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain/chain
    foreach i (*.chain)
        set c = $i:r
        hgLoadChain tetNig1 ${c}_chainMm5 $i
        echo done $c
    end

# NET MOUSE (mm5) BLASTZ (DONE, 2004-09-09, hartera)
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain
    mkdir preNet
    cd chain
    foreach i (*.chain)
       echo preNetting $i
       /cluster/bin/i386/chainPreNet $i ../../S1.len ../../S2.len \
                                     ../preNet/$i
    end
    cd ..
    mkdir n1
    cd preNet
    foreach i (*.chain)
      set n = $i:r.net
      echo primary netting $i
      /cluster/bin/i386/chainNet $i -minSpace=1 ../../S1.len ../../S2.len \
                                 ../n1/$n /dev/null
    end
    cd ..
    cat n1/*.net | /cluster/bin/i386/netSyntenic stdin noClass.net
    # memory usage 67969024, utime 336 s/100, stime 36

    # Add classification info using db tables:
    cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain
    # netClass looks for ancient repeats in one of the databases
    # hg17 has this table - hand-curated by Arian but this is for
    # human-rodent comparisons so do not use here, use -noAr option
    mkdir -p /cluster/bluearc/mm5/linSpecRep.notInTetraodon
    mkdir -p /cluster/bluearc/tetNig1/linSpecRep.notInMouse
    cp -p /iscratch/i/mm5/linSpecRep.notInTetraodon/* \
          /cluster/bluearc/mm5/linSpecRep.notInTetraodon
    cp -p /iscratch/i/tetNig1/linSpecRep.notInMouse/* \
          /cluster/bluearc/tetNig1/linSpecRep.notInMouse

    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain
    time netClass noClass.net tetNig1 mm5 mouseMm5.net \
          -tNewR=/cluster/bluearc/tetNig1/linSpecRep.notInMouse \
          -qNewR=/cluster/bluearc/mm5/linSpecRep.notInTetraodon -noAr
    # 50.960u 34.880s 2:25.82 58.8%   0+0k 0+0io 200pf+0w
    netFilter -minGap=10 mouseMm5.net |  hgLoadNet tetNig1 netMm5 stdin
    # featureBits tetNig1 all_mrna netHg17 -enrichment
    # all_mrna 3.204%, netHg17 42.716%, both 1.698%, cover 53.00%, enrich 1.24x

# MAKE VSMM5 DOWNLOADABLES (DONE, 2004-09-10, hartera)
    ssh kksilo
    # zip chains and nets
    cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChain
    cp all.chain mm5.chain
    zip -j /cluster/data/tetNig1/zip/mm5.chain.zip mm5.chain
    rm mm5.chain
    cp mouseMm5.net mm5.net
    zip -j /cluster/data/tetNig1/zip/mm5.net.zip mm5.net
    rm mm5.net

    ssh hgwdev
    # copy chains and nets to downloads area
    set gp = /usr/local/apache/htdocs/goldenPath/tetNig1
    mkdir -p $gp/vsMm5
    cd $gp/vsMm5
    mv /cluster/data/tetNig1/zip/mm5*.zip .
    md5sum *.zip > md5sum.txt
    # move axt files to downloads area and zip
    cd /cluster/data/tetNig1/bed/blastz.mm5.swap/axtChrom
    mkdir -p $gp/vsMm5/axtChrom
    cp -p *.axt $gp/vsMm5/axtChrom
    cd $gp/vsMm5/axtChrom
    gzip *.axt
    md5sum *.gz > md5sum.txt

    # Copy over & edit README.txt w/pointers to chain, net formats.

# CLEANUP HUMAN (hg17) BLASTZ (DONE, 2004-09-10, hartera)
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastz.hg17.swap
    nice rm axtChain/run1/chain/* &
    nice gzip {axt,psl}Chrom/* axtChain/{all.chain,*.net} &

# CLEANUP FUGU (fr1) BLASTZ (DONE, 2004-09-10, hartera)
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastz.fr1
    nice rm -rf raw &
    nice rm -rf lav &
    nice rm -rf test* &
    nice rm axtChain/run1/chain/* &
    nice gzip {axt,psl}Chrom/* axtChain/{all.chain,*.net} &

# CLEANUP MOUSE (mm5) BLASTZ (DONE, 2004-09-10, hartera)
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastz.mm5
    nice rm -rf raw &
    nice rm -rf lav &
    nice rm -rf test* &
    nice rm axtChain/run1/chain/* &
    nice gzip {axt,psl}Chrom/* axtChain/{all.chain,*.net} &

# GENSCAN GENE PREDICTION ANNOTATIONS (DONE, 2004-09-10, hartera)
# Provided by Genoscope, Genscan was run on masked Tetraodon genomic DNA 
# using parameters calibrated for Tetraodon genes.

    ssh kksilo
    mkdir -p /cluster/data/tetNig1/bed/Genoscope/genscan
    cd /cluster/data/tetNig1/bed/Genoscope/genscan
    awk '{if ($2 == "GSC") print;}'  /cluster/data/tetNig1/tetraodon.all.gff > genscan.gff 
    foreach c (`cat /cluster/data/tetNig1/chrom.lst`)
        echo "Processing chr$c ... "
        awk '{if ($1 == "'chr${c}'") print;}' genscan.gff >> chr${c}.gff
    end

    # need to remove CDS from name then load
    foreach f (./chr*.gff) 
       perl -pi.bak -e 's/CDS G/G/' $f 
    end
    # load database with gene predictions
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/Genoscope/genscan
    # Reloaded without -genePredExt 1/6/05:
    nice ldHgGene tetNig1 genscan chr*.gff
    # Read 56120 transcripts in 227294 lines in 27 files
    # 56120 groups 27 seqs 1 sources 2 feature types
    # 28060 gene predictions

# GENOSCOPE GAZE ANNOTATIONS (DONE, 2004-09-10, hartera)
# GAZE annotations have been automatically generated by the GAZE software 
# (Howe et al.(2002). Genome Res., 12: 1418-27) using a custom designed gene 
# model. GAZE integrates annotation information from Geneid, Genescan, 
# Exofish (Human, Mouse, Fugu), Genewise (Human, Mouse) and cDNAs (Tetraodon).

    ssh kksilo
    mkdir -p /cluster/data/tetNig1/bed/Genoscope/gaze
    cd /cluster/data/tetNig1/bed/Genoscope/gaze
    awk '{if ($2 == "GSTEN") print;}' /cluster/data/tetNig1/tetraodon.all.gff > gaze.gff 
    # remove ; as separators between names of gene, protein, mRNA etc. 

    perl -pi.bak -e "s/\s\;\s/\t/g;" gaze.gff
    foreach c (`cat /cluster/data/tetNig1/chrom.lst`)
        echo "Processing chr$c ... "
        awk '{if ($1 == "'chr${c}'") print;}' gaze.gff >> chr${c}.gff
    end
    # need to remove CDS from name then load
    foreach f (./chr*.gff) 
       perl -pi.bak -e 's/CDS G/G/' $f 
    end
    # load database with gene predictions
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/Genoscope/gaze
    # loaded chr1 only into rahseq first to check then load all chroms
    # Reloaded without -genePredExt 1/6/05:
    nice ldHgGene tetNig1 gaze chr*.gff

    # GALT 09-15-04 index on name(10) going to name(17) as all have long prefix
    echo "drop index name on gaze ;" | hgsql tetNig1
    echo "create index name on gaze (name(17)) ;" | hgsql tetNig1
	

# Read 91905 transcripts in 265439 lines in 27 files
# 91905 groups 27 seqs 1 sources 4 feature types
# 27918 gene predictions

# HOX GENES TRACK (DONE, 2004-09-10, hartera)
# Provided by Genoscope: Tetraodon HOX genes were annotated by human experts 
# using sequence similarities with human, mouse and zebrafish HOX protein and 
# nucleic sequence.

    ssh kksilo
    mkdir -p /cluster/data/tetNig1/bed/Genoscope/hoxGenes
    cd /cluster/data/tetNig1/bed/Genoscope/hoxGenes
    awk '{if ($2 == "HOX") print;}' /cluster/data/tetNig1/tetraodon.all.gff > hoxGenes.gff 
    # remove ; as separators between names of gene, protein, mRNA etc. 

    perl -pi.bak -e "s/\s\;\s/\t/g;" hoxGenes.gff
    foreach c (`cat /cluster/data/tetNig1/chrom.lst`)
        echo "Processing chr$c ... "
        awk '{if ($1 == "'chr${c}'") print;}' hoxGenes.gff >> chr${c}.gff
    end
    # need to remove CDS from name then load
    foreach f (./chr*.gff) 
       perl -pi.bak -e 's/CDS H/H/' $f 
    end
    # load database with gene predictions
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/Genoscope/hoxGenes
    # loaded chr1 only into rahseq first to check then load all chroms
    # Reloaded without -genePredExt 1/6/05:
    nice ldHgGene tetNig1 hoxGenes chr*.gff
# Read 96 transcripts in 142 lines in 27 files
# 96 groups 6 seqs 1 sources 2 feature types
# 48 gene predictions

# CYTOKINE GENES TRACK (DONE, 2004-09-10, hartera)
# Provided by Genoscope: Tetraodon Cytokine genes were annotated by human 
# experts using sequence similarities with human, mouse and zebrafish 
# Cytokine protein and nucleic sequence.

    ssh kksilo
    mkdir -p /cluster/data/tetNig1/bed/Genoscope/cytokines
    cd /cluster/data/tetNig1/bed/Genoscope/cytokines
    awk '{if ($2 == "CYT") print;}' /cluster/data/tetNig1/tetraodon.all.gff > cytokines.gff 
    # remove ; as separators between names of gene, protein, mRNA etc. 

    perl -pi.bak -e "s/\s\;\s/\t/g;" cytokines.gff
    foreach c (`cat /cluster/data/tetNig1/chrom.lst`)
        echo "Processing chr$c ... "
        awk '{if ($1 == "'chr${c}'") print;}' cytokines.gff >> chr${c}.gff
    end
    # need to remove CDS from name then load
    foreach f (./chr*.gff) 
       perl -pi.bak -e 's/CDS A/A/' $f 
    end
    # load database with gene predictions
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/Genoscope/cytokines
    # loaded chr1 only into rahseq first to check then load all chroms
    # Reloaded without -genePredExt 1/6/05:
    nice ldHgGene tetNig1 cytokines chr*.gff

# GENEID GENES (DONE 9/22/04 angie)
    ssh kksilo
    mkdir /cluster/data/tetNig1/bed/geneid
    cd /cluster/data/tetNig1/bed/geneid
    foreach chr (`awk '{print $1;}' ../../chrom.sizes`)
      wget http://genome.imim.es/genepredictions/T.nigroviridis/tetNigFeb2004/geneid_v1.2/$chr.gtf
      wget http://genome.imim.es/genepredictions/T.nigroviridis/tetNigFeb2004/geneid_v1.2/$chr.prot
    end
    # Add ".1" suffix to each item in .prot's, to match transcript_id's in gtf
    cp /dev/null geneidPep.fa
    foreach f (chr*.prot)
      perl -wpe 's/^(>chr\S+)/$1.1/' $f >> geneidPep.fa
    end
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/geneid
    ldHgGene -gtf -genePredExt tetNig1 geneid chr*.gtf
    hgPepPred tetNig1 generic geneidPep geneidPep.fa

# GENOSCOPE GENEWISE HUMAN IPI (DONE, 2004-10-14, hartera)
    ssh kksilo
    mkdir -p /cluster/data/tetNig1/bed/Genoscope/humanIPI
    cd /cluster/data/tetNig1/bed/Genoscope/humanIPI
    # Create README to describe data from Genoscope tetraodon.all.gff.readme
    awk '{if ($2 == "GWS_H") print;}' /cluster/data/tetNig1/tetraodon.all.gff \
        > humanIPI.gff
    # remove "exon" from name field 
    sed -e 's/exon GWS/GWS/g' humanIPI.gff > humanIPIformat.gff

    # load database with gene predictions
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/Genoscope/humanIPI
    nice ldHgGene -genePredExt tetNig1 humanIPI humanIPIformat.gff
    # Read 43384 transcripts in 185494 lines in 1 files
    # 43384 groups 27 seqs 1 sources 2 feature types
    # 21692 gene predictions
    # add to trackDb and write html page

# GENOSCOPE HUMAN IPI ECOTIGS (DONE, 2004-10-18, hartera) 
    ssh kksilo
    mkdir -p /cluster/data/tetNig1/bed/Genoscope/humanIPIEcotigs
    cd /cluster/data/tetNig1/bed/Genoscope/humanIPIEcotigs
    # Create README to describe data from Genoscope tetraodon.all.gff.readme
    awk '{if ($2 == "EP3_H") print;}' /cluster/data/tetNig1/tetraodon.all.gff \
        > humanIPIEcotigs.gff
    # remove "exon" from name field 
    sed -e 's/exon EP3/EP3/g' humanIPIEcotigs.gff > humanIPIEcotigsformat.gff

    # load database with gene predictions - LOAD IN RAHSEQ FIRST TO CHECK
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/Genoscope/humanIPIEcotigs
    nice ldHgGene -genePredExt tetNig1 humanIPIEcotigs humanIPIEcotigsformat.gff
    # Read 55800 transcripts in 204355 lines in 1 files
    # 55800 groups 27 seqs 1 sources 2 feature types
    # 27900 gene predictions

# GENOSCOPE FUGU (Takifugu) ECOTIGS (in progress, 2004-10-18, hartera) 
    ssh kksilo
    mkdir -p /cluster/data/tetNig1/bed/Genoscope/fuguEcotigs
    cd /cluster/data/tetNig1/bed/Genoscope/fuguEcotigs
    # Create README to describe data from Genoscope tetraodon.all.gff.readme
    awk '{if ($2 == "EG3_F") print;}' /cluster/data/tetNig1/tetraodon.all.gff \
        > fuguEcotigs.gff
 # remove "HSP" from name field, change HSP to "exon" and match to "transcript" 
    sed -e 's/HSP EG3/EG3/g' fuguEcotigs.gff | sed -e 's/HSP/exon/' \
           | sed -e 's/match/transcript/' > fuguEcotigsformat.gff

    # load database with gene predictions
    # load in when know which version of Fugu this is so can name appropriately
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/Genoscope/fuguEcotigs
    nice ldHgGene -genePredExt tetNig1 fuguEcotigs fuguEcotigsformat.gff
    # Read 35552 transcripts in 210128 lines in 1 files
    #   35552 groups 27 seqs 1 sources 2 feature types
    # 17776 gene predictions

# Adding a photograph to the description
#	Permission to use photos obtained from Hugues Roest Crollius:
#	Email message attached below
    ssh hgwdev
    cd /cluster/data/tetNig1/html
    wget \
	'http://www.genoscope.cns.fr/externe/Francais/Projets/Projet_C/img/tetra23.jpg' \
	-O photo_tetraodon.jpg
    convert -crop 1320x600+280+40 -sharpen 0 -normalize \
	-geometry 300x200 -quality 80 photo_tetraodon.jpg \
	Tetraodon_nigroviridis.jpg
    cp -p Tetraodon_nigroviridis.jpg /usr/local/apache/htdocs/images
    #	add links to this image in the description.html page, request push

# After checking with a couple of journals who have used our pictures for
# their covers, it appears that none holds copyrights so you are free to use
# any of the pictures on the followings pages:

# http://www.genoscope.cns.fr/externe/English/Projets/Projet_C/album.html

# For some of the images on the first page we can provide you with high
# resolution versions if needed.

# I hope this helps.

# Best wishes,

# Hugues Roest Crollius
# -------------------------------------------------------------------------
# Hugues Roest Crollius
# Laboratoire Dynamique et Organisation des Genomes (Dyogen)
# CNRS UMR8541
# Departement de Biologie
# Ecole Normale Superieure                        Tel: +33 (0)1 44 32 23 70
# 46 rue d'Ulm, 75230 Paris Cedex 05              Email: hrc@ens.fr

# SELF BLASTZ (DONE, 2005-01-12, hartera)
    ssh kksilo
    rm /cluster/data/tetNig1/bed/blastzSelf
    mkdir -p /cluster/store8/tetNig1/bed/blastzSelf.2005-01-11
    ln -s /cluster/store8/tetNig1/bed/blastzSelf.2005-01-11 \
          /cluster/data/tetNig1/bed/blastzSelf
    cd /cluster/data/tetNig1/bed/blastzSelf
    # try using contigs
    mkdir -p /cluster/data/tetNig1/bed/blastzSelf/contigSeqs
    cd /cluster/data/tetNig1/bed/blastzSelf/contigSeqs
    foreach c (`cat /cluster/data/tetNig1/chrom.lst`)
       foreach d (/cluster/data/tetNig1/$c/chr${c}*_?{,?})
      echo "splitting $d"
      set contig = $d:t
      echo "$contig"
      ~/bin/i386/faSplit gap $d/$contig.fa 500000 ${contig}_ -lift=$contig.lft \
            -minGapSize=100
      end
    end
    # 1025 files for chr*.fa and chr*.lft - 940 for chr*.fa, 85 for chr*.lft
    cat chr*.fa >> tetNig1Contigs.fa
    cat chr*.lft >> 500kbcontigs.lft
    rm chr*.fa chr*.lft
    
    ssh kkr1u00
    mkdir -p /iscratch/i/tetNig1/contigs
    mv /cluster/data/tetNig1/bed/blastzSelf/contigSeqs/tetNig1Contigs.fa \
       /iscratch/i/tetNig1/contigs
    iSync

    ssh kk
    cd /cluster/data/tetNig1/bed/blastzSelf
    # for this blastz run, M=50
    cat << '_EOF_' > DEF
# tetraodon vs. tetraodon
export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/home/angie/schwartzbin:/cluster/home/kent/bin/i386

ALIGN=blastz-run1
BLASTZ=blastz
BLASTZ_L=5000
BLASTZ_H=2500
BLASTZ_ABRIDGE_REPEATS=0

# TARGET
# Tetraodon
SEQ1_DIR=/iscratch/i/tetNig1/nib
# not used
SEQ1_RMSK=
# not used
SEQ1_FLAG=
SEQ1_SMSK=
SEQ1_IN_CONTIGS=0
SEQ1_CHUNK=500000
SEQ1_LAP=500

# QUERY
# Tetraodon
SEQ2_DIR=/iscratch/i/tetNig1/contigs
# not currently used
SEQ2_RMSK=
# not currently used
SEQ2_FLAG=
SEQ2_SMSK=
SEQ2_IN_CONTIGS=1
SEQ2_CHUNK=
SEQ2_LAP=

BASE=/cluster/data/tetNig1/bed/blastzSelf

DEF=$BASE/DEF
RAW=$BASE/raw
CDBDIR=$BASE
SEQ1_LEN=$BASE/S1.len
SEQ2_LEN=$BASE/S2.len

#DEBUG=1
'_EOF_'
    # << this line makes emacs coloring happy                                                                                
    # Save the DEF file in the current standard place - TO BE DONE
    chmod +x DEF
    cp DEF ~angie/hummus/DEF.tetNig1-tetNig1.2005-01-11
    # prepare first cluster run
    bash # if a csh/tcsh user
    . ./DEF
    /cluster/data/tetNig1/jkStuff/BlastZ_run0.sh
    cd run.0
    # check batch looks ok then
    para try, check, push, check, ....
# para time
# Completed: 820 of 820 jobs
# CPU time in finished jobs:    1611164s   26852.74m   447.55h   18.65d  0.051 y
# IO & Wait Time:                 11448s     190.80m     3.18h    0.13d  0.000 y
# Average job time:                1979s      32.98m     0.55h    0.02d
# Longest job:                     4231s      70.52m     1.18h    0.05d
# Submission to last job:         14753s     245.88m     4.10h    0.17d

    # second cluster run: lift raw alignments -> lav dir
    cd /cluster/data/tetNig1/bed/blastzSelf
    bash # if a csh/tcsh user
    . ./DEF
    /cluster/data/tetNig1/jkStuff/BlastZ_run1.sh
    cd run.1
    para try, check, push, check etc.
# para time
# Completed: 820 of 820 jobs
# CPU time in finished jobs:       2741s      45.68m     0.76h    0.03d  0.000 y
# IO & Wait Time:                 13346s     222.44m     3.71h    0.15d  0.000 y
# Average job time:                  20s       0.33m     0.01h    0.00d
# Longest job:                      106s       1.77m     0.03h    0.00d
# Submission to last job:           926s      15.43m     0.26h    0.01d

    # third run: lav -> axt
    ssh kki
    cd /cluster/data/tetNig1/bed/blastzSelf
    mkdir axtChrom run.2
    cd run.2
    cat << '_EOF_' > do.csh
#!/bin/csh -ef
cd $1
cat `ls -1 *.lav | sort -g` \
| lavToAxt -fa stdin /iscratch/i/tetNig1/nib \
/iscratch/i/tetNig1/contigs/tetNig1Contigs.fa stdout \
| axtSort stdin $2
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x do.csh
    cat << '_EOF_' > gsub
#LOOP
./do.csh {check in exists $(path1)} {check out line+ /cluster/data/tetNig1/bed/blastzSelf/axtChrom/$(root1).axt}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy
    \ls -1Sd ../lav/chr* > chrom.list
    gensub2 chrom.list single gsub jobList
    wc -l jobList
    head jobList
    para create jobList
    para try, check, push, check,...
# para time
# Completed: 27 of 27 jobs
# CPU time in finished jobs:        337s       5.61m     0.09h    0.00d  0.000 y
# IO & Wait Time:                   324s       5.41m     0.09h    0.00d  0.000 y
# Average job time:                  24s       0.41m     0.01h    0.00d
# Longest job:                      273s       4.55m     0.08h    0.00d
# Submission to last job:          1675s      27.92m     0.47h    0.02d

    # do liftup here
    #- Lift up the 500KB chunk .out's to 5MB ("pseudo-contig") level
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastzSelf
    
    foreach d (axtChrom/chr*.axt)
      echo $d
      liftUp -axtQ ${d}.out.axt ./contigSeqs/500kbcontigs.lft warn $d \
        > /dev/null
    end

    #- Lift pseudo-contigs to chromosome level
    # check this is working correctly
    set tetra = "/cluster/data/tetNig1"
    foreach c (`cat ${tetra}/chrom.lst`)
      echo lifting $c
      liftUp -axtQ ./axtChrom/chr${c}.out2.axt \
         $tetra/jkStuff/liftAll.lft warn \
        ./axtChrom/chr${c}.axt.out.axt > /dev/null
    end
    cd axtChrom
    # need to drop Self alignments after lifting up co-ordinates
    foreach c (`cat ${tetra}/chrom.lst`)
      echo "Processing $c..."
      axtDropSelf chr${c}.out2.axt chr${c}.dropped.axt
      mv chr${c}.axt chr${c}.axt.old
      mv chr${c}.dropped.axt chr${c}.axt
      rm chr${c}.axt.out.axt
    end
    # translate sorted axt files into psl
    ssh kolossus
    cd /cluster/data/tetNig1/bed/blastzSelf
    mkdir -p pslChrom
    set tbl = "blastzSelf"
    foreach f (axtChrom/chr*.axt)
      set c=$f:t:r
      echo "Processing chr $c"
      /cluster/bin/i386/axtToPsl $f S1.len S1.len pslChrom/${c}_${tbl}.psl
    end
    # Load database tables
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastzSelf/pslChrom
    foreach f (./*.psl)
      /cluster/bin/i386/hgLoadPsl -noTNameIx tetNig1 $f
      echo "$f Done"
    end
# not in contigs: blastzSelf
# featureBits -chrom=chr1 tetNig1 all_mrna blastzSelf -enrichment
# all_mrna 2.601%, blastzSelf 16.728%, both 0.654%, cover 25.13%, enrich 1.50x
# featureBits -chrom=chr1 tetNig1 blastzSelf
# 2704737 bases of 16169091 (16.728%) in intersection

# blastzSelfC, query is in contigs, M=50
# featureBits -chrom=chr1 tetNig1 all_mrna blastzSelfC -enrichment
# all_mrna 2.601%, blastzSelfC 16.208%, both 0.638%, cover 24.51%, enrich 1.51x
# featureBits -chrom=chr1 tetNig1 blastzSelfC 
# 2620707 bases of 16169091 (16.208%) in intersection

# blastzSelfCM100, query is in contigs, M=100
# featureBits -chrom=chr1 tetNig1 all_mrna blastzSelfCM100 -enrichment
# all_mrna 2.601%, blastzSelfCM100 16.319%, both 0.639%, cover 24.56%, 
# enrich 1.51x
# featureBits -chrom=chr1 tetNig1 blastzSelfCM100
# 2638606 bases of 16169091 (16.319%) in intersection
# gain little by using M=100 except for a lot of extra alignments so use 
# M=50 instead.
# L=5000
# featureBits -chrom=chr1 tetNig1 blastzSelfL5k
# 2488953 bases of 16169091 (15.393%) in intersection
# L=8000, L8k
# featureBits -chrom=chr1 tetNig1 blastzSelfL8k
# 2303191 bases of 16169091 (14.244%) in intersection 
# rows in table
# blastzSelf: 517242
# blastzSelfC: 86073
# blastzSelfCM100: 132429
# blastzSelfCL5k: 62933
# blastzSelfL8k: 43137

# use blastz with contigs, M=50 and L=5000. Using L=5000 removes some of the 
# short low scoring alignments but not too stringently. After chaining, 
# further removal of low scoring alignments can be done.

# CHAIN SELF BLASTZ (DONE, 2005-01-14, hartera)
    # Run axtChain on little cluster
    ssh kki 
    cd /cluster/data/tetNig1/bed/blastzSelf
    mkdir -p axtChain/run1
    cd axtChain/run1
    mkdir out chain
    rm axtChrom/chr*.out2.axt
    rm axtChrom/chr*.old
    ls -1S /cluster/data/tetNig1/bed/blastzSelf/axtChrom/*.axt \
        > input.lst
cat << '_EOF_' > gsub
#LOOP
doChain {check in exists $(path1)} {check out line+ chain/$(root1).chain} {check out line+ out/$(root1).out}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy
# axtChain now using human/chicken gap scoring matrix as default
cat << '_EOF_' > doChain
#!/bin/csh
axtChain $1 \
    /iscratch/i/tetNig1/nib \
    /iscratch/i/tetNig1/nib $2 >& $3
'_EOF_'
    # << this line makes emacs coloring happy
    chmod a+x doChain
    gensub2 input.lst single gsub jobList
    para create jobList
    para try, check, push, check...
# Completed: 27 of 27 jobs
# CPU time in finished jobs:        864s      14.40m     0.24h    0.01d  0.000 y
# IO & Wait Time:                   137s       2.28m     0.04h    0.00d  0.000 y
# Average job time:                  37s       0.62m     0.01h    0.00d
# Longest job:                      559s       9.32m     0.16h    0.01d
# Submission to last job:          1758s      29.30m     0.49h    0.02d

    # now on the cluster server, sort chains
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastzSelf/axtChain
    chainMergeSort run1/chain/*.chain > all.chain
    chainSplit chainRaw all.chain
    # apply chainAntiRepeat to remove chains that are the result of repeats
    # and degenerate DNA 
    mkdir chain
    cd chainRaw
    foreach f (*.chain)
       set c = $f:r
       echo $c
       nice chainAntiRepeat /cluster/bluearc/tetNig1/nib \
                       /cluster/bluearc/tetNig1/nib $f \
                       ../chain/$c.chain
    end
    cd ..
    # take a look at score distr's,try also with larger bin size.
    foreach f (chain/*.chain)
      grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r
      echo $f:t:r >> hist10k.out
      textHistogram -binSize=10000 /tmp/score.$f:t:r >> hist10k.out
      echo ""
    end
    # trim to minScore=10000 to cut some of the fluff
    mkdir chainFilt
    foreach f (chain/*.chain)
      chainFilter -minScore=10000 $f > chainFilt/$f:t
    end
    # load tables    
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastzSelf/axtChain/chainFilt
    foreach i (*.chain)
        set c = $i:r
        hgLoadChain tetNig1 ${c}_chainSelf $i
        echo done $c
    end
# filtering on minScore=5000
# featureBits -chrom=chr1 tetNig1 chainSelf5kLink
# 2377755 bases of 16169091 (14.706%) in intersection
# rows = 362482
# filtering on minScore=10000, use this
# featureBits -chrom=chr1 tetNig1 chainSelfLink
# 2199465 bases of 16169091 (13.603%) in intersection
# rows = 233729

# NET SELF BLASTZ (DONE, 2005-01-14, hartera)
    ssh kksilo
    cd /cluster/data/tetNig1/bed/blastzSelf/axtChain
    mkdir preNet
    cd chainFilt
    foreach i (*.chain)
       echo preNetting $i
       /cluster/bin/i386/chainPreNet $i ../../S1.len ../../S1.len \
                                     ../preNet/$i
    end
    cd ..
    mkdir n1
    cd preNet
    foreach i (*.chain)
      set n = $i:r.net
      echo primary netting $i
      /cluster/bin/i386/chainNet $i -minSpace=1 ../../S1.len ../../S1.len \
                                 ../n1/$n /dev/null
    end
    cd ..
    cat n1/*.net | /cluster/bin/i386/netSyntenic stdin noClass.net
    # memory usage 78983168, utime 485 s/100, stime 55

    # Add classification info using db tables:
    # netClass looks for ancient repeats in one of the databases
    # hg17 has this table - hand-curated by Arian but this is for
    # human-rodent comparisons so do not use here, use -noAr option

    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastzSelf/axtChain
    time netClass noClass.net tetNig1 tetNig1 self.net -noAr
    # 14.530u 4.550s 0:31.26 61.0%    0+0k 0+0io 218pf+0w

    netFilter -minGap=10 self.net |  hgLoadNet tetNig1 netSelf stdin
    # featureBits tetNig1 netSelf
    # 184618536 bases of 342403326 (53.918%) in intersection

# MAKE Human Proteins track (hg17 DONE 2005-1-15 braney)
    ssh kksilo
    mkdir -p /cluster/data/tetNig1/blastDb
    cd /cluster/data/tetNig1/blastDb
    for i in `cat ../chrom.lst`; do ls -1 ../$i/*/*.fa  ; done | sed "/Un_random/d" > list
    for i in `cat list`
    do
	ln -s $i .
	formatdb -i `basename $i` -p F
    done
    rm *.log *.fa list
    cd ..
    for i in `cat chrom.lst`; do cat  $i/chr*/*.lft  ; done > jkStuff/subChr.lft

    ssh kkr1u00
    mkdir -p /iscratch/i/tetNig1/blastDb
    cd /cluster/data/tetNig1/blastDb
    for i in nhr nin nsq; do cp *.$i /iscratch/i/tetNig1/blastDb     ; echo $i; done

    cd
    iSync > sync.out

    mkdir -p /cluster/data/tetNig1/bed/tblastn.hg17KG
    cd /cluster/data/tetNig1/bed/tblastn.hg17KG
    echo  /iscratch/i/tetNig1/blastDb/*.nsq  | xargs ls -S | sed "s/\.nsq//"  > query.lst  
    # back to kksilo
    exit

    cd /cluster/data/tetNig1/bed/tblastn.hg17KG
    calc `wc /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl | awk "{print \\\$1}"`/\(300000/`wc query.lst | awk "{print \\\$1}"`\)
    # 42156/(300000/593) = 83.328360
    mkdir -p /cluster/bluearc/tetNig1/bed/tblastn.hg17KG/kgfa
    split -l 83 /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl /cluster/bluearc/tetNig1/bed/tblastn.hg17KG/kgfa/kg
    ln -s /cluster/bluearc/tetNig1/bed/tblastn.hg17KG/kgfa kgfa
    cd kgfa
    for i in *; do pslxToFa $i $i.fa; rm $i; done
    cd ..
    ls -1S kgfa/*.fa > kg.lst
    mkdir -p /cluster/bluearc/tetNig1/bed/tblastn.hg17KG/blastOut
    ln -s /cluster/bluearc/tetNig1/bed/tblastn.hg17KG/blastOut
    for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
    tcsh
    cat << '_EOF_' > blastGsub
#LOOP
blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } 
#ENDLOOP
'_EOF_'
    cat << '_EOF_' > blastSome
#!/bin/sh
BLASTMAT=/iscratch/i/blast/data
export BLASTMAT
g=`basename $2`
f=/tmp/`basename $3`.$g
for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
then
        mv $f.8 $f.1
        break;
fi
done
if test -f  $f.1
then
    if /cluster/bin/i386/blastToPsl $f.1 $f.2
    then
	liftUp -nosort -type=".psl" -nohead $f.3 ../../jkStuff/subChr.lft carry $f.2       
        liftUp -nosort -type=".psl" -nohead $f.4 ../../jkStuff/liftAll.lft carry $f.3       
        liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg17/bed/blat.hg17KG/protein.lft warn $f.4       
        if pslCheck -prot $3.tmp                                                                          
        then                                                                                              
            mv $3.tmp $3                                                                                  
            rm -f $f.1 $f.2 $f.3 $f.4                                                                              
        fi
        exit 0                                                                                            
    fi                                                                                                    
fi                                                                                                        
rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4      
exit 1
'_EOF_'

    chmod +x blastSome
    gensub2 query.lst kg.lst blastGsub blastSpec
    # I ended up doing the scaffolds separately from the chopped up chrom segments
    # but next time I wouldn't
    gensub2 scaffold.lst kg.lst blastGsub scaffoldBlastSpec

    ssh kk
    cd /cluster/data/tetNig1/bed/tblastn.hg17KG
    para create blastSpec
    para push
# Completed: 301244 of 301244 jobs
# CPU time in finished jobs:   25047824s  417463.74m  6957.73h  289.91d  0.794 y
# IO & Wait Time:               2658184s   44303.06m   738.38h   30.77d  0.084 y
# Average job time:                  92s       1.53m     0.03h    0.00d
# Longest job:                   226899s    3781.65m    63.03h    2.63d
# Submission to last job:        250711s    4178.52m    69.64h    2.90d

    cat << '_EOF_' > chainGsub
#LOOP
chainSome $(path1)
#ENDLOOP
'_EOF_'

    cat << '_EOF_' > chainSome
(cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=50000 stdin ../c.`basename $1`.psl)
'_EOF_'
    chmod +x chainSome

    ls -1dS `pwd`/blastOut/kg?? > chain.lst
    gensub2 chain.lst single chainGsub chainSpec

    ssh kki
    cd /cluster/data/tetNig1/bed/tblastn.hg17KG
    para create chainSpec
    para push
# Completed: 1287 of 1287 jobs
# CPU time in finished jobs:      72236s    1203.94m    20.07h    0.84d  0.002 y
# IO & Wait Time:                 22886s     381.43m     6.36h    0.26d  0.001 y
# Average job time:                  74s       1.23m     0.02h    0.00d
# Longest job:                     3374s      56.23m     0.94h    0.04d
# Submission to last job:          5982s      99.70m     1.66h    0.07d

    exit
    # back to kksilo
    cd /cluster/data/tetNig1/bed/tblastn.hg17KG/blastOut
    # again some weirdness because I did the NA and Un scaffolds separately
    for i in kg??
    do 
	cat c.$i.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > cs60.$i.psl
	sort -rn cs60.$i.psl | pslUniq stdin us.$i.psl
	awk "((\$1 / \$11) ) > 0.60 { print   }" cs60.$i.psl > ms60.$i.psl
	echo $i
    done

    for i in kg??
    do 
	cat $i/c.*.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl
	sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
	awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
	echo $i
    done

    cat us.*.psl ms60* | sort -T /tmp -k 14,14 -k 17,17n -k 17,17n  | uniq  > /cluster/data/tetNig1/bed/tblastn.hg17KG/blastHg17KG.psl
    cd ..
    # lift the chrUn and chrNA scaffolds 

    ssh hgwdev
    cd /cluster/data/tetNig1/bed/tblastn.hg17KG
    hgLoadPsl tetNig1 blastHg17KG.psl

    # back to kksilo
    rm -rf blastOut
# End tblastn

# GENOSCOPE HUMAN (hg17) ECORES (DONE, 2005-02-07, hartera)
    ssh kksilo
    mkdir -p /cluster/data/tetNig1/bed/Genocscope/ecoresHg17
    cd /cluster/data/tetNig1/bed/Genoscope/ecoresHg17
    
    wget --timestamp \
         http://www.genoscope.cns.fr/externe//4ucsc/ExofishTnig1Hs35
    # this is in gff format 
    # remove "Ecotig" from name field 
    sed -e 's/Ecotig EG/EG/g' ExofishTnig1Hs35 > ExofishTnig1Hs35.gff
    # need to have tabs between fields not a space to load file into table
    sed -e 's/ /\t/g' ExofishTnig1Hs35.gff > Tnig1Hs35format.gff
    # if "ecore" is changed to "CDS" and "ecotig" to "transcript" this loads
    # correctly into the table.
    sed -e 's/ecore/CDS/' Tnig1Hs35format.gff | sed -e 's/ecotig/transcript/' \
           > tetNig1vsHg17.gff
  
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/Genoscope/ecoresHg17
    nice ldHgGene tetNig1 ecoresHg17 tetNig1vsHg17.gff
    # Read 36417 transcripts in 184981 lines in 1 files
    # 36417 groups 27 seqs 1 sources 2 feature types
    # 36417 gene predictions
# added ecoresHg17 entry to trackDb.ra in trackDb/tetraodon
# and created ecoresHg17.html.  Genoscope will not be maintaining this
# newest data in their Exofish comparative browser display.


# ACCESSIONS FOR CONTIGS (DONE 2005-03-17 kate)
#  Used for Assembly track details, and also ENCODE AGP's
    cd /cluster/data/tetNig1/bed
    mkdir -p contigAcc
    cd contigAcc
    
    # extract WGS contig to accession mapping from Entrez Nucleotide summaries
    # To get the summary file, access the Genbank page for the project 
    # by searching:
    #       genus[ORGN] AND WGS[KYWD]
    # At the end of the page, click on the list of accessions for the contigs.
    # Select summary format, and send to file.
    # output to file <species>.acc

    grep SCAF tetra.acc | wc -l
    # 25773

    # edit hg/encode/regionAgp/contigAccession.pl to recognize tetra format
    # and install in /cluster/bin/scripts
    contigAccession tetra.acc > contigAcc.tab
    wc -l contigAcc.tab
    # 25773
    grep SCAF /cluster/data/tetNig1/chr.agp | wc -l
    # 25773
    hgsql tetNig1 < $HOME/kent/src/hg/lib/contigAcc.sql
    echo "LOAD DATA LOCAL INFILE 'contigAcc.tab' INTO TABLE contigAcc" | \
        hgsql tetNig1
    hgsql tetNig1 -e "SELECT COUNT(*) FROM contigAcc"
        # 25773

#########################################################################
# MOUSE NET/CHAINS MM6 - Info contained in makeMm6.doc (200503 Hiram)

# MAKE 2BIT FILES FOR 500 kb SEQUENCE CONTIGS, CHROMS CONTIGS AND RANDOM
# CHROMOSOME SCAFFOLDS 
# (DONE, 2005-08-24, hartera)
     ssh hgwdev
     # 2bit file from 500 kb contigs file used for blastz alignments
     faToTwoBit /cluster/bluearc/tetNig1/contigs/tetNig1Contigs.fa \
              /cluster/bluearc/tetNig1/contigs/tetNig1Contigs.2bit

     # then make FASTA file and 2bit file for just the ordered chroms and
     # not the randoms.
     ssh kkr1u00
     mkdir -p /cluster/data/tetNig1/twoBit
     cd /cluster/data/tetNig1/twoBit
     grep '>' /iscratch/i/tetNig1/contigs/tetNig1Contigs.fa | sed -e 's/>//' \
        > tetNig1.contigs
     grep -v "random" tetNig1.contigs > tetNig1.contigs.norandoms
     wc -l tetNig1.contigs.norandoms
     # 507 tetNig1.contigs.norandoms
     # get these sequence without the randoms, run randoms as scaffolds
     faSomeRecords /iscratch/i/tetNig1/contigs/tetNig1Contigs.fa \
        tetNig1.contigs.norandoms tetNig1ContigNoRandoms.fa
     faToTwoBit tetNig1ContigsNoRandoms.fa tetNig1ContigsNoRandoms.2bit
     mv tetNig1ContigsNoRandoms.2bit /cluster/bluearc/tetNig1/contigs/
     cp /cluster/bluearc/tetNig1/contigs/tetNig1ContigsNoRandoms.2bit \
        /iscratch/i/tetNig1/contigs/
     # get list of scaffolds for randoms - need to make faFrag list to get 
     # sequences:
     cd /cluster/data/tetNig1/twoBit
     mkdir -p /cluster/bluearc/tetNig1/randomScaffolds
     mkdir -p /cluster/bluearc/tetNig1/randomFa
     mkdir run
     cd run
     grep 'random' /cluster/data/tetNig1/chrom.lst > chroms.random
     foreach c (`cat chroms.random`)
        set dir=/cluster/data/tetNig1
        echo $c
        cp $dir/${c}/chr{$c}.fa /cluster/bluearc/tetNig1/randomFa/
     end 
cat > getSeqs.csh << 'EOF'
#!/bin/csh -ef
set inFa=$1
set st=$2
set end=$3
set out=$4

faFrag -mixed $inFa $st $end $out > ${out}.log
'EOF'
     chmod +x getSeqs.csh
     awk '{if (($1 ~ /random/) && ($5 ~ /SCAF/)) print "/cluster/data/tetNig1/twoBit/run/getSeqs.csh","/cluster/bluearc/tetNig1/randomFa/"$1".fa",$2-1,$3,"/cluster/bluearc/tetNig1/randomScaffolds/"$5".fa"}' /cluster/data/tetNig1/chr.agp \
       > jobList
     para create jobList  
     awk '{if ($1 ~ /random/ && ($5 !~ /GAP/)) print $5;}' \
        /cluster/data/tetNig1/chr.agp > randoms.scaffolds     
     wc -l jobList
     # 24571 jobList
     wc -l randoms.scaffolds
     # 24571 randoms.scaffolds
     para try, check, push, check etc.
# para time
# Completed: 24571 of 24571 jobs
# CPU time in finished jobs:     249907s    4165.12m    69.42h    2.89d  0.008 y
# IO & Wait Time:                 93508s    1558.46m    25.97h    1.08d  0.003 y
# Average job time:                  14s       0.23m     0.00h    0.00d
# Longest running job:                0s       0.00m     0.00h    0.00d
# Longest finished job:            3689s      61.48m     1.02h    0.04d
# Submission to last job:          3952s      65.87m     1.10h    0.05d

     # once have all the sequences, cat together and make a 2bit file
     ssh kolossus
     cd /cluster/bluearc/tetNig1/randomScaffolds 
     # replace FASTA header with scaffold name and cat sequences together
     foreach f (SCAF*.fa)
        set s=$f:r
        perl -pi.bak -e "s/>chr[0-9Un]+_random:[0-9]+\-[0-9]+/>$s/" $f
        cat $f >> tetraRandomsScaffolds.fa
     end
     faToTwoBit tetraRandomsScaffolds.fa tetraRandomsScaffolds.2bit
     # then cat to the contigs file without randoms and make a 2bit file
     cat tetraRandomsScaffolds.fa \
       /cluster/data/tetNig1/twoBit/tetNig1ContigNoRandoms.fa \
       > tetNig1ChrContigsRandomScafs.fa
     faToTwoBit tetNig1ChrContigsRandomScafs.fa \
                tetNig1ChrContigsRandomScafs.2bit
     ssh kkr1u00
     cd /cluster/bluearc/tetNig1/randomScaffolds
     # copy 2bit files to iservers
     cp *.2bit /iscratch/i/tetNig1/contigs
     cp *.2bit /cluster/bluearc/tetNig1/contigs
     # rsync to the iservers - this takes a long time these days
     # so do this with rsync
     foreach i (2 3 4 5 6 7 8)
        rsync -a --progress --delete --rsh=rsh \
           /iscratch/i/tetNig1/contigs/ kkr${i}u00:/iscratch/i/tetNig1/contigs/
     end
     # make a 2bit file with the full chroms and the random scaffolds
     ssh kolossus
     cd /cluster/data/tetNig1/twoBit
     grep -v "random" /cluster/data/tetNig1/chrom.lst > chromsNoRandoms.lst 
     foreach c (`cat chromsNoRandoms.lst`)
       cat /cluster/data/tetNig1/${c}/chr${c}.fa >> tetNig1chromsNoRandoms.fa
     end
     # then add scaffolds FASTAs
     cat tetNig1chromsNoRandoms.fa \
        /cluster/bluearc/tetNig1/randomScaffolds/tetraRandomsScaffolds.fa \
         > tetNig1ChromsRandomScafs.fa
     faToTwoBit tetNig1ChromsRandomScafs.fa \
                tetNig1ChromsRandomScafs.2bit
     mkdir -p /cluster/bluearc/tetNig1/twoBit
     mv tetNig1ChromsRandomScafs* /cluster/bluearc/tetNig1/twoBit/
     ssh kkstore03
     mkdir /cluster/data/tetNig1/liftScaffoldsToChrom
     cd /cluster/data/tetNig1/liftScaffoldsToChrom
     foreach c (`cat chromsNoRandoms.lst`)
       awk 'BEGIN {FS="\t"} {OFS="\t"} {print $2-1,$6,$3,$1;}' \
          /cluster/data/tetNig1/${c}/${c}.agp > ${c}.lft
       awk '{if ($1 == "'${c}'") print $2;}' chrom.sizes

#########################################################################
# MOUSE NET/CHAINS MM7 - Info contained in makeMm7.doc (200503 Hiram)
#########################################################################
#	Prep for the Mm7 net/chains, need a lift file for the randoms
#	(DONE - 2005-09-20 - Hiram)
    ssh kkstore003
    cd /cluster/data/tetNig1
    #	We need the lift information from the random.agp files
    cp -p /cluster/data/cb2/scripts/agpToLift.pl ./jkStuff
    mkdir liftScaffoldsToChrom
    for AGP in ?_random/*_random.agp ??_random/*_random.agp
do
    CHR=`dirname ${AGP}`
    grep -v fragment "${AGP}" | ./jkStuff/agpToLift.pl /dev/stdin \
	> liftScaffoldsToChrom/${CHR}.lft
done
    cat liftScaffoldsToChrom/*.lft > jkStuff/chromsAndScafs.lft
    cp -p jkStuff/chromsAndScafs.lft \
	/san/sanvol1/scratch/tetNig1/chromsAndScafs/chromsAndScafs.lft

# BLASTZ SWAP FOR ZEBRAFISH (danRer3) (DONE, 2005-10-20, hartera)
# CREATE CHAIN AND NET TRACKS, AXTNET, MAFNET, LIFTOVER AND ALIGNMENT DOWNLOADS.
                                                                                
    # do swap of danRer3 vs. tetNig1 chain and net alignments to
    # create tetNig1 vs. danRer3. see makeDanRer3.doc for details.
    ssh hgwdev
    cd /cluster/data/danRer3/bed/blastz.tetNig1/chromsAndScafsRun
    nohup nice /cluster/bin/scripts/doBlastzChainNet.pl `pwd`/DEF \
       -bigClusterHub=pk \
       -swap -chainMinScore=5000 >& doSwap.log &
    # Start: Thu Oct 20 11:59  Finished: Oct 20 12:25
# Parameters used for danRer3 vs tetNig1 BLASTZ:
# BLASTZ_M=50
# BLASTZ_H=2500
# BLASTZ_Q=/cluster/data/blastz/HoxD55.q
# #BLASTZ_ABRIDGE_REPEATS=1 if SMSK is specified
# BLASTZ_ABRIDGE_REPEATS=0

# Add entries for danRer3 chains and net to trackDb.ra for monDom2 and
# Modify track descriptions to describe the process using scaffolds for danRer3
# chrNA and chrUn and the fact that dynamic masking was used for the Blastz
# alignments - see makeDanRer3.doc for details.

############################################################################
##  BLASTZ swap from mm8 alignments (DONE - 2006-02-19 - Hiram)
    ssh kk
    cd /cluster/data/mm8/bed/blastzTetNig1.2006-02-19
    time /cluster/bin/scripts/doBlastzChainNet.pl -verbose=2 \
        -bigClusterHub=kk -chainMinScore=5000 -chainLinearGap=loose \
        -swap `pwd`/DEF > swap.out 2>&1 &
    time /cluster/bin/scripts/doBlastzChainNet.pl -verbose=2 \
        -bigClusterHub=kk -chainMinScore=5000 -chainLinearGap=loose \
        -swap -continue=net `pwd`/DEF > swap-net.out 2>&1 &

    time nice -n +19 featureBits tetNig1 chainMm8Link
    #   47024263 bases of 342403326 (13.734%) in intersection

###########################################################################
# BLASTZ SWAP TO CREATE CHAIN AND NET ALIGNMENTS FOR ZEBRAFISH (danRer4) 
# AND CREATE AXNET, MAFNET, LIFTOVER AND ALIGNMENT DOWNLOADS 
# (DONE, 2006-04-30, hartera)
   ssh pk
   # Used these BLASTZ parameters - see also makeDanRer4.doc
   # BLASTZ_H=2500
   # BLASTZ_M=50
   # BLASTZ_Q=/cluster/data/blastz/HoxD55.q 
   # and no lineage-specific repeats.
   # swap directory for alignments is:
   # /cluster/data/tetNig1/bed/blastz.danRer4.swap
   cd /cluster/data/danRer4/bed/blastz.tetNig1.2006-04-29
   nice /cluster/bin/scripts/doBlastzChainNet.pl -verbose=2 \
        -bigClusterHub=pk -chainMinScore=5000 -chainLinearGap=loose \
        -swap `pwd`/DEF >& doSwap.log &
   # Took about 22 minutes.
   # Check coverage compared to danRer3:
   featureBits tetNig1 chainDanRer4Link
   # 82462283 bases of 342403326 (24.083%) in intersection  
   featureBits tetNig1 chainDanRer3Link
   # 76373873 bases of 342403326 (22.305%) in intersection
   # No refGene table for tetNig1 so use mrna: 
   featureBits -chrom=chr1 tetNig1 mrna chainDanRer4Link -enrichment
   # mrna 2.624%, chainDanRer4Link 22.894%, both 1.297%, cover 49.42%, 
   # enrich 2.16x
   featureBits -chrom=chr1 tetNig1 mrna chainDanRer3Link -enrichment
   # mrna 2.624%, chainDanRer3Link 21.321%, both 1.205%, cover 45.92%, 
   # enrich 2.15x
   featureBits -chrom=chr1 tetNig1 mrna netDanRer4 -enrichment
   # mrna 2.624%, netDanRer4 78.279%, both 2.278%, cover 86.81%, enrich 1.11x
   featureBits -chrom=chr1 tetNig1 mrna netDanRer3 -enrichment
   # mrna 2.624%, netDanRer3 95.070%, both 2.435%, cover 92.80%, enrich 0.98x
   # Similar coverage for chains and nets for danRer4 and danRer3. 
   # nets coverage is a little lower for danRer4 than for danRer3.
   # Try also Human Proteins (hg17):
   featureBits -chrom=chr1 tetNig1 blastHg17KG netDanRer4 -enrichment
   # blastHg17KG 5.728%, netDanRer4 78.279%, both 5.666%, cover 98.92%, 
   # enrich 1.26x
   featureBits -chrom=chr1 tetNig1 blastHg17KG netDanRer3 -enrichment
   # blastHg17KG 5.728%, netDanRer3 95.070%, both 5.687%, cover 99.29%, 
   # enrich 1.04x
   # Similar coverage here too. Looked at chain and net tracks for danRer4
   # on the tetNig1 Browser and they look ok.

#########################################################################
##  Reorder Fish organisms (DONE - 2006-12-22 - Hiram)
    hgsql -h genome-testdb hgcentraltest \
	-e "update dbDb set orderKey = 460 where name = 'tetNig1';"

#########################################################################
# BLASTZ/CHAIN/NET FR2 (DONE - 2007-01-29 - Hiram)
##  Align to fr2 scaffolds,
##	results lifted to fr2 chrUn coordinates
    ssh kkstore03
    mkdir /cluster/data/tetNig1/bed/blastz.fr2.2007-01-25
    cd /cluster/data/tetNig1/bed/blastz.fr2.2007-01-25
    cat << '_EOF_' > DEF
# Tetraodon vs. Fugu

# Try "human-fugu" (more distant, less repeat-killed than mammal) params
# +M=50:
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Tetraodon tetNig1
SEQ1_DIR=/san/sanvol1/scratch/tetNig1/tetNig1.sdTrf.2bit
SEQ1_LEN=/san/sanvol1/scratch/tetNig1/chrom.sizes
SEQ1_CHUNK=20000000
SEQ1_LAP=10000
SEQ1_LIMIT=1

# QUERY: Fugu fr2
#       Align to the scaffolds, results lifed up to chrUn.sdTrf coordinates
SEQ2_DIR=/san/sanvol1/scratch/fr2/fr2.2bit
SEQ2_LEN=/san/sanvol1/scratch/fr2/chrom.sizes
SEQ2_CTGDIR=/san/sanvol1/scratch/fr2/fr2.scaffolds.2bit
SEQ2_CTGLEN=/san/sanvol1/scratch/fr2/fr2.scaffolds.sizes
SEQ2_LIFT=/san/sanvol1/scratch/fr2/liftAll.lft
SEQ2_CHUNK=20000000
SEQ2_LIMIT=30
SEQ2_LAP=0

BASE=/cluster/data/tetNig1/bed/blastz.fr2.2007-01-25
TMPDIR=/scratch/tmp
'_EOF_'
    # << this line keeps emacs coloring happy

    time doBlastzChainNet.pl DEF -chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-verbose=2 -bigClusterHub=pk \
	-blastzOutRoot /cluster/bluearc/tetNig1Fr2 > do.log 2>&1 &
    #	real    259m39.548s

    ## Swap to fr2
    mkdir /cluster/data/fr2/bed/blastz.tetNig1.swap
    cd /cluster/data/fr2/bed/blastz.tetNig1.swap
    time doBlastzChainNet.pl -verbose=2 \
	/cluster/data/tetNig1/bed/blastz.fr2.2007-01-25/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-bigClusterHub=pk -swap > swap.log 2>&1 &
    time doBlastzChainNet.pl -verbose=2 \
	/cluster/data/tetNig1/bed/blastz.fr2.2007-01-25/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-continue=net -bigClusterHub=pk -swap > net_swap.log 2>&1 &
    #	real    40m40.471s

    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.fr2.2007-01-25
    time nice -n +19 featureBits tetNig1 chainFr2Link \
	> fb.tetNig1.chainFr2Link.txt 2>&1 
    #	246828605 bases of 342403326 (72.087%) in intersection

    cd /cluster/data/fr2/bed/blastz.tetNig1.swap
    time nice -n +19 featureBits fr2 chainTetNig1Link \
	> fb.fr2.chainTetNig1.txt 2>&1
    #	247086553 bases of 393312790 (62.822%) in intersection

#########################################################################
## SWAP oryLat1 to tetNig1 (DONE - 2007-01-18 - Hiram)
    ssh kkstore04
    cd /cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-13
    cat fb.oryLat1.chainTetNig1Link.txt
    #	160388236 bases of 700386597 (22.900%) in intersection

    # And the swap
    ssh kkstore04
    cd /cluster/data/tetNig1/bed
    mv blastz.oryLat1.swap blastz.oryLat1.swap.2006-12-08
    mkdir blastz.oryLat1.swap
    cd /cluster/data/tetNig1/bed/blastz.oryLat1.swap
    time doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-13/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-swap -bigClusterHub=kk > swap.log 2>&1 &
    time doBlastzChainNet.pl -verbose=2 \
	/cluster/data/oryLat1/bed/blastz.tetNig1.2006-12-13/DEF \
	-chainMinScore=2000 -chainLinearGap=loose \
	-tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-continue=net -swap -bigClusterHub=kk > net-swap.log 2>&1 &

    ssh hgwdev
    cd /cluster/data/tetNig1/bed/blastz.oryLat1.swap
    nice -n 19 featureBits -noRandom tetNig1 chainOryLat1Link \
	>fb.tetNig1.oryLat1.txt 2>&1
    #	130327582 bases of 342403326 (38.063%) in intersection

##########################################################################
# N-SCAN gene predictions (nscanGene) - (2007-08-16 markd)
    cd /cluster/data/tetNig1/bed/nscan/

    # obtained NSCAN predictions from michael brent's group
    # at WUSTL
    wget http://mblab.wustl.edu/predictions/Tetraodon/tetNig1.gtf
    wget http://mblab.wustl.edu/predictions/Tetraodon/tetNig1.prot.fa
    bzip2 tetNig1.*

    # load tracks.  Note that these have *utr features, rather than
    # exon features.  currently ldHgGene creates separate genePred exons
    # for these.
    ldHgGene -bin -gtf -genePredExt tetNig1 nscanGene tetNig1.gtf.bz2

    # load protein
    hgPepPred tetNig1 generic nscanPep tetNig1.prot.fa.bz2
    rm *.tab

    # update trackDb; need a tetNig1-specific page to describe informants
    tetraodon/tetNig1/nscanGene.html   (copy from mm8 and edit)
    tetraodon/tetNig1/trackDb.ra

##############################################################################
# SWAP mm9 alignments to tetNig1 (DONE - 2007-09-07 - Hiram)
    #	the original was done at:
    cd /cluster/data/mm9/bed/blastzTetNig1.2007-09-06
    cat fb.mm9.chainTetNig1Link.txt
    #	46206292 bases of 2620346127 (1.763%) in intersection

    mkdir /cluster/data/tetNig1/bed/blastz.mm9.swap
    cd /cluster/data/tetNig1/bed/blastz.mm9.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/cluster/data/mm9/bed/blastzTetNig1.2007-09-06/DEF \
	-chainMinScore=5000 \
	-qRepeats=windowmaskerSdust -chainLinearGap=loose \
	-swap -bigClusterHub=kk > swap.log 2>&1 &
    #	real    19m58.885s
    cat fb.tetNig1.chainMm9Link.txt
    #	42256263 bases of 342403326 (12.341%) in intersection

##############################################################################
# SWAP DANRER5 BLASTZ RESULT TO GET DANRER5 CHAINS, NETS,
# AXTNET, MAFNET AND ALIGNMENT DOWNLOADS (DONE, 2007-09-17, hartera)
    
    ssh kkstore04
    mkdir /cluster/data/tetNig1/bed/blastz.swap.danRer5
    cd /cluster/data/tetNig1/bed/blastz.swap.danRer5
    # blastz parameters used to align tetNig1 as query to danRer5 as target:
    # BLASTZ_H=2500
    # BLASTZ_M=50
    # BLASTZ_Q=/cluster/data/blastz/HoxD55.q
    # Results for tetNig1 blastz on danRer5 are in:
    # /cluster/data/danRer5/bed/blastz.tetNig1.2007-09-16
    time nice /cluster/bin/scripts/doBlastzChainNet.pl -verbose=2 \
        -bigClusterHub=pk -chainMinScore=5000 -chainLinearGap=loose \
        -qRepeats=windowmaskerSdust -swap \ 
        /cluster/data/danRer5/bed/blastz.tetNig1.2007-09-16/DEF \
         >& swap.log &
    # 0.141u 0.097s 12:30.60 0.0%     0+0k 0+0io 7pf+0w 
    
    ssh hgwdev
    cat \ 
  /cluster/data/tetNig1/bed/blastz.danRer5.swap/fb.tetNig1.chainDanRer5Link.txt
    # 70448527 bases of 342403326 (20.575%) in intersection 
    # look at coverage of mrna, there is no native RefSeqs 
    # track for tetraodon, tetNig1.
    featureBits tetNig1 all_mrna chainDanRer5Link -enrichment
# all_mrna 3.104%, chainDanRer5Link 20.575%, both 1.382%, cover 44.53%, enrich
# 2.16x
    featureBits tetNig1 all_mrna chainDanRer4Link -enrichment
# all_mrna 3.104%, chainDanRer4Link 24.083%, both 1.522%, cover 49.02%, enrich
# 2.04x
    featureBits tetNig1 all_mrna netDanRer5 -enrichment
# all_mrna 3.104%, netDanRer5 70.075%, both 2.400%, cover 77.31%, enrich 1.10x
    featureBits tetNig1 all_mrna netDanRer4 -enrichment
# all_mrna 3.104%, netDanRer4 69.480%, both 2.445%, cover 78.78%, enrich 1.13x

    # clean up a little 
    ssh kkstore04
    cd /cluster/data/tetNig1/bed
    mv ./blastz.swap.danRer5/swap.log ./blastz.danRer5.swap
    rm -r blastz.swap.danRer5
    ln -s blastz.danRer5.swap blastz.danRer5
############################################################################
# TRANSMAP vertebrate.2008-05-20 build  (2008-05-24 markd)

vertebrate-wide transMap alignments were built  Tracks are created and loaded
by a single Makefile. This is available from:
   svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-05-20

see doc/builds.txt for specific details.
############################################################################
############################################################################
# TRANSMAP vertebrate.2008-06-07 build  (2008-06-30 markd)

vertebrate-wide transMap alignments were built  Tracks are created and loaded
by a single Makefile. This is available from:
   svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-06-30

see doc/builds.txt for specific details.

############################################################################
# HUMAN (hg18) PROTEINS TRACK (DONE braney 2008-07-18)
    ssh kkstore03
    # bash  if not using bash shell already

    mkdir /cluster/data/tetNig1/blastDb
    cd /cluster/data/tetNig1
    cat noUn/chr*fa > temp.fa
    faSplit gap temp.fa 1000000 blastDb/x -lift=blastDb.lft
    rm temp.fa
    cat randomContigs/*.fa > temp.fa
    faSplit sequence temp.fa 150 blastDb/y
    rm temp.fa
    cd blastDb
    for i in *.fa
    do
	/cluster/bluearc/blast229/formatdb -i $i -p F
    done
    rm *.fa

    mkdir -p /san/sanvol1/scratch/tetNig1/blastDb
    cd /cluster/data/tetNig1/blastDb
    for i in nhr nin nsq; 
    do 
	echo $i
	cp *.$i /san/sanvol1/scratch/tetNig1/blastDb
    done

    mkdir -p /cluster/data/tetNig1/bed/tblastn.hg18KG
    cd /cluster/data/tetNig1/bed/tblastn.hg18KG
    echo  /san/sanvol1/scratch/tetNig1/blastDb/*.nsq | xargs ls -S | sed "s/\.nsq//"  > query.lst
    wc -l query.lst
# 386 query.lst

   # we want around 150000 jobs
   calc `wc /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl | awk "{print \\\$1}"`/\(150000/`wc query.lst | awk "{print \\\$1}"`\)

# 36727/(150000/386) = 94.510813

   mkdir -p /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/kgfa
   split -l 95 /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl  /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/kgfa/kg
   ln -s /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/kgfa kgfa
   cd kgfa
   for i in *; do 
     nice pslxToFa $i $i.fa; 
     rm $i; 
   done
   cd ..
   ls -1S kgfa/*.fa > kg.lst
   mkdir -p /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/blastOut
   ln -s /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/blastOut
   for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
   cd /cluster/data/tetNig1/bed/tblastn.hg18KG
   tcsh
   cat << '_EOF_' > blastGsub
#LOOP
blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl }
#ENDLOOP
'_EOF_'

   cat << '_EOF_' > blastSome
#!/bin/sh
BLASTMAT=/cluster/bluearc/blast229/data
export BLASTMAT
g=`basename $2`
f=/tmp/`basename $3`.$g
for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
if /cluster/bluearc/blast229/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
then
        mv $f.8 $f.1
        break;
fi
done
if test -f  $f.1
then
    if /cluster/bin/i386/blastToPsl $f.1 $f.2
    then
	liftUp -nosort -type=".psl" -nohead $f.3 /cluster/data/tetNig1/blastDb.lft carry $f.2
        liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg18/bed/blat.hg18KG/protein.lft warn $f.3

        if pslCheck -prot $3.tmp                              
        then  
            mv $3.tmp $3                                               
            rm -f $f.1 $f.2 $f.3 $f.4
        fi
        exit 0 
    fi 
fi                                                                                
rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4
exit 1
'_EOF_'
    # << happy emacs
    chmod +x blastSome
    gensub2 query.lst kg.lst blastGsub blastSpec
    exit 
    
    ssh pk
    cd /cluster/data/tetNig1/bed/tblastn.hg18KG
    para create blastSpec
#    para try, check, push, check etc.

    para time

# Completed: 136074 of 136074 jobs

# CPU time in finished jobs:    2945512s   49091.87m   818.20h   34.09d  0.093 y
# IO & Wait Time:                569398s    9489.96m   158.17h    6.59d  0.018 y
# Average job time:                  26s       0.43m     0.01h    0.00d
# Longest finished job:             881s      14.68m     0.24h    0.01d
# Submission to last job:        146804s    2446.73m    40.78h    1.70d

    ssh kkstore03
    cd /cluster/data/tetNig1/bed/tblastn.hg18KG
    mkdir chainRun
    cd chainRun
    tcsh
    cat << '_EOF_' > chainGsub
#LOOP
chainOne $(path1)
#ENDLOOP
'_EOF_'

    cat << '_EOF_' > chainOne
(cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=100000 stdin /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/blastOut/c.`basename $1`.psl)
'_EOF_'
    chmod +x chainOne
    ls -1dS /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/blastOut/kg?? > chain.lst
    gensub2 chain.lst single chainGsub chainSpec
    # do the cluster run for chaining
    ssh memk
    cd /cluster/data/tetNig1/bed/tblastn.hg18KG/chainRun
    para create chainSpec
    para try, check, push, check etc.

# Completed: 387 of 387 jobs
# CPU time in finished jobs:       1982s      33.04m     0.55h    0.02d  0.000 y
# IO & Wait Time:                 25746s     429.10m     7.15h    0.30d  0.001 y
# Average job time:                  72s       1.19m     0.02h    0.00d
# Longest finished job:             165s       2.75m     0.05h    0.00d
# Submission to last job:          2784s      46.40m     0.77h    0.03d

    ssh kkstore03
    cd /cluster/data/tetNig1/bed/tblastn.hg18KG/blastOut
    for i in kg??
    do
       cat c.$i.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl
       sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
       awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
       echo $i
    done
    sort -T /tmp -k 14,14 -k 16,16n -k 17,17n u.*.psl m60* | uniq > /cluster/data/tetNig1/bed/tblastn.hg18KG/unliftBlastHg18KG.psl
    cd ..
    pslCheck unliftBlastHg18KG.psl
    liftUp -nohead temp.psl ../../randomContigs/tetNig1.randomContigs.lift carry unliftBlastHg18KG.psl
    sort -T /tmp -k 14,14 -k 16,16n -k 17,17n temp.psl > blastHg18KG.psl
    rm temp.psl

    # load table 
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/tblastn.hg18KG
    hgLoadPsl tetNig1 blastHg18KG.psl

    # check coverage
    featureBits tetNig1 blastHg18KG 
# 19028060 bases of 342403326 (5.557%) in intersection

    featureBits tetNig1 ensGene:cds blastHg18KG  -enrichment
# ensGene:cds 9.909%, blastHg18KG 5.557%, both 4.906%, cover 49.51%, enrich 8.91x

    featureBits tetNig1 nscanGene:cds blastHg18KG  -enrichment
# nscanGene:cds 9.388%, blastHg18KG 5.557%, both 3.568%, cover 38.01%, enrich 6.84x

    ssh kkstore03
    rm -rf /cluster/data/tetNig1/bed/tblastn.hg18KG/blastOut
    rm -rf /cluster/bluearc/tetNig1/bed/tblastn.hg18KG/blastOut
#end tblastn

##########################################################################
# SWAP BLASTZ Medaka oryLat2 (DONE - 2008-08-27 - Hiram)
    ssh kkstore03	# not too important since everything moved to hive
    screen # use screen to control this job
    cd /cluster/data/oryLat2/bed/blastz.tetNig1.2008-08-26
    cat fb.oryLat2.chainTetNig1Link.txt
    #	163171121 bases of 700386597 (23.297%) in intersection

    #	And, for the swap:
    mkdir /cluster/data/tetNig1/bed/blastz.oryLat2.swap
    cd /cluster/data/tetNig1/bed/blastz.oryLat2.swap
    time doBlastzChainNet.pl -chainMinScore=3000 -chainLinearGap=medium \
	/cluster/data/oryLat2/bed/blastz.tetNig1.2008-08-26/DEF \
	-swap -tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
	-verbose=2 -smallClusterHub=pk -bigClusterHub=pk > swap.log 2>&1 &
    #	real    22m48.350s
    cat fb.tetNig1.chainOryLat2Link.txt
    #	139059872 bases of 342403326 (40.613%) in intersection

##########################################################################
# add NCBI identifiers to the dbDb (DONE - 2008-10-21 - Hiram)
    hgsql -e 'update dbDb set
sourceName="Genoscope and Broad Institute V7 (NCBI project 12350, CAAE01000000)" where name="tetNig1";' hgcentraltest
###########################################################################
############################################################################
# TRANSMAP vertebrate.2009-07-01 build  (2009-07-21 markd)

vertebrate-wide transMap alignments were built  Tracks are created and loaded
by a single Makefile. This is available from:
   svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-07-01

see doc/builds.txt for specific details.
############################################################################
############################################################################
# TRANSMAP vertebrate.2009-09-13 build  (2009-09-20 markd)

vertebrate-wide transMap alignments were built  Tracks are created and loaded
by a single Makefile. This is available from:
   svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-09-13

see doc/builds.txt for specific details.

############################################################################
# LIFTOVER TO TetNig2 (DONE - 2010-01-15 - Hiram )
    mkdir /hive/data/genomes/tetNig1/bed/blat.tetNig2.2010-01-15
    cd /hive/data/genomes/tetNig1/bed/blat.tetNig2.2010-01-15
    # -debug run to create run dir, preview scripts...
    doSameSpeciesLiftOver.pl -debug tetNig1 tetNig2
    # Real run:
    time nice -n +19 doSameSpeciesLiftOver.pl -verbose=2 \
	-bigClusterHub=pk -dbHost=hgwdev -workhorse=hgwdev \
	 tetNig1 tetNig2 > do.log 2>&1
    #	real    36m16.693s
#############################################################################