# for emacs: -*- mode: sh; -*- # This file describes how we made the browser database on the # Chicken (Gallus gallus) January 2004 release. # CREATE BUILD DIRECTORY (DONE 1/23/04 angie) ssh kksilo mkdir /cluster/store6/galGal1 ln -s /cluster/store6/galGal1 /cluster/data/galGal1 # DOWNLOAD MITOCHONDRION GENOME SEQUENCE (DONE 1/23/04 angie) mkdir /cluster/data/galGal1/M cd /cluster/data/galGal1/M # go to http://www.ncbi.nih.gov/ and search Nucleotide for # "gallus mitochondrion genome". That shows the gi number: # 5834843 # Use that number in the entrez linking interface to get fasta: wget -O chrM.fa \ 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Text&db=Nucleotide&uid=5834843&dopt=FASTA' # Edit chrM.fa: make sure the long fancy header line says it's the # Gallus gallus mitochondrion complete genome, and then replace the # header line with just ">chrM". # DOWNLOAD GENOME SEQUENCE (DONE 1/28/04 angie) ssh kksilo cd /cluster/data/galGal1 # Contig sequences have been stable for a while: wget ftp://genome.wustl.edu/private/chicken_040105/Fasta/chicken_040105.contigs.tar.gz # Official location (password-protected): # http://genome.wustl.edu/projects/chicken/chicken_analysis/chicken_040105.contigs.tar.gz mkdir contigs cd contigs tar xvfz ../chicken_040105.contigs.tar.gz foreach f (chicken_031214.pcap.contigs*) set n = `echo $f | sed -e 's/chicken_031214.pcap.contigs//'` mv $f contigs$n.fa end faSize *.fa #1054180845 bases (107580 N's 1054073265 real) in 111864 sequences in 100 files #Total size: mean 9423.8 sd 19990.9 min 52 (Contig28908.1) max 441791 (Contig132.21) median 2122 #N count: mean 1.0 sd 5.1 mkdir /cluster/bluearc/galGal1 cat ../contigs/*.fa > /cluster/bluearc/galGal1/allContigs.fa # Get qual scores too wget ftp://genome.wustl.edu/private/chicken_040105/Qual/chicken_040105.contigs.qual.tar.gz # Official location (password-protected): # http://genome.wustl.edu/projects/chicken/chicken_analysis/chicken_040105.contigs.qual.tar.gz # BUILD AND CHECK 
CHROM-LEVEL SEQUENCE (DONE 1/30/04 angie) # 1/30/04: ZW_random has been made a part of Z. # on hgwdev, "hgsql galGal1" and get rid of any chrZW_random tables ssh kolossus cd /cluster/data/galGal1 wget ftp://genome.wustl.edu/private/lhillier/old/chicken_agp.040129.tar.gz # Note: this January 30th tarfile is labeled January 29 -- rename it # to avoid confusion with several Jan. 29 agp downloads. mv chicken_agp.040129.tar.gz chicken_agp.040130.tar.gz # Official location (password-protected): # http://genome.wustl.edu/projects/chicken/chicken_analysis/chicken_agp.040129.tar.gz # I downloaded that and cmp'd to chicken_agp.040130.tar.gz -- same. tar xvzf chicken_agp.040130.tar.gz cd chicken_agp # confirmed that the only AGP changes from chicken_agp_29_third were # that ZW_random went away and Z got some new stuff at the beginning. cp /dev/null ../chrom.lst foreach f (*.agp) set chr = `echo $f:r | sed -e 's/^chr//'` set base = `echo $chr | sed -e 's/_random$//'` mkdir -p ../$base cp -p $f ../$base if ("$chr" == "$base") echo $chr >> ../chrom.lst end # OK, tack chrM on there too: echo M >> ../chrom.lst cd /cluster/data/galGal1 # get rid of the ZW_random and make a clean slate for Z (not Z_random) rm -r ZW Z/chrZ.fa* Z/chrZ_? 
nib/chrZW_random.nib foreach c (`cat chrom.lst`) foreach agp ($c/chr$c{,_random}.agp) if (-e $agp) then set fa = $agp:r.fa echo building $fa from $agp agpToFa -simpleMultiMixed $agp $agp:t:r $fa \ /cluster/bluearc/galGal1/allContigs.fa endif end end # checkAgpAndFa prints out way too much info -- keep the end/stderr only: foreach c (`cat chrom.lst`) foreach agp ($c/chr$c{,_random}.agp) if (-e $agp) then set fa = $agp:r.fa echo checking consistency of $agp and $fa checkAgpAndFa $agp $fa | tail -1 endif end end faSize */chr*.fa #1172160385 bases (118070345 N's 1054090040 real) in 47 sequences in 47 files #Total size: mean 24939582.7 sd 47753836.2 min 2535 (chrE50C23) max 229650211 (chrUn) median 7900282 #N count: mean 2512135.0 sd 12440150.2 # COMPARE BASE COUNTS WITH WUSTL'S (DONE 2/1/04 angie) # Compare LaDeana's size summary file with our chrom faSize output: cd /cluster/data/galGal1 # Download from password-protected location: # http://genome.wustl.edu/projects/chicken/chicken_analysis/chicken.localized_per_chromosome # Actually, we can only compare total bases with faSize because faSize # doesn't differentiate between an N from a gap and an N base. tail +4 chicken.localized_per_chromosome \ | perl -wpe 'next if (/^\s*$/); \ if (s/^(\w+)\s+(\d+)\s+(\d+)\s*$//) { \ ($c, $noGapBP, $totalBP) = ($1, $2, $3); \ $c =~ s/Unlocalized/Un/; \ $chr = "chr$c"; \ $c =~ s/_random//; \ $faSize = "faSize $c/$chr.fa"; \ $faSize = `$faSize`; \ if ($faSize =~ /^(\d+) bases \((\d+) N.s (\d+) real/) { \ if ($totalBP != $1) { \ print "$chr totalBP: WUSTL $totalBP, us $1!\n"; \ } \ } else { print "parsed faSize wrong:\n$faSize\n"; } \ } else { print "what is this?\n$_\n"; }' # BREAK UP SEQUENCE INTO 5 MB CHUNKS AT CONTIGS/GAPS (DONE 1/30/04 angie) ssh kksilo cd /cluster/data/galGal1 foreach c (`cat chrom.lst`) foreach agp ($c/chr$c{,_random}.agp) if (-e $agp) then set fa = $agp:r.fa echo splitting $agp and $fa cp -p $agp $agp.bak cp -p $fa $fa.bak splitFaIntoContigs $agp $fa . 
-nSize=5000000 endif end end # splitFaIntoContigs makes new dirs for _randoms. Move their contents # back into the main chrom dirs and get rid of the _random dirs. foreach d (*_random) set base = `echo $d | sed -e 's/_random$//'` mv $d/lift/oOut.lst $base/lift/rOut.lst mv $d/lift/ordered.lft $base/lift/random.lft mv $d/lift/ordered.lst $base/lift/random.lst rmdir $d/lift mv $d/* $base rmdir $d end # Make a "pseudo-contig" for processing chrM too: mkdir M/chrM_1 sed -e 's/chrM/chrM_1/' M/chrM.fa > M/chrM_1/chrM_1.fa mkdir M/lift echo "chrM_1/chrM_1.fa.out" > M/lift/oOut.lst echo "chrM_1" > M/lift/ordered.lst echo "0 M/chrM_1 16775 chrM 16775" > M/lift/ordered.lft # MAKE JKSTUFF AND BED DIRECTORIES (DONE 1/23/04 angie) # This used to hold scripts -- better to keep them inline here so # they're in CVS. Now it should just hold lift file(s) and # temporary scripts made by copy-paste from this file. mkdir /cluster/data/galGal1/jkStuff # This is where most tracks will be built: mkdir /cluster/data/galGal1/bed # CREATING DATABASE (DONE 1/28/04 angie) ssh hgwdev echo 'create database galGal1' | hgsql '' # Use df to make sure there is at least 75G free on hgwdev:/var/lib/mysql df -h /var/lib/mysql # /dev/sdc1 1.8T 221G 1.5T 14% /var/lib/mysql # CREATING GRP TABLE FOR TRACK GROUPING (DONE 1/28/04 angie) ssh hgwdev echo "create table grp (PRIMARY KEY(NAME)) select * from hg16.grp" \ | hgsql galGal1 # MAKE CHROMINFO TABLE WITH (TEMPORARILY UNMASKED) NIBS (DONE 1/30/04 angie) # Make nib/, unmasked until RepeatMasker and TRF steps are done. # Do this now so we can load up RepeatMasker and run featureBits; # can also load up other tables that don't depend on masking. ssh kksilo cd /cluster/data/galGal1 mkdir nib foreach c (`cat chrom.lst`) foreach f ($c/chr${c}{,_random}.fa) if (-e $f) then echo "nibbing $f" /cluster/bin/i386/faToNib $f nib/$f:t:r.nib endif end end # Make symbolic links from /gbdb/galGal1/nib to the real nibs. 
ssh hgwdev mkdir -p /gbdb/galGal1/nib foreach f (/cluster/data/galGal1/nib/chr*.nib) ln -s $f /gbdb/galGal1/nib end # Load /gbdb/galGal1/nib paths into database and save size info. cd /cluster/data/galGal1 hgsql galGal1 < $HOME/kent/src/hg/lib/chromInfo.sql hgNibSeq -preMadeNib galGal1 /gbdb/galGal1/nib */chr*.fa echo "select chrom,size from chromInfo" | hgsql -N galGal1 > chrom.sizes # take a look at chrom.sizes, should be 47 lines wc chrom.sizes # REPEAT MASKING (DONE 1/31/04 angie) #- Split contigs into 500kb chunks, at gaps if possible: ssh kksilo cd /cluster/data/galGal1 foreach c (`cat chrom.lst`) foreach d ($c/chr${c}*_?{,?}) cd $d echo "splitting $d" set contig = $d:t ~/bin/i386/faSplit gap $contig.fa 500000 ${contig}_ -lift=$contig.lft \ -minGapSize=100 cd ../.. end end #- Make the run directory and job list: cd /cluster/data/galGal1 cat << '_EOF_' > jkStuff/RMChicken #!/bin/csh -fe cd $1 pushd . /bin/mkdir -p /tmp/galGal1/$2 /bin/cp $2 /tmp/galGal1/$2/ cd /tmp/galGal1/$2 /cluster/bluearc/RepeatMasker/RepeatMasker -ali -s -spec chicken $2 popd /bin/cp /tmp/galGal1/$2/$2.out ./ if (-e /tmp/galGal1/$2/$2.align) /bin/cp /tmp/galGal1/$2/$2.align ./ if (-e /tmp/galGal1/$2/$2.tbl) /bin/cp /tmp/galGal1/$2/$2.tbl ./ if (-e /tmp/galGal1/$2/$2.cat) /bin/cp /tmp/galGal1/$2/$2.cat ./ /bin/rm -fr /tmp/galGal1/$2/* /bin/rmdir --ignore-fail-on-non-empty /tmp/galGal1/$2 /bin/rmdir --ignore-fail-on-non-empty /tmp/galGal1 '_EOF_' # << this line makes emacs coloring happy chmod +x jkStuff/RMChicken mkdir RMRun rm -f RMRun/RMJobs touch RMRun/RMJobs foreach c (`cat chrom.lst`) foreach d ($c/chr${c}{,_random}_?{,?}) set ctg = $d:t foreach f ( $d/${ctg}_?{,?}.fa ) set f = $f:t echo /cluster/data/galGal1/jkStuff/RMChicken \ /cluster/data/galGal1/$d $f \ '{'check out line+ /cluster/data/galGal1/$d/$f.out'}' \ >> RMRun/RMJobs end end end #- Do the run ssh kk cd /cluster/data/galGal1/RMRun para create RMJobs para try, para check, para check, para push, para check,... 
#Completed: 2783 of 2783 jobs #Average job time: 1448s 24.13m 0.40h 0.02d #Longest job: 3201s 53.35m 0.89h 0.04d #Submission to last job: 5738s 95.63m 1.59h 0.07d #- Lift up the 500KB chunk .out's to 5MB ("pseudo-contig") level ssh kksilo cd /cluster/data/galGal1 foreach d (*/chr*_?{,?}) set contig = $d:t echo $contig liftUp $d/$contig.fa.out $d/$contig.lft warn $d/${contig}_*.fa.out \ > /dev/null end #- Lift pseudo-contigs to chromosome level foreach c (`cat chrom.lst`) echo lifting $c cd $c if (-e lift/ordered.lft && ! -z lift/ordered.lft) then liftUp chr$c.fa.out lift/ordered.lft warn `cat lift/oOut.lst` \ > /dev/null endif if (-e lift/random.lft && ! -z lift/random.lft) then liftUp chr${c}_random.fa.out lift/random.lft warn `cat lift/rOut.lst` \ > /dev/null endif cd .. end #- Load the .out files into the database with: ssh hgwdev cd /cluster/data/galGal1 hgLoadOut galGal1 */chr*.fa.out # VERIFY REPEATMASKER RESULTS (DONE 1/30/04 angie) # Eyeball some repeat annotations in the browser, compare to lib seqs. # Run featureBits on galGal1 and on a comparable genome build, and compare: ssh hgwdev featureBits galGal1 rmsk #103553237 bases of 1054197620 (9.823%) in intersection # No comparable species -- in conf call 01/21/04, Arian said about # 8-10% should be covered. # MAKE LIFTALL.LFT (DONE 1/30/04 angie) ssh kksilo cd /cluster/data/galGal1 cat */lift/{ordered,random}.lft > jkStuff/liftAll.lft # PUT UNMASKED CHUNKS ON BLUEARC (DONE 1/30/04 angie) # Some of Terry's staging scripts require a flat dir containing # all contigs; also, doesn't hurt to set aside unmasked seq. ssh kksilo cd /cluster/data/galGal1 mkdir /cluster/bluearc/galGal1/unmaskedChunks foreach d (*/chr*_?{,?}) cp -p $d/${d:t}.fa /cluster/bluearc/galGal1/unmaskedChunks end # MAKE STS MARKERS TRACK (IN PROGRESS 1/30/04 angie -- need more info from Terry) # Doing a dry run to see if I can duplicate Terry's mapping of # markers to the prelim release. 
set base = /cluster/data/galGal1/bed/markers mkdir -p $base/primers $base/sts # Need to find out how Terry built these: cp -p /cluster/bluearc/gg1/markers/sts/convert.pl $base cp -p /cluster/bluearc/gg1/markers/stsAlias.bed $base cp -p /cluster/bluearc/gg1/markers/stsInfoGG.bed $base cp -p /cluster/bluearc/gg1/10.ooc $base # Need to find out where Terry got these files: cp -p /cluster/bluearc/gg1/markers/primers/all.primers{,.fa} $base/primers cp -p /cluster/bluearc/gg1/markers/sts/all.sequence.fa $base/sts # Run e-PCR on primers mkdir $base/primers/epcr cd $base/primers/epcr mkdir epcr.out /cluster/bin/scripts/createCloneList \ /cluster/bluearc/galGal1/unmaskedChunks # That script creates in.lst/$n.in files, where $n is # the contigs number *plus 1*! in.lst/0.in is empty and makes # para create complain, so get rid of it. rm in.lst/0.in egrep -v '/0.in$' files.lst > tmp.lst; mv tmp.lst files.lst echo $base/primers/all.primers > epcr.lst cat << '_EOF_' > template #LOOP /cluster/bin/scripts/runEpcr {check in line+ $(path1)} {check in line+ $(path2)} {check out exists epcr.out/$(root2).epcr} #ENDLOOP '_EOF_' # << this line keeps emacs coloring happy /cluster/bin/i386/gensub2 epcr.lst files.lst template jobList ssh kk set base = /cluster/data/galGal1/bed/markers cd $base/primers/epcr para create jobList para try, check, push, check, ... #Completed: 251 of 251 jobs #Average job time: 30s 0.50m 0.01h 0.00d #Longest job: 45s 0.75m 0.01h 0.00d #Submission to last job: 46s 0.77m 0.01h 0.00d cat `ls -1 epcr.out/* | sort -g` > all.epcr # blat primers -- with old blat.2 ! 
mkdir $base/primers/blat cd $base/primers/blat mkdir primers.out ls -1S /cluster/bluearc/galGal1/unmaskedChunks/*.fa > contigs.lst echo $base/primers/all.primers.fa > primers.lst cat << '_EOF_' > template #LOOP /cluster/home/kent/bin/i386/blat.2 -tileSize=10 -ooc=../../10.ooc -minMatch=1 -minScore=1 -minIdentity=75 {check in line+ $(path1)} {check in line+ $(path2)} {check out line+ primers.out/$(root1).$(root2).psl} #ENDLOOP '_EOF_' # << this line keeps emacs coloring happy /cluster/bin/i386/gensub2 contigs.lst primers.lst template jobList ssh kk set base = /cluster/data/galGal1/bed/markers cd $base/primers/blat para create jobList para try, check, push, check, ... #Completed: 261 of 261 jobs #Average job time: 18s 0.30m 0.00h 0.00d #Longest job: 27s 0.45m 0.01h 0.00d #Submission to last job: 28s 0.47m 0.01h 0.00d /cluster/bin/i386/pslSort dirs primers.psl temp primers.out rm -r temp # Make primers.final from e-PCR and blat results: cd $base/primers filterPrimers -chicken ../stsInfoGG.bed blat/primers.psl all.primers \ epcr/all.epcr > primers.psl.filter extractPslInfo primers.psl.filter mv primers.psl.filter.initial primers.initial sort -k 4n primers.initial > primers.final # blat sts sequences mkdir $base/sts/blat cd $base/sts/blat mkdir stsMarkers.out ls -1S /cluster/bluearc/galGal1/unmaskedChunks/*.fa > contigs.lst echo $base/sts/all.sequence.fa > stsMarkers.lst cat << '_EOF_' > template #LOOP /cluster/bin/i386/blat {check in line+ $(path1)} {check in line+ $(path2)} {check out line+ stsMarkers.out/$(root1).psl} #ENDLOOP '_EOF_' # << this line keeps emacs coloring happy /cluster/bin/i386/gensub2 contigs.lst stsMarkers.lst template jobList ssh kk set base = /cluster/data/galGal1/bed/markers cd $base/sts/blat para create jobList para try, check, push, check, ... 
#Completed: 261 of 261 jobs #Average job time: 96s 1.59m 0.03h 0.00d #Longest job: 199s 3.32m 0.06h 0.00d #Submission to last job: 199s 3.32m 0.06h 0.00d /cluster/bin/i386/pslSort dirs raw.psl temp stsMarkers.out /cluster/bin/i386/pslReps -nearTop=0.01 -minCover=0.6 -minAli=0.8 \ -noIntrons raw.psl stsMarkers.psl /dev/null rm -rf temp rm -f raw.psl cd $base/sts filterPsl blat/stsMarkers.psl extractUniqPsl stsMarkers.psl.filter stsMarkers.psl.filter pslReps stsMarkers.psl.filter.dup.psl stsMarkers.psl.filter.1 /dev/null \ -noIntrons -minCover=0.80 -minAli=0.90 -nearTop=0.05 mv stsMarkers.psl.filter stsMarkers.psl.filter.orig cat stsMarkers.psl.filter.1 stsMarkers.psl.filter.uniq.psl \ > stsMarkers.psl.filter extractPslInfo -h stsMarkers.psl.filter rm stsMarkers.psl.filter.1* rm stsMarkers.psl.filter.dup.psl stsMarkers.psl.filter.names \ stsMarkers.psl.filter.uniq.psl extractPslInfo -h stsMarkers.psl.filter mv stsMarkers.psl.filter.initial stsMarkers.initial sort -k 4n stsMarkers.initial > stsMarkers.final.orig ../convert.pl ../stsAlias.bed stsMarkers.final.orig > stsMarkers.final cd $base combineSeqPrimerPos $base/sts/stsMarkers.final \ $base/primers/primers.final > stsMarkers_pos.rdb # This step needs work -- output is 0-length createSTSbed stsInfoGG.bed stsMarkers_pos.rdb > stsMap.bed ssh hgwdev cd /cluster/data/galGal1/bed/markers # Make an stsMarkersTmp so we can at least see the locations awk '{printf "%s\t%s\t%s\t%s\t%d\n", $1, $2, $3, $4, $5 * 1000};' \ sts/stsMarkers.final.orig \ | liftUp -type=.bed stsMarkersTmp.bed ../../jkStuff/liftAll.lft warn stdin hgLoadBed galGal1 stsMarkerTmp stsMarkersTmp.bed # lift & load all_sts_primer liftUp primers.filter.lifted.psl ../../jkStuff/liftAll.lft warn \ primers/primers.psl.filter hgLoadPsl -table=all_sts_primer -fastLoad \ galGal1 primers.filter.lifted.psl # lift & load all_sts_seq liftUp stsMarkers.filter.lifted.psl ../../jkStuff/liftAll.lft warn \ sts/stsMarkers.psl.filter hgLoadPsl -table=all_sts_seq -fastLoad \ 
galGal1 stsMarkers.filter.lifted.psl # load sequences mkdir /gbdb/galGal1/sts ln -s /cluster/data/galGal1/bed/markers/sts/all.sequence.fa \ /gbdb/galGal1/sts/ ln -s /cluster/data/galGal1/bed/markers/primers/all.primers.fa \ /gbdb/galGal1/sts/ hgLoadSeq galGal1 /gbdb/galGal1/sts/*.fa # BACENDS (DONE 1/31/04 angie) ssh kksilo mkdir /cluster/data/galGal1/bed/bacends cd /cluster/data/galGal1/bed/bacends # download wget ftp://ftp.ncbi.nih.gov/genomes/BACENDS/gallus_gallus/AllBACends.mfa.gz wget ftp://ftp.ncbi.nih.gov/genomes/BACENDS/gallus_gallus/cl_acc_gi_len.gz gunzip *.gz /cluster/bin/scripts/convertBacEndPairInfo cl_acc_gi_len >& pairInfo.log # Next time around, don't keep the version number, so that the # bacends.fa accs will match the cl_acc_gi_len accs. Use this # replacement instead: 's/^>gi\|\d+\|gb\|(\w+)\.\d+\|.*/>$1/' perl -wpe 's/^>gi\|\d+\|gb\|(\w+\.\d+)\|.*/>$1/' AllBACends.mfa \ > bacends.fa faSize bacends.fa #146921771 bases (2724913 N's 144196858 real) in 135555 sequences in 1 files #Total size: mean 1083.9 sd 184.7 min 64 (CC322877.1) max 2431 (CC263290.1) median 1095 #N count: mean 20.1 sd 73.1 mkdir /cluster/bluearc/galGal1/bacends faSplit sequence bacends.fa 20 /cluster/bluearc/galGal1/bacends/bacends ls -1S /cluster/bluearc/galGal1/bacends/*.fa > bacends.lst ls -1S /cluster/bluearc/galGal1/unmaskedChunks/*.fa > contigs.lst cat << '_EOF_' > template #LOOP /cluster/bin/i386/blat $(path1) $(path2) -tileSize=10 -ooc=../markers/10.ooc {check out line+ psl/$(root1)_$(root2).psl} #ENDLOOP '_EOF_' # << this line keeps emacs coloring happy mkdir psl gensub2 contigs.lst bacends.lst template jobList ssh kk cd /cluster/data/galGal1/bed/bacends para create jobList para try, check, push, check, ... 
#Completed: 5220 of 5220 jobs #Average job time: 105s 1.75m 0.03h 0.00d #Longest job: 2431s 40.52m 0.68h 0.03d #Submission to last job: 2608s 43.47m 0.72h 0.03d # back on kksilo, filter and lift results: pslSort dirs raw.psl temp psl pslReps -nearTop=0.02 -minCover=0.60 -minAli=0.85 -noIntrons raw.psl \ bacEnds.psl /dev/null liftUp bacEnds.lifted.psl ../../jkStuff/liftAll.lft warn bacEnds.psl # pslSort ?? rm -r temp rm raw.psl extractUniqPsl bacEnds.lifted.psl bacEnds.psl.filter pslReps bacEnds.psl.filter.dup.psl bacEnds.psl.filter.1 /dev/null \ -noIntrons -minCover=0.80 -minAli=0.90 -nearTop=0.05 extractUniqPsl bacEnds.psl.filter.1 bacEnds.psl.filter.1 rm bacEnds.psl.filter.1 pslReps bacEnds.psl.filter.1.dup.psl bacEnds.psl.filter.2 /dev/null \ -noIntrons -minCover=0.90 -minAli=0.95 -nearTop=0.01 extractUniqPsl bacEnds.psl.filter.2 bacEnds.psl.filter.2 rm bacEnds.psl.filter.2 pslReps bacEnds.psl.filter.2.dup.psl bacEnds.psl.filter.3 /dev/null \ -noIntrons -minCover=0.95 -minAli=0.98 -nearTop=0.01 cat bacEnds.psl.filter.3 \ bacEnds.psl.filter.uniq.psl \ bacEnds.psl.filter.1.uniq.psl \ bacEnds.psl.filter.2.uniq.psl \ > bacEnds.psl.filter.lifted rm *.names bacEnds.psl.filter.3 bacEnds.psl.filter.uniq.psl \ bacEnds.psl.filter.1.uniq.psl bacEnds.psl.filter.2.uniq.psl extractPslInfo -h -be bacEnds.psl.filter.lifted mv bacEnds.psl.filter.lifted.initial bacEnds.initial # Oops, should have stripped the version from bacends.fa acc's so # they would match cl_acc_gi_len accs! 
perl -wpe 's/^(\w+\s+\d+\s+\d+\s+\w+)\.\d+/$1/' bacEnds.initial \
  > bacEndsNoVers.initial
mv bacEndsNoVers.initial bacEnds.initial
findAccession -agp -chicken bacEnds.initial /cluster/data/galGal1
# rm bacEnds.initial
sort -k 4 bacEnds.initial.acc > bacEnds.sort
compileBacEnds bacEnds.sort bacEndPairs.txt bacEndSingles.txt \
  > bacEnds.temp
sorttbl chrom chromStart < bacEnds.temp > bacEnds.rdb
rm bacEnds.initial.acc bacEnds.sort bacEnds.temp
createBACpairs bacEnds.rdb bacEndPairs.txt > temp.rdb
sorttbl chrom1 -r ord1 chromStart1 < temp.rdb > bacEnds.pairs.rdb
rm temp.rdb
createBACpairs -bed bacEnds.rdb bacEndPairs.txt > bacEndPairs.rdb
sorttbl chrom chromStart < bacEndPairs.rdb \
  | headchg -del > bacEndPairs.bed
rm bacEndPairs.rdb
createBacPairsBad -bed bacEnds.rdb bacEndPairs.txt > bacEndPairsBad.rdb
sorttbl chrom chromStart < bacEndPairsBad.rdb \
  | headchg -del > bacEndPairsBad.bed
rm bacEndPairsBad.rdb
# NOTE(review): bacEndPairsLong.rdb is consumed below, but no command above
# creates it -- presumably a createBacPairsLong (or similar) invocation was
# omitted from this log; confirm before reusing this recipe.
sorttbl chrom chromStart < bacEndPairsLong.rdb \
  | headchg -del > bacEndPairsLong.bed
rm bacEndPairsLong.rdb
# This made something 0-length, but I'm not going to pursue for now:
findMissingEnd -max 500000 singleBac.txt bacEnds.psl.filter.2.dup.psl \
  bacEnds.psl.filter.1.dup.psl bacEnds.psl.filter.dup.psl > bacEndsMiss.psl
mv bacEnds.psl.filter.lifted bacEnds.psl.filter.lifted.orig
cat bacEnds.psl.filter.lifted.orig bacEndsMiss.psl \
  > bacEnds.psl.filter.lifted
rm bacEnds.psl.filter.2.dup.psl bacEnds.psl.filter.1.dup.psl \
  bacEnds.psl.filter.dup.psl
# Doh...
again, having to strip those damn version numbers: perl -wpe \ '@w=split(/\t/); if (defined $w[9]){ $w[9] =~ s/(\w+)\.\d+/$1/; } \ $_=join("\t",@w)' \ bacEnds.psl.filter.lifted > tmp mv tmp bacEnds.psl.filter.lifted extractPslLoad bacEnds.psl.filter.lifted bacEndPairs.bed \ bacEndPairsBad.bed bacEndPairsLong.bed > bacEnds.psl.rdb sorttbl tname tstart < bacEnds.psl.rdb | headchg -del > bacEnds.load.psl rm bacEnds.psl.rdb # OK, now before loading sequences, we really have to fix bacends.fa: perl -wpe 's/^>(\w+)\.\d+/>$1/' bacends.fa \ > tmp mv tmp bacends.fa # Load seq's and alignments ssh hgwdev cd /cluster/data/galGal1/bed/bacends mkdir /gbdb/galGal1/bacends ln -s /cluster/data/galGal1/bed/bacends/bacends.fa /gbdb/galGal1/bacends hgLoadSeq galGal1 /gbdb/galGal1/bacends/bacends.fa hgLoadPsl -table=all_bacends -fastLoad \ galGal1 /cluster/data/galGal1/bed/bacends/bacEnds.load.psl # Don't know which two rows prompted this... but all seem to have loaded. #load of all_bacends did not go as planned: 76325 record(s), 0 row(s) skipped, 2 warning(s) loading psl.tab ~/bin/i386/hgLoadBed -sqlTable=$HOME/kent/src/hg/lib/bacEndPairs.sql \ -hasBin galGal1 bacEndPairs bacEndPairs.bed sed -e 's/bacEndPairs/bacEndPairsBad/g' \ $HOME/kent/src/hg/lib/bacEndPairs.sql > bacEndPairsBad.sql ~/bin/i386/hgLoadBed -sqlTable=bacEndPairsBad.sql \ -hasBin galGal1 bacEndPairsBad bacEndPairsBad.bed # Doh! 12 lines of that have negative starts. Well, trim those # for now and load the rest, then go back and find the neg-start # cause... egrep -v ' -[0-9]+' bacEndPairsBad.bed > bacEndPairsBadFudge.bed ~/bin/i386/hgLoadBed -sqlTable=bacEndPairsBad.sql \ -hasBin galGal1 bacEndPairsBad bacEndPairsBadFudge.bed sed -e 's/bacEndPairs/bacEndPairsLong/g' \ $HOME/kent/src/hg/lib/bacEndPairs.sql > bacEndPairsLong.sql ~/bin/i386/hgLoadBed -sqlTable=bacEndPairsLong.sql \ -hasBin galGal1 bacEndPairsLong bacEndPairsLong.bed # SIMPLE REPEATS (TRF) (DONE 1/30/04 angie) # TRF runs pretty quickly now... 
it takes a few hours total runtime, # so instead of binrsyncing and para-running, just do this on the # local fileserver ssh kksilo mkdir /cluster/data/galGal1/bed/simpleRepeat cd /cluster/data/galGal1/bed/simpleRepeat mkdir trf cp /dev/null jobs.csh foreach d (/cluster/data/galGal1/*/chr*_?{,?}) set ctg = $d:t foreach f ($d/${ctg}.fa) set fout = $f:t:r.bed echo $fout echo "/cluster/bin/i386/trfBig -trf=/cluster/bin/i386/trf $f /dev/null -bedAt=trf/$fout -tempDir=/tmp" \ >> jobs.csh end end csh jobs.csh >&! jobs.log & # check on this with tail -f jobs.log wc -l jobs.csh ls -1 trf | wc -l # When job is done do: liftUp simpleRepeat.bed /cluster/data/galGal1/jkStuff/liftAll.lft warn \ trf/*.bed # Load into the database: ssh hgwdev hgLoadBed galGal1 simpleRepeat \ /cluster/data/galGal1/bed/simpleRepeat/simpleRepeat.bed \ -sqlTable=$HOME/src/hg/lib/simpleRepeat.sql featureBits galGal1 simpleRepeat # 8436953 bases of 1054197620 (0.800%) in intersection # Seems awful low even though chicken is supposed to be a # low-repeat-density animal. Run it by someone... 
# PROCESS SIMPLE REPEATS INTO MASK (DONE 1/30/04 angie)
# After the simpleRepeats track has been built, make a filtered version
# of the trf output: keep trf's with period <= 12:
ssh kksilo
cd /cluster/data/galGal1/bed/simpleRepeat
mkdir -p trfMask
foreach f (trf/chr*.bed)
  awk '{if ($5 <= 12) print;}' $f > trfMask/$f:t
end
# Lift up filtered trf output to chrom coords as well:
cd /cluster/data/galGal1
mkdir bed/simpleRepeat/trfMaskChrom
foreach c (`cat chrom.lst`)
  if (-e $c/lift/ordered.lst) then
    perl -wpe 's@(\S+)@bed/simpleRepeat/trfMask/$1.bed@' \
      $c/lift/ordered.lst > $c/lift/oTrf.lst
    liftUp bed/simpleRepeat/trfMaskChrom/chr$c.bed \
      jkStuff/liftAll.lft warn `cat $c/lift/oTrf.lst`
  endif
  if (-e $c/lift/random.lst) then
    perl -wpe 's@(\S+)@bed/simpleRepeat/trfMask/$1.bed@' \
      $c/lift/random.lst > $c/lift/rTrf.lst
    liftUp bed/simpleRepeat/trfMaskChrom/chr${c}_random.bed \
      jkStuff/liftAll.lft warn `cat $c/lift/rTrf.lst`
  endif
end
# Here's the coverage for the filtered TRF:
ssh hgwdev
cat /cluster/data/galGal1/bed/simpleRepeat/trfMaskChrom/*.bed \
  > /tmp/filtTrf.bed
featureBits galGal1 /tmp/filtTrf.bed
# 4512560 bases of 1054197620 (0.428%) in intersection

# MASK SEQUENCE WITH REPEATMASKER AND SIMPLE REPEAT/TRF (DONE 1/31/04 angie)
ssh kksilo
cd /cluster/data/galGal1
# Soft-mask (lower-case) the contig and chr .fa's,
# then make hard-masked versions from the soft-masked.
set trfCtg=bed/simpleRepeat/trfMask set trfChr=bed/simpleRepeat/trfMaskChrom foreach f (*/chr*.fa) echo "repeat- and trf-masking $f" maskOutFa -soft $f $f.out $f set chr = $f:t:r maskOutFa -softAdd $f $trfChr/$chr.bed $f echo "hard-masking $f" maskOutFa $f hard $f.masked end foreach c (`cat chrom.lst`) echo "repeat- and trf-masking contigs of chr$c, chr${c}_random" foreach d ($c/chr*_?{,?}) set ctg=$d:t set f=$d/$ctg.fa maskOutFa -soft $f $f.out $f maskOutFa -softAdd $f $trfCtg/$ctg.bed $f maskOutFa $f hard $f.masked end end #- Rebuild the nib files, using the soft masking in the fa: mkdir nib foreach f (*/chr*.fa) faToNib -softMask $f nib/$f:t:r.nib end # GOLD AND GAP TRACKS (DONE 1/30/04 angie) ssh hgwdev cd /cluster/data/galGal1 hgGoldGapGl -noGl -chromLst=chrom.lst galGal1 /cluster/data/galGal1 . # featureBits fails if there's no chrM_gap, so make one: # echo "create table chrM_gap like chr1_gap" | hgsql galGal1 # oops, that won't work until v4.1, so do this for the time being: echo "create table chrM_gap select * from chr1_gap where 0=1" \ | hgsql galGal1 # MAKE GCPERCENT (DONE 1/30/04 angie -- DROPPED 2/4/04, replaced by gc20kBase) ssh hgwdev mkdir /cluster/data/galGal1/bed/gcPercent cd /cluster/data/galGal1/bed/gcPercent hgsql galGal1 < $HOME/kent/src/hg/lib/gcPercent.sql hgGcPercent galGal1 ../../nib # MAKE HGCENTRALTEST ENTRY AND TRACKDB TABLE FOR GALGAL1 (DONE 1/28/04 angie) ssh hgwdev # Add dbDb and defaultDb entries: echo 'insert into dbDb (name, description, nibPath, organism, \ defaultPos, active, orderKey, genome, scientificName, \ htmlPath, hgNearOk) \ values("galGal1", "Jan. 
2004", \ "/gbdb/galGal1/nib", "Chicken", "chr13:13124250-13128200", 1, \ 35, "Chicken", "Gallus gallus", \ "/gbdb/galGal1/html/description.html", 0);' \ | hgsql -h genome-testdb hgcentraltest echo 'insert into defaultDb values("Chicken", "galGal1");' \ | hgsql -h genome-testdb hgcentraltest # Make trackDb table so browser knows what tracks to expect: ssh hgwdev cd $HOME/kent/src/hg/makeDb/trackDb cvs up -d -P # Edit that makefile to add galGal1 in all the right places and do make update make alpha cvs commit makefile # MAKE HGCENTRALTEST BLATSERVERS ENTRY FOR GALGAL1 (TODO) ssh hgwdev echo 'insert into blatServers values("galGal1", "blat?", "17778", "1"); \ insert into blatServers values("galGal1", "blat?", "17779", "0");' \ | hgsql -h genome-testdb hgcentraltest # MAKE DESCRIPTION/SAMPLE POSITION HTML PAGE (DONE 1/28/04 angie) ssh hgwdev mkdir /gbdb/galGal1/html # Write ~/kent/src/hg/makeDb/trackDb/chicken/galGal1/description.html # with a description of the assembly and some sample position queries. chmod a+r $HOME/kent/src/hg/makeDb/trackDb/chicken/galGal1/description.html # Check it in and copy (ideally using "make alpha" in trackDb) to # /gbdb/galGal1/html # MAKE DOWNLOADABLE SEQUENCE FILES (TODO) ssh kksilo cd /cluster/data/galGal1 #- Build the .zip files cat << '_EOF_' > jkStuff/zipAll.csh rm -rf zip mkdir zip zip -j zip/chromAgp.zip */chr*.agp zip -j zip/chromOut.zip */chr*.fa.out zip -j zip/chromFa.zip */chr*.fa zip -j zip/chromFaMasked.zip */chr*.fa.masked cd bed/simpleRepeat zip ../../zip/chromTrf.zip trfMaskChrom/chr*.bed cd ../.. cd /cluster/data/genbank ./bin/i386/gbGetSeqs -db=galGal1 -native GenBank mrna \ /cluster/data/galGal1/zip/mrna.fa cd /cluster/data/galGal1/zip zip -j mrna.zip mrna.fa '_EOF_' # << this line makes emacs coloring happy csh ./jkStuff/zipAll.csh |& tee zip/zipAll.log cd zip #- Look at zipAll.log to make sure all file lists look reasonable. 
#- Check zip file integrity: foreach f (*.zip) unzip -t $f > $f.test tail -1 $f.test end wc -l *.zip.test #- Copy the .zip files to hgwdev:/usr/local/apache/... ssh hgwdev cd /cluster/data/galGal1/zip set gp = /usr/local/apache/htdocs/goldenPath/galGal1 mkdir -p $gp/chromosomes foreach f ( ../*/chr*.fa ) zip -j $gp/chromosomes/$f:t.zip $f end mkdir -p $gp/bigZips cp -p *.zip $gp/bigZips cd $gp # Take a look at bigZips/* and chromosomes/*, update their README.txt's # Can't make refGene upstream sequence files - no refSeq for chicken. # Maybe ensGene when we get that. # PUT MASKED SEQUENCE OUT FOR CLUSTER RUNS (DONE 1/31/04 angie) ssh kksilo # Chrom-level mixed nibs that have been repeat- and trf-masked: rm -rf /cluster/bluearc/galGal1/nib mkdir /cluster/bluearc/galGal1/nib cp -p nib/chr*.nib /cluster/bluearc/galGal1/nib # Pseudo-contig fa that have been repeat- and trf-masked: rm -rf /cluster/bluearc/galGal1/trfFa mkdir /cluster/bluearc/galGal1/trfFa foreach d (*/chr*_?{,?}) cp $d/$d:t.fa /cluster/bluearc/galGal1/trfFa end # MAKE CHICKEN LINEAGE-SPECIFIC REPEATS (DONE 2/18/04 angie) # In an email 2/13/04, Arian said we could treat all human repeats as # lineage-specific, but could exclude these from chicken as ancestral: # L3, L3a, L3b, MIR, MIR3, MIRb, MIRm ssh kksilo cd /cluster/data/galGal1 mkdir /cluster/bluearc/galGal1/linSpecRep foreach f (*/chr*.fa.out) awk '$10 !~/^(L3|L3a|L3b|MIR|MIR3|MIRb|MIRm)$/ {print;}' $f \ > /cluster/bluearc/galGal1/linSpecRep/$f:t:r:r.out.spec end # SWAP BLASTZ HUMAN-CHICKEN TO CHICKEN-HUMAN (HG16) (DONE 2/19/04 angie) ssh kolossus mkdir /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18 cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18 set aliDir = /cluster/data/hg16/bed/blastz.galGal1.2004-02-18 cp $aliDir/S1.len S2.len cp $aliDir/S2.len S1.len mkdir unsorted axtChrom cat $aliDir/axtChrom/chr*.axt \ | axtSwap stdin $aliDir/S1.len $aliDir/S2.len stdout \ | axtSplitByTarget stdin unsorted # Sort the shuffled .axt files. 
foreach f (unsorted/*.axt) echo sorting $f:t:r axtSort $f axtChrom/$f:t end du -sh $aliDir/axtChrom unsorted axtChrom #1.4G /cluster/data/hg16/bed/blastz.galGal1.2004-02-18/axtChrom #1.4G unsorted #1.4G axtChrom rm -r unsorted # CHAIN HUMAN BLASTZ (DONE 2/19/04 angie) # Run axtChain on little cluster ssh kkr1u00 cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18 mkdir -p axtChain/run1 cd axtChain/run1 mkdir out chain ls -1S /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18/axtChrom/*.axt \ > input.lst cat << '_EOF_' > gsub #LOOP doChain {check in exists $(path1)} {check out line+ chain/$(root1).chain} {check out line+ out/$(root1).out} #ENDLOOP '_EOF_' # << this line makes emacs coloring happy # Make our own linear gap file with reduced gap penalties, # in hopes of getting longer chains: cat << '_EOF_' > ../../chickenHumanTuned.gap tablesize 11 smallSize 111 position 1 2 3 11 111 2111 12111 32111 72111 152111 252111 qGap 325 360 400 450 600 1100 3600 7600 15600 31600 56600 tGap 325 360 400 450 600 1100 3600 7600 15600 31600 56600 bothGap 625 660 700 750 900 1400 4000 8000 16000 32000 57000 '_EOF_' # << this line makes emacs coloring happy cat << '_EOF_' > doChain #!/bin/csh axtFilter -notQ=chrUn_random $1 \ | ~/bin/i386/axtChain -scoreScheme=/cluster/data/blastz/HoxD55.q \ -linearGap=../../chickenHumanTuned.gap \ -minScore=5000 stdin \ /cluster/bluearc/galGal1/nib \ /cluster/bluearc/scratch/hg/gs.17/build34/bothMaskedNibs $2 > $3 '_EOF_' # << this line makes emacs coloring happy chmod a+x doChain gensub2 input.lst single gsub jobList para create jobList para try, check, push, check... 
#Completed: 45 of 45 jobs #Average job time: 292s 4.86m 0.08h 0.00d #Longest job: 906s 15.10m 0.25h 0.01d #Submission to last job: 2233s 37.22m 0.62h 0.03d # now on the cluster server, sort chains ssh kksilo cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18/axtChain chainMergeSort run1/chain/*.chain > all.chain chainSplit chain all.chain rm run1/chain/*.chain # take a look at score distr's foreach f (chain/*.chain) grep chain $f | awk '{print $2;}' | sort -nr > /tmp/score.$f:t:r echo $f:t:r textHistogram -binSize=10000 /tmp/score.$f:t:r echo "" end # Load chains into database ssh hgwdev cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18/axtChain/chain foreach i (*.chain) set c = $i:r echo loading $c hgLoadChain galGal1 ${c}_chainHg16 $i end # NET HUMAN BLASTZ (DONE 2/19/04 angie) ssh kksilo cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18/axtChain chainPreNet all.chain ../S1.len ../S2.len stdout \ | chainNet stdin -minSpace=1 ../S1.len ../S2.len stdout /dev/null \ | netSyntenic stdin noClass.net # Add classification info using db tables: ssh hgwdev cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18/axtChain netClass noClass.net galGal1 hg16 human.net # Make a 'syntenic' subset: ssh kksilo cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18/axtChain rm noClass.net # Make a 'syntenic' subset of these with netFilter -syn human.net > humanSyn.net # Load the nets into database ssh hgwdev cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18/axtChain netFilter -minGap=10 human.net | hgLoadNet galGal1 netHg16 stdin netFilter -minGap=10 humanSyn.net | hgLoadNet galGal1 netSyntenyHg16 stdin # Add entries for chaing16, netHg16, syntenyHg16 to chicken/galGal1 trackDb # RUN AXTBEST ON CHICKEN/HUMAN (DONE 2/19/04 angie) ssh kolossus cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18 mkdir axtBest pslBest foreach f (axtChrom/chr*.axt) set chr=$f:t:r echo axtBesting $chr axtBest $f $chr axtBest/$chr.axt -minScore=300 axtToPsl 
axtBest/$chr.axt S1.len S2.len pslBest/$chr.psl
    end
    ssh hgwdev
    cd /cluster/data/galGal1/bed/blastz.hg16.swap.2004-02-18
    cat pslBest/chr*.psl | hgLoadPsl -table=blastzBestHg16 galGal1 stdin


# BLASTZ SELF (DONE 2/23/04 angie)
    # lavToAxt has a new -dropSelf option that *will* replace axtDropOverlap,
    # once I fix a bug in it...
    ssh kk
    mkdir -p /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19
    cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19
    # Abridge repeats (linSpecRep relative to human but should be fine right?)
    cat << '_EOF_' > DEF
# chicken vs. chicken
export PATH=/usr/bin:/bin:/usr/local/bin:/cluster/bin/penn:/cluster/bin/i386:/cluster/home/angie/schwartzbin

ALIGN=blastz-run
BLASTZ=blastz
BLASTZ_H=2000
BLASTZ_ABRIDGE_REPEATS=1

# TARGET
# Chicken
SEQ1_DIR=/cluster/bluearc/galGal1/nib
SEQ1_RMSK=
SEQ1_FLAG=
SEQ1_SMSK=/cluster/bluearc/galGal1/linSpecRep
SEQ1_IN_CONTIGS=0
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY
# Chicken (self alignment; this is galGal1 vs. itself, not human)
SEQ2_DIR=/cluster/bluearc/galGal1/nib
SEQ2_RMSK=
SEQ2_FLAG=
SEQ2_SMSK=/cluster/bluearc/galGal1/linSpecRep
SEQ2_IN_CONTIGS=0
SEQ2_CHUNK=10000000
SEQ2_LAP=10000

BASE=/cluster/data/galGal1/bed/blastz.galGal1.2004-02-19

DEF=$BASE/DEF
RAW=$BASE/raw
CDBDIR=$BASE
SEQ1_LEN=$BASE/S1.len
SEQ2_LEN=$BASE/S2.len
'_EOF_'
    # << this line makes emacs coloring happy
    # Save the DEF file in the current standard place
    chmod +x DEF
    cp DEF ~angie/hummus/DEF.galGal1-galGal1
    # source the DEF file
    bash
    . ./DEF
    mkdir -p $BASE/run
    ~angie/hummus/make-joblist $DEF > $BASE/run/j
    sh $BASE/xdir.sh
    cd $BASE/run
    # now edit j to prefix path to executable name
    sed -e 's#^#/cluster/bin/penn/#' j > j2
    wc -l j*
    head j2
    # make sure the j2 edits are OK, then use it:
    mv j2 j
    para create j
    para try, check, push, check, ... 
#Completed: 21025 of 21025 jobs #Average job time: 400s 6.67m 0.11h 0.00d #Longest job: 1473s 24.55m 0.41h 0.02d #Submission to last job: 9966s 166.10m 2.77h 0.12d # --- normalize (PennSpeak for lift) ssh kkr1u00 cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19 # run bash shell if not running it already source DEF mkdir -p $BASE/run.1 mkdir -p $BASE/lav # create a new job list to convert out files to lav /cluster/bin/scripts/blastz-make-out2lav $DEF $BASE \ > run.1/jobList cd run.1 wc -l jobList head jobList # make sure the job list is OK para create jobList para push #Completed: 145 of 145 jobs #Average job time: 46s 0.77m 0.01h 0.00d #Longest job: 328s 5.47m 0.09h 0.00d #Submission to last job: 1192s 19.87m 0.33h 0.01d # convert lav files to axt ssh kkr1u00 cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19 mkdir axtChrom # a new run directory mkdir run.2 cd run.2 # Use blastz-self-chromlav2axt, which includes axtDropOverlap in # the pipe (that helps to keep axtSort from barfing). # usage: blastz-self-chromlav2axt lav-dir axt-file seq1-dir seq2-dir cat << '_EOF_' > gsub #LOOP /cluster/bin/scripts/blastz-self-chromlav2axt /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/lav/$(root1) {check out line+ /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChrom/$(root1).axt} /cluster/bluearc/galGal1/nib /cluster/bluearc/galGal1/nib /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/S1.len /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/S2.len #ENDLOOP '_EOF_' # << this line makes emacs coloring happy \ls -1S /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/lav > chrom.list gensub2 chrom.list single gsub jobList wc -l jobList head jobList para create jobList para try, check, push, check,... #Completed: 46 of 47 jobs #Crashed: 1 jobs #Average job time: 34s 0.56m 0.01h 0.00d #Longest job: 294s 4.90m 0.08h 0.00d #Submission to last job: 668s 11.13m 0.19h 0.01d # chrUn crashed -- run on kolossus, in two passes! 
# actually, it was a segv in lavToAxt (fixed), but this works anyway: ssh kolossus cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19 set base=`pwd` set seq1_dir=/cluster/data/galGal1/nib set seq2_dir=/cluster/data/galGal1/nib foreach chr (chrUn) set out=axtChrom/$chr.axt echo "Translating $chr lav to $out" foreach d (lav/$chr/*.lav) set smallout=$d.axt echo $smallout ~/bin/$MACHTYPE/lavToAxt $d $seq1_dir $seq2_dir stdout \ | axtDropOverlap stdin $base/S1.len $base/S2.len stdout \ | axtSort stdin $smallout end cat `ls -1 lav/$chr/*.lav.axt | sort -g` | axtSort stdin $out end # CHAIN SELF BLASTZ (TODO 2/19/04 angie) # Run axtChain on little cluster ssh kkr1u00 cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19 mkdir -p axtChain/run1 cd axtChain/run1 mkdir out chain ls -1S /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChrom/*.axt \ > input.lst cat << '_EOF_' > gsub #LOOP doChain {check in exists $(path1)} {check out line+ chain/$(root1).chain} {check out line+ out/$(root1).out} #ENDLOOP '_EOF_' # << this line makes emacs coloring happy cat << '_EOF_' > doChain #!/bin/csh axtFilter -notQ=chrUn $1 \ | ~/bin/i386/axtChain stdin \ /cluster/bluearc/galGal1/nib \ /cluster/bluearc/galGal1/nib $2 > $3 '_EOF_' # << this line makes emacs coloring happy chmod a+x doChain gensub2 input.lst single gsub jobList para create jobList para try, check, push, check... 
#Completed: 47 of 47 jobs #Average job time: 140s 2.33m 0.04h 0.00d #Longest job: 766s 12.77m 0.21h 0.01d #Submission to last job: 766s 12.77m 0.21h 0.01d # now on the cluster server, sort chains ssh kksilo cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChain chainMergeSort run1/chain/*.chain > all.chain chainSplit chain all.chain rm run1/chain/*.chain # trim to minScore=20000 mkdir chainFilt foreach f (chain/*.chain) chainFilter -minScore=20000 $f > chainFilt/$f:t end # Load chains into database ssh hgwdev cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChain/chainFilt foreach i (*.chain) set c = $i:r echo loading $c hgLoadChain galGal1 ${c}_chainSelf $i end # NET SELF BLASTZ (TODO 2/19/04 angie) ssh kksilo cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChain chainPreNet all.chain ../S1.len ../S2.len stdout \ | chainNet stdin -minSpace=1 ../S1.len ../S2.len stdout /dev/null \ | netSyntenic stdin noClass.net # Add classification info using db tables: ssh hgwdev cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChain netClass -noAr noClass.net galGal1 galGal1 self.net # Make a 'syntenic' subset: ssh kksilo cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChain rm noClass.net netFilter -syn self.net > selfSyn.net # Load the nets into database ssh hgwdev cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19/axtChain netFilter -minGap=10 self.net | hgLoadNet galGal1 netSelf stdin netFilter -minGap=10 selfSyn.net | hgLoadNet galGal1 netSyntenySelf stdin # Add entries for chainSelf, netSelf, netSyntenySelf to # chicken/galGal1 trackDb # RUN AXTBEST ON SELF (TODO 2/19/04 angie) # chain/net seem kinda sparse, let's try axtBest... 
ssh kolossus cd /cluster/data/galGal1/bed/blastz.galGal1.2004-02-19 mkdir axtBest pslBest foreach f (axtChrom/chr*.axt) set chr=$f:t:r echo axtBesting $chr axtBest $f $chr axtBest/$chr.axt -minScore=300 axtToPsl axtBest/$chr.axt S1.len S2.len pslBest/$chr.psl end # MAKE 11.OOC FILE FOR BLAT (DONE 2/1/04 angie) ssh kksilo mkdir /cluster/data/galGal1/bed/ooc cd /cluster/data/galGal1/bed/ooc ls -1 /cluster/bluearc/galGal1/nib/chr*.nib > nib.lst blat nib.lst /dev/null /dev/null -tileSize=11 \ -makeOoc=/cluster/bluearc/galGal1/11.ooc -repMatch=1024 #Wrote 1053 overused 11-mers to /cluster/bluearc/galGal1/11.ooc # AUTO UPDATE GENBANK MRNA RUN (DONE 2/1/04 angie) # Update genbank config and source in CVS: cd ~/kent/src/hg/makeDb/genbank cvsup . # See if /cluster/data/genbank/etc/genbank.conf has had any un-checked-in # edits, check them in if necessary: diff /cluster/data/genbank/etc/genbank.conf etc/genbank.conf # Edit etc/genbank.conf and add these lines: # galGal1 (chicken) galGal1.genome = /cluster/bluearc/galGal1/nib/chr*.nib galGal1.lift = /cluster/data/galGal1/jkStuff/liftAll.lft galGal1.refseq.mrna.native.load = no galGal1.genbank.mrna.xeno.load = yes galGal1.genbank.est.xeno.load = no galGal1.downloadDir = galGal1 cvs ci etc/genbank.conf # Since chicken is a new species for us, edit src/lib/gbGenome.c. # Pick some other browser species, & monkey-see monkey-do. 
cvs diff src/lib/gbGenome.c make cvs ci src/lib/gbGenome.c # Edit src/align/gbBlat to add /cluster/bluearc/galGal1/11.ooc cvs diff src/align/gbBlat make cvs ci src/align/gbBlat # Install to /cluster/data/genbank: make install-server ssh eieio cd /cluster/data/genbank # This is an -initial run, mRNA only: nice bin/gbAlignStep -iserver=no -clusterRootDir=/cluster/bluearc/genbank \ -srcDb=genbank -type=mrna -verbose=1 -initial galGal1 # Load results: ssh hgwdev cd /cluster/data/genbank nice bin/gbDbLoadStep -verbose=1 -drop -initialLoad galGal1 # Clean up: rm -r /cluster/bluearc/genbank/work/initial.galGal1 ssh eieio # -initial for ESTs: nice bin/gbAlignStep -iserver=no -clusterRootDir=/cluster/bluearc/genbank \ -srcDb=genbank -type=est -verbose=1 -initial galGal1 # Load results: ssh hgwdev cd /cluster/data/genbank nice bin/gbDbLoadStep -verbose=1 galGal1 # Clean up: rm -r /cluster/bluearc/genbank/work/initial.galGal1 # PRODUCING GENSCAN PREDICTIONS (DONE 2/3/04 angie) ssh hgwdev mkdir /cluster/data/galGal1/bed/genscan cd /cluster/data/galGal1/bed/genscan # Check out hg3rdParty/genscanlinux to get latest genscan: cvs co hg3rdParty/genscanlinux # Run on small cluster (more mem than big cluster). ssh kkr1u00 # Make 3 subdirectories for genscan to put their output files in mkdir gtf pep subopt # Generate a list file, genome.list, of all the hard-masked contigs that # *do not* consist of all-N's (which would cause genscan to blow up) rm -f genome.list touch genome.list foreach f ( `ls -1S /cluster/data/galGal1/*/chr*_*/chr*_?{,?}.fa.masked` ) egrep '[ACGT]' $f > /dev/null if ($status == 0) echo $f >> genome.list end wc -l genome.list # Create template file, gsub, for gensub2. 
For example (3-line file): cat << '_EOF_' > gsub #LOOP /cluster/bin/i386/gsBig {check in line+ $(path1)} {check out line gtf/$(root1).gtf} -trans={check out line pep/$(root1).pep} -subopt={check out line subopt/$(root1).bed} -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=2400000 #ENDLOOP '_EOF_' # << this line makes emacs coloring happy gensub2 genome.list single gsub jobList para create jobList para try, check, push, check, ... #Completed: 259 of 261 jobs #Crashed: 2 jobs #Average job time: 2920s 48.67m 0.81h 0.03d #Longest job: 141367s 2356.12m 39.27h 1.64d #Submission to last job: 144447s 2407.45m 40.12h 1.67d # If there are crashes, diagnose with "para problems". # If a job crashes due to genscan running out of memory, re-run it # manually with "-window=1200000" instead of "-window=2400000". /cluster/bin/i386/gsBig /cluster/data/galGal1/5/chr5_5/chr5_5.fa.masked gtf/chr5_5.fa.gtf -trans=pep/chr5_5.fa.pep -subopt=subopt/chr5_5.fa.bed -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=1200000 /cluster/bin/i386/gsBig /cluster/data/galGal1/20/chr20_1/chr20_1.fa.masked gtf/chr20_1.fa.gtf -trans=pep/chr20_1.fa.pep -subopt=subopt/chr20_1.fa.bed -exe=hg3rdParty/genscanlinux/genscan -par=hg3rdParty/genscanlinux/HumanIso.smat -tmp=/tmp -window=1200000 # Convert these to chromosome level files as so: ssh kksilo cd /cluster/data/galGal1/bed/genscan liftUp genscan.gtf ../../jkStuff/liftAll.lft warn gtf/*.gtf liftUp genscanSubopt.bed ../../jkStuff/liftAll.lft warn subopt/*.bed cat pep/*.pep > genscan.pep # Load into the database as so: ssh hgwdev cd /cluster/data/galGal1/bed/genscan ldHgGene galGal1 genscan genscan.gtf hgPepPred galGal1 generic genscanPep genscan.pep hgLoadBed galGal1 genscanSubopt genscanSubopt.bed # PRODUCING FUGU BLAT ALIGNMENTS (DONE 2/6/04) ssh kk mkdir /cluster/data/galGal1/bed/blatFr1 cd /cluster/data/galGal1/bed/blatFr1 ls -1S 
/cluster/bluearc/fugu/fr1/trfFa/*.fa > fugu.lst
    ls -1S /cluster/bluearc/galGal1/trfFa/*.fa > chicken.lst
    cat << '_EOF_' > gsub
#LOOP
/cluster/bin/i386/blat -mask=lower -q=dnax -t=dnax {check in line+ $(path1)} {check in line+ $(path2)} {check out line+ psl/$(root1)_$(root2).psl}
#ENDLOOP
'_EOF_'
    # << this line makes emacs coloring happy
    mkdir psl
    gensub2 chicken.lst fugu.lst gsub spec
    para create spec
    para try, check, push, check, ...
    # cluster & bluearc were very busy => slow:
#Completed: 1827 of 1827 jobs
#Average job time: 1685s 28.08m 0.47h 0.02d
#Longest job: 4121s 68.68m 1.14h 0.05d
#Submission to last job: 56039s 933.98m 15.57h 0.65d

    # Sort alignments:
    ssh kksilo
    cd /cluster/data/galGal1/bed/blatFr1
    pslCat -dir psl \
    | liftUp -type=.psl stdout /cluster/data/galGal1/jkStuff/liftAll.lft \
      warn stdin \
    | pslSortAcc nohead chrom /cluster/store2/temp stdin

    # load into database:
    ssh hgwdev
    cd /cluster/data/galGal1/bed/blatFr1/chrom
    cat *.psl | hgLoadPsl -fastLoad -table=blatFr1 galGal1 stdin
    mkdir /gbdb/galGal1/fuguSeq
    ln -s /cluster/data/fr1/fugu_v3.masked.fa /gbdb/galGal1/fuguSeq/
    cd /cluster/data/galGal1/bed/blatFr1
    hgLoadSeq galGal1 /gbdb/galGal1/fuguSeq/fugu_v3.masked.fa


# LOAD CPG ISLANDS (DONE 2/1/04 angie)
    ssh hgwdev
    mkdir -p /cluster/data/galGal1/bed/cpgIsland
    cd /cluster/data/galGal1/bed/cpgIsland
    # Build software from Asif Chinwalla (achinwal@watson.wustl.edu)
    cvs co hg3rdParty/cpgIslands
    cd hg3rdParty/cpgIslands
    make
    mv cpglh.exe /cluster/data/galGal1/bed/cpgIsland/
    ssh kksilo
    cd /cluster/data/galGal1/bed/cpgIsland
    foreach f (../../*/chr*.fa.masked)
      set fout=$f:t:r:r.cpg
      echo running cpglh on $f to $fout
      # $fout already ends in .cpg -- redirect to $fout (not $fout.cpg)
      # so the output file is chrN.cpg rather than chrN.cpg.cpg.
      ./cpglh.exe $f > $fout
    end
    # Transform cpglh output to bed +
    # (awk has no /* */ comment syntax -- use # so filter.awk parses cleanly
    # instead of treating the comment lines as stray regexp patterns)
    cat << '_EOF_' > filter.awk
# chr1\t1325\t3865\t754\tCpG: 183\t64.9\t0.7
# Transforms to: (tab separated columns above, spaces below)
# chr1 1325 3865 CpG: 183 754 183 489 64.9 0.7
{
width = $3-$2;
printf("%s\t%s\t%s\t%s %s\t%s\t%s\t%0.0f\t%0.1f\t%s\n",
       $1,$2,$3,$5,$6,width,$6,width*$7*0.01,100.0*2*$6/($3-$2),$7);}
'_EOF_'
    # << this line makes emacs coloring happy
    awk -f filter.awk chr*.cpg > cpgIsland.bed

    # load into database:
    ssh hgwdev
    cd /cluster/data/galGal1/bed/cpgIsland
    hgLoadBed galGal1 cpgIsland -tab -noBin \
      -sqlTable=$HOME/kent/src/hg/lib/cpgIsland.sql cpgIsland.bed


# LOAD SOFTBERRY GENES (TODO)
    cd /cluster/data/galGal1/bed
    mkdir softberry
    cd softberry
    wget \
ftp://www.softberry.com/pub/SC_CHICKEN_JUN03/Softb_fgenesh_chicken_jun03.tar.gz
    tar xvzf Softb_fgenesh_chicken_jun03.tar.gz
    ldHgGene galGal1 softberryGene chr*.gff
    hgPepPred galGal1 softberry *.protein
    hgSoftberryHom galGal1 *.protein


# BUILD BLAST DATABASES
    cd /cluster/data/galGal1
    mkdir blastDb
    # NB: ${i} must be braced here -- a bare $i_ would be read by the shell
    # as the (unset) variable "i_", breaking the chr${i}_* glob.
    for i in `cat chrom.lst`; do for j in `echo $i/chr${i}_*/chr*_*_*.fa`; do ln -s `pwd`/$j blastDb/`basename $j .fa`; done; done
    cd blastDb
    for i in *; do formatdb -p F -i $i; done
# END BLAST


# TIGR GENE INDEX (TODO)
    mkdir -p /cluster/data/galGal1/bed/tigr
    cd /cluster/data/galGal1/bed/tigr
    wget ftp://ftp.tigr.org//pub/data/tgi/TGI_track_ChickenGenome_Jun2003.tgz
    tar xvfz TGI_track_ChickenGenome_Jun2003.tgz
    foreach f (*cattle*)
      set f1 = `echo $f | sed -e 's/cattle/cow/g'`
      mv $f $f1
    end
    foreach o (rat cow human pig chicken)
      setenv O $o
      foreach f (chr*_$o*s)
        tail +2 $f | perl -wpe 's /THC/TC/; s/(TH?C\d+)/$ENV{O}_$1/;' > $f.gff
      end
    end
    ldHgGene -exon=TC galGal1 tigrGeneIndex *.gff
    # 139456 gene predictions
    gzip *TCs *.gff


# GC 5 BASE WIGGLE TRACK (DONE - 2004-02-04 - Hiram)
    # working on kksilo, the fileserver for this data.
    ssh kksilo
    mkdir /cluster/data/galGal1/bed/gc5Base
    cd /cluster/data/galGal1/bed/gc5Base
    mkdir wigData5 dataLimits5
    for n in ../../nib/*.nib
    do
        c=`basename ${n} | sed -e "s/.nib//"`
        C=`echo $c | sed -e "s/chr//"`
        echo -n "working on ${c} - ${C} ... 
" hgGcPercent -chr=${c} -doGaps \ -file=stdout -win=5 galGal1 ../../nib | grep -w GC | \ awk ' { bases = $3 - $2 perCent = $5/10.0 printf "%d\t%.1f\n", $2+1, perCent }' | wigAsciiToBinary \ -dataSpan=5 -chrom=${c} -wibFile=wigData5/gc5Base_${C} \ -name=${C} stdin > dataLimits5/${c} echo "done ${c}" done # data is complete, load track on hgwdev ssh hgwdev cd /cluster/data/galGal1/bed/gc5Base hgLoadWiggle galGal1 gc5Base wigData5/*.wig # create symlinks for .wib files mkdir /gbdb/galGal1/wib ln -s `pwd`/wigData5/*.wib /gbdb/galGal1/wib # the trackDb entry track gc5Base shortLabel GC Counts longLabel GC Percent in 5 base windows group map priority 23.5 visibility hide autoScale Off maxHeightPixels 128:36:16 graphTypeDefault Bar gridDefault OFF windowingFunction Mean color 0,128,255 altColor 255,128,0 viewLimits 30:70 type wig 0 100 # SUPERCONTIG LOCATIONS TRACK (DONE 2/5/04 angie) # Supercontig N is a collection of contained "ContigN.*". # Extract Supercontig starts and ends from AGP into bed. mkdir /cluster/data/galGal1/bed/supercontig cd /cluster/data/galGal1/bed/supercontig cat ../../chicken_agp/chr*.agp \ | perl -we 'while (<>) { \ chop; @words = split("\t"); \ next if ($words[4] eq "N"); \ if ($words[5] =~ /^(Contig\d+)\.(\d+)$/) { \ $sup = $1; $newNum = $2; \ $firstNum = $newNum if (! 
defined $firstNum); \ if (defined $lastSup && $lastSup ne $sup) { \ print "$chr\t$start\t$end\t$lastSup (.$firstNum-.$lastNum)\t1000\t$strand\n"; \ undef $start; undef $end; $firstNum = $newNum; \ } \ $chr = $words[0]; $cStart = $words[1]; $cEnd = $words[2]; \ $strand = $words[8]; \ $start = $cStart if (\!defined $start || $start > $cStart); \ $end = $cEnd if (\!defined $end || $end < $cEnd); \ $lastSup = $sup; $lastNum = $newNum; \ } else { die "bad 6th word $words[5]\n"; } \ } \ print "$chr\t$start\t$end\t$lastSup (.$firstNum-.$lastNum)\t1000\t$strand\n";' \ > supercontig.bed ssh hgwdev cd /cluster/data/galGal1/bed/supercontig hgLoadBed -tab galGal1 supercontig supercontig.bed # PLOT MARKER GENETIC VS. GENOMIC POS (DONE 2/5/04 angie) ssh kksilo mkdir /cluster/data/galGal1/bed/markerPlots cd /cluster/data/galGal1/bed/markerPlots # Need to find out where Terry got this file: cp -p /cluster/bluearc/gg1/markers/genetic.map . mkdir gnuplot # This is a one-shot and I'm writing a perl script -- too long for inliner. 
chmod a+x ./plotMarkers.pl ./plotMarkers.pl genetic.map ../markers/stsMarkersTmp.bed mkdir gif ssh hgwdev cd /cluster/data/galGal1/bed/markerPlots foreach f (gnuplot/*.gnuplot) gnuplot < $f > tmp.ppm ppmtogif tmp.ppm > gif/$f:t:r.gif rm tmp.ppm end mkdir /usr/local/apache/htdocs/angie/chickenPlots cp -p /cluster/data/galGal1/bed/markerPlots/gif/* \ /usr/local/apache/htdocs/angie/chickenPlots cp -p /cluster/data/galGal1/bed/markerPlots/*.txt \ /usr/local/apache/htdocs/angie/chickenPlots cp -p /cluster/bluearc/gg1/markers/genetic.map \ /usr/local/apache/htdocs/angie/chickenPlots/genetic_map.txt cp -p /cluster/data/galGal1/bed/markers/stsMarkersTmp.bed \ /usr/local/apache/htdocs/angie/chickenPlots/stsMarkerPlacements.txt # CREATING QUALITY SCORE TRACK (DONE 2/6/04 angie) ssh kksilo mkdir /cluster/data/galGal1/bed/qual cd /cluster/data/galGal1/bed/qual cat ../../chicken_agp/chr*.agp > assembly.agp tar xvzf ../../chicken_040105.contigs.qual.tar.gz cat chicken*.qual | qaToQac stdin stdout \ | ~/bin/i386/chimpChromQuals assembly.agp stdin chrom.qac rm chicken*.qual mkdir wigData dataLimits foreach c (`cat ../../chrom.lst`) foreach agp (../../$c/chr$c.agp ../../$c/chr${c}_random.agp) if (-e $agp) then set chr = $agp:t:r set abbrev = `echo $chr | sed -e 's/^chr//; s/_random/r/;'` echo $chr to $abbrev wiggle qacToWig chrom.qac -name=$chr stdout \ | wigAsciiToBinary -dataSpan=1 -chrom=$chr \ -wibFile=wigData/qual_$abbrev -name=$abbrev stdin \ > dataLimits/$chr endif end end # Verify size of .wib file = chrom length foreach f (wigData/*.wib) set abbrev = $f:t:r set chr = `echo $abbrev | sed -e 's/^qual_/chr/; s/r$/_random/;'` set wibLen = `ls -l $f | awk '{print $5;}'` set chromLen = `grep -w $chr ../../chrom.sizes | awk '{print $2;}'` if ($wibLen != $chromLen) then echo "ERROR: $chr size is $chromLen but wib size is $wibLen" else echo $chr OK. 
endif end #ERROR: chr26 size is 4262107 but wib size is 3762107 # That's OK -- chr26 ends in a 500000bp centromere gap -- not incl'd in wib # /gbdb & load: ssh hgwdev cd /cluster/data/galGal1/bed/qual mkdir -p /gbdb/galGal1/wib ln -s `pwd`/wigData/*.wib /gbdb/galGal1/wib hgLoadWiggle galGal1 quality wigData/*.wig # To speed up display for whole chrom views, need to make zoomed # data views. Zoom to 1K points per pixel - Hiram 2004-02-17 ssh kksilo cd /cluster/data/galGal1/bed/qual/wigData1K mkdir -p wigData1K mkdir -p dataLimits1K for c in `cat ../../chrom.lst` do if [ -f ../../${c}/chr${c}.agp ]; then echo "chr${c} quality 1K zoom" qacToWig chrom.qac -name=chr${c} stdout | wigZoom stdin | wigAsciiToBinary -dataSpan=1024 \ -chrom=chr${c} -wibFile=wigData1K/qc1K_${c} \ -name=${c} stdin > dataLimits1K/chr${c} fi if [ -f ../../${c}/chr${c}_random.agp ]; then echo "chr${c}_random quality 1K zoom" qacToWig chrom.qac -name=chr${c}_random stdout | wigZoom stdin | wigAsciiToBinary -dataSpan=1024 \ -chrom=chr${c}_random -wibFile=wigData1K/qc1K_${c}r \ -name=${c}_random stdin > dataLimits1K/chr${c}r fi done ssh hgwdev cd /cluster/data/galGal1/bed/qual/wigData1K # load in addition to existing data hgLoadWiggle -oldTable galGal1 quality *.wig # create symlinks for .wib files ln -s `pwd`/*.wib /gbdb/galGal1/wib # PLOT MRNA-MARKER GENETIC VS. GENOMIC POS (DONE 2/10/04 angie) ssh hgwdev mkdir /cluster/data/galGal1/bed/mrnaPlots cd /cluster/data/galGal1/bed/mrnaPlots # Need to find out where Terry got this file: cp -p /cluster/bluearc/gg1/markers/stsAlias.bed . echo 'select all_mrna.tName, all_mrna.tStart, all_mrna.tEnd, \ all_mrna.qName, 0, all_mrna.strand, geneName.name \ from all_mrna, mrna, geneName \ where all_mrna.qName = mrna.acc and mrna.geneName != 0 and \ mrna.geneName = geneName.id' \ | hgsql galGal1 -N > mrnaWithNames.bed mkdir gnuplot # This is a one-shot and I'm writing a perl script -- too long for inliner. 
chmod a+x ./plotMrnaMarkers.pl ./plotMrnaMarkers.pl ../markerPlots/genetic.map stsAlias.bed \ mrnaWithNames.bed # Too few results to plot -- just emailed summary to LaDeana et al. # COMPARE GENBANK MRNA ANNOTATED POSITION TO ALIGNED (DONE 2/11/04 angie) ssh hgwdev cd /cluster/data/galGal1/bed/mrnaPlots echo 'select tName, tStart, tEnd, qName, 0, strand from all_mrna' \ | hgsql galGal1 -N > mrnaPos.bed # write another 1-shot perl script chmod a+x getMrnaGBPos.pl # eieio is the fileserver for genbank stuff... ssh eieio cd /cluster/data/galGal1/bed/mrnaPlots gunzip -c /cluster/store5/genbank/data/download/genbank.139.0/gbvrt*.gz \ | ./getMrnaGBPos.pl mrnaPos.bed # emailed report files mrnaGB*.txt