# for emacs: -*- mode: sh; -*-

# This file describes building the browser database for the archaeal
# species Pyrococcus furiosus
#
# The commands below are a step-by-step record; each "ssh <host>" line
# presumably marks the machine on which the commands that follow were typed.

# DOWNLOAD SEQUENCE FROM GENBANK (DONE 02/10/04)

# NOTE(review): the host name "eieo" looks like a typo -- confirm the machine.
ssh eieo
mkdir /cluster/store5/archae/pyrFur2
ln -s /cluster/store5/archae/pyrFur2 /cluster/data/pyrFur2
cd /cluster/data/pyrFur2
wget ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Pyrococcus_furiosus/NC_003413.fna
mv NC_003413.fna chr1.fa
# Edit header of chr1.fa to '> pyrAer1'
# NOTE(review): "pyrAer1" is a different assembly name than the pyrFur2
# database built here; it may be a leftover from another build doc.  The
# header is what faSize -detailed writes into chrom.sizes below -- confirm
# the intended sequence name.

# CREATE DATABASES AND A BUNCH OF INITIAL STUFF (DONE 02/10/04)

ssh hgwdev
echo 'create database pyrFur2' | hgsql ''
cd /cluster/data/pyrFur2
hgNibSeq pyrFur2 /cluster/data/pyrFur2/nib chr1.fa
faSize -detailed chr1.fa > chrom.sizes
mkdir -p /gbdb/pyrFur2/nib
# Link the nib into /gbdb, which is the nibPath registered in dbDb below.
# (The later 'CHANGE "chr1" to "chr"' step removes exactly this link.)
ln -s /cluster/data/pyrFur2/nib/chr1.nib /gbdb/pyrFur2/nib/chr1.nib
echo "create table grp (PRIMARY KEY(NAME)) select * from hg16.grp" \
    | hgsql pyrFur2
echo 'INSERT INTO dbDb \
    (name, description, nibPath, organism, \
    defaultPos, active, orderKey, genome, scientificName, \
    htmlPath, hgNearOk) values \
    ("pyrFur2", "Pyrococcus furiosus", "/gbdb/pyrFur2/nib", "P. furiosus", \
    "chr1:500000-550000", 1, 85, "Archae", \
    "Pyrococcus furiosus", "/gbdb/pyrFur2/html/description.html", \
    0);' \
    | hgsql -h genome-testdb hgcentraltest

cd ~/kent/src/hg/makeDb/trackDb
# edit the trackDb makefile

# add the trackDb directories
mkdir -p archae/pyrFur2
cvs add archae
cvs add archae/pyrFur2
cvs commit

# GC PERCENT TRACK (DONE 02/10/04)

ssh hgwdev
mkdir -p /cluster/data/pyrFur2/bed/gcPercent
cd /cluster/data/pyrFur2/bed/gcPercent
hgsql pyrFur2 < ~/kent/src/hg/lib/gcPercent.sql
hgGcPercent -win=2000 pyrFur2 ../../nib
# edit ~/kent/src/hg/makeDb/trackDb/archae/trackDb.ra and add an entry for
# GC percent for 2,000 base windows instead (simply cut and paste from
# ~/kent/src/hg/makeDb/trackDb/trackDb.ra).
# GC 20 BASE WIGGLE TRACK (DONE 8/25)

mkdir /cluster/data/pyrFur2/bed/gc20Base
cd /cluster/data/pyrFur2/bed/gc20Base
mkdir wigData20 dataLimits20
# Keep only the GC lines of the hgGcPercent output, turn column 5 (divided
# by 10) into a value keyed by the 1-based start position, and encode that
# stream as a binary wiggle.
hgGcPercent -chr=chr -file=stdout -win=20 -overlap=19 pyrFur2 ../../nib | grep -w GC | \
awk '
{
    bases = $3 - $2
    perCent = $5/10.0
    printf "%d\t%.1f\n", $2+1, perCent
}' | wigAsciiToBinary -dataSpan=1 -chrom=chr \
    -wibFile=wigData20/gc20Base_1 -name=1 stdin > dataLimits20/chr
hgLoadWiggle pyrFur2 gc20Base wigData20/*.wig
mkdir /gbdb/pyrFur2/wib
ln -s `pwd`/wigData20/*.wib /gbdb/pyrFur2/wib

# GENBANK PROTEIN-CODING GENES (DONE 01/14/04)

ssh hgwdev
mkdir /cluster/data/pyrFur2/genbank
cd /cluster/data/pyrFur2/genbank
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Pyrococcus_furiosus/NC_003413.gbk
mv NC_003413.gbk pyrFur2.gbk
# Create 3 files to assist parsing of the genbank
# 1. for a bed file
echo 'chr1 start end locus_tag 1000 strand' > pyrFur2-params-bed.txt
# 2. for the peptide parts
echo 'locus_tag translation' > pyrFur2-params-pep.txt
# 3. for the other gene information
echo 'locus_tag product note' > pyrFur2-params-xra.txt
# Now extract the genes and information:
gbArchaeGenome pyrFur2.gbk pyrFur2-params-bed.txt pyrFur2-genbank-cds.bed
gbArchaeGenome pyrFur2.gbk pyrFur2-params-pep.txt pyrFur2-genbank-cds.pep
gbArchaeGenome pyrFur2.gbk pyrFur2-params-xra.txt pyrFur2-genbank-cds.xra
hgLoadBed pyrFur2 gbProtCode pyrFur2-genbank-cds.bed
# The stock pepPred / minGeneInfo table definitions are created first and
# then renamed to the gbProtCode* names before the data are loaded.
hgsql pyrFur2 < ~/kent/src/hg/lib/pepPred.sql
hgsql pyrFur2 < ~/kent/src/hg/lib/minGeneInfo.sql
echo rename table pepPred to gbProtCodePep | hgsql pyrFur2
echo rename table minGeneInfo to gbProtCodeXra | hgsql pyrFur2
echo load data local infile \'pyrFur2-genbank-cds.pep\' into table gbProtCodePep | hgsql pyrFur2
echo load data local infile \'pyrFur2-genbank-cds.xra\' into table gbProtCodeXra | hgsql pyrFur2

# TIGR GENES (DONE 02/10/04)
# First go to http://www.tigr.org/tigr-scripts/CMR2/gene_attribute_form.dbi
# and fill out the web form as follows:
#   - Pick "Retrieve attributes for the specified DNA feature within a specific
#     organism and/or a specific role category".
#       * Pick the Pyrococcus furiosus entry, and "Primary and TIGR annotation
#         ORFs" from the 1st and 3rd box.
#         (This note used to say "Pyrobaculum aerophilum IM2", which is a
#         different organism than the one this file builds.)
#       * Select everything from "Choose TIGR Annotation Gene Attributes"
#       * Select "Primary Locus Name" from "Choose Primary Annotation Gene Attributes"
#       * Select everything from "Choose Other Gene Attributes"
#   - Click submit, and click save as tab-delimited file.
ssh hgwdev
mkdir /cluster/data/pyrFur2/bed/tigrCmrORFs
cp pyrFur2-tigr.tab /cluster/data/pyrFur2/bed/tigrCmrORFs
cd /cluster/data/pyrFur2/bed/tigrCmrORFs
tigrCmrToBed pyrFur2-tigr.tab pyrFur2-tigr.bed
# "~" is only expanded at the start of a word, so the path given to
# -sqlTable= is spelled with $HOME instead.
hgLoadBed -tab pyrFur2 tigrCmrGene pyrFur2-tigr.bed -sqlTable=$HOME/kent/src/hg/lib/tigrCmrGene.sql
echo RENAME TABLE tigrCmrGene to tigrCmrORFs | hgsql pyrFur2

# Lowe Lab Microarrays (DONE 02/10/04)

ssh hgwdev
mkdir /cluster/data/pyrFur2/bed/llaPfuPrintA
cp Pfu-arrays.{bed,exps} /cluster/data/pyrFur2/bed/llaPfuPrintA
cd /cluster/data/pyrFur2/bed/llaPfuPrintA
hgLoadBed pyrFur2 llaPfuPrintA Pfu-arrays.bed
hgsql pyrFur2 < ~/kent/src/hg/lib/expRecord.sql
echo RENAME TABLE expRecord to llaPfuPrintAExps | hgsql pyrFur2
echo LOAD DATA LOCAL INFILE \'Pfu-arrays.exps\' INTO TABLE llaPfuPrintAExps | hgsql pyrFur2
# for now I'm using both hgFixed and the pyrFur2 database for this... that will change.
hgsql hgFixed < ~/kent/src/hg/lib/expRecord.sql
echo RENAME TABLE expRecord to llaPfuPrintAExps | hgsql hgFixed
echo LOAD DATA LOCAL INFILE \'Pfu-arrays.exps\' INTO TABLE llaPfuPrintAExps | hgsql hgFixed

# CHANGE "chr1" to "chr"

ssh hgwdev
mv /cluster/data/pyrFur2/nib/chr1.nib /cluster/data/pyrFur2/nib/chr.nib
rm /gbdb/pyrFur2/nib/chr1.nib
ln -s /cluster/data/pyrFur2/nib/chr.nib /gbdb/pyrFur2/nib/chr.nib
# a quick script to replace chr1 with chr; save it as an executable
# changeCh.sh that xargs can find on the PATH.  It loops over all of its
# arguments because xargs hands it many file names per invocation, and it
# writes the temporary copy next to each file rather than to one shared
# /tmp name.
#
#   #!/bin/bash
#   for f in "$@"; do
#       sed 's/chr1/chr/g' "$f" > "$f.tmp" && mv "$f.tmp" "$f"
#   done
cd /cluster/data/pyrFur2/bed
find . -name '*.bed' | xargs changeCh.sh
# Now change the DB: dump it, rewrite chr1 -> chr in the dump, and reload
# the result into a freshly created database.
cd /tmp
hgsqldump pyrFur2 | sed 's/chr1/chr/g' > pfuNew.sql
echo drop database pyrFur2 | hgsql
echo create database pyrFur2 | hgsql
hgsql pyrFur2 < pfuNew.sql
rm pfuNew.sql
echo 'update dbDb set defaultPos="chr:550000-580000" where name="pyrFur2"' | hgsql -h genome-testdb hgcentraltest

# promoter track

ssh hgwdev
mkdir -p /cluster/data/pyrFur2/wiggle/promoterScanPos
cd /cluster/data/pyrFur2/wiggle/promoterScanPos
# NOTE(review): the source directory of plus.wig.gz is not recorded here.
cp plus.wig.gz .
wigAsciiToBinary -chrom=chr -wibFile=promoterScanPos plus.wig.gz
hgLoadWiggle -pathPrefix=/gbdb/pyrFur2/wib/promoterScanPos pyrFur2 promoterScanPos promoterScanPos.wig
mkdir /gbdb/pyrFur2/wib/promoterScanPos
ln -s `pwd`/*.wib /gbdb/pyrFur2/wib/promoterScanPos/

# NOTE(review): the minus-strand data go to /cluster/data/pyrFur2/promoterScanNeg,
# not under /cluster/data/pyrFur2/wiggle like the plus strand -- confirm that
# this is where the files really live before "tidying" the path.
mkdir /cluster/data/pyrFur2/promoterScanNeg
cd /cluster/data/pyrFur2/promoterScanNeg
# NOTE(review): the source directory of minus.wig.gz is not recorded here.
cp minus.wig.gz .
wigAsciiToBinary -chrom=chr -wibFile=promoterScanNeg minus.wig.gz
hgLoadWiggle -pathPrefix=/gbdb/pyrFur2/wib/promoterScanNeg pyrFur2 promoterScanNeg promoterScanNeg.wig
mkdir /gbdb/pyrFur2/wib/promoterScanNeg
ln -s `pwd`/*.wib /gbdb/pyrFur2/wib/promoterScanNeg/

# shine DG track

cd /cluster/data/pyrFur2/wiggle
mkdir shineDGPos
mkdir shineDGNeg
cd shineDGPos
cp shine.pos.gz .
wigAsciiToBinary -chrom=chr -wibFile=shineDGPos shine.pos.gz
hgLoadWiggle pyrFur2 shineDGPos shineDGPos.wig
cd ../shineDGNeg
cp shine.neg.gz .
wigAsciiToBinary -chrom=chr -wibFile=shineDGNeg shine.neg.gz
hgLoadWiggle pyrFur2 shineDGNeg shineDGNeg.wig
# These two tables were loaded without -pathPrefix, so the .wib files are
# linked directly into /gbdb/pyrFur2/wib.
cd /gbdb/pyrFur2/wib
ln -s /cluster/data/pyrFur2/wiggle/shineDGPos/shineDGPos.wib shineDGPos.wib
ln -s /cluster/data/pyrFur2/wiggle/shineDGNeg/shineDGNeg.wib shineDGNeg.wib

# Rfam TRACK

cd /cluster/data/pyrFur2/bed
mkdir Rfam
cd Rfam/
# file
cp pfu.rfam.bed .
hgLoadBed pyrFur2 Rfam pfu.rfam.bed
# trackDb entry (kept as comments so it is not mistaken for commands):
#   track Rfam
#   shortLabel Rfam
#   longLabel Rfam noncoding RNA
#   group genes
#   priority 2.3
#   visibility pack
#   color 43,127,60
#   type bed 6 .

# Heatshock array track

cd /cluster/data/pyrFur2/bed
mkdir llaPfuPrintC
cd llaPfuPrintC/
cp Pfu-hs.bed .
cp Pfu-hs.other .
hgLoadBed pyrFur2 llaPfuPrintC Pfu-hs.bed
hgsql pyrFur2 < ~/kent/src/hg/lib/expRecord.sql
echo 'rename table expRecord to llaPfuPrintCExps' | hgsql pyrFur2
hgsql hgFixed < ~/kent/src/hg/lib/expRecord.sql
echo 'rename table expRecord to llaPfuPrintCExps' | hgsql hgFixed
echo 'load data local infile "Pfu-hs.other" into table llaPfuPrintCExps' | hgsql pyrFur2
echo 'load data local infile "Pfu-hs.other" into table llaPfuPrintCExps' | hgsql hgFixed
# trackDb entry (kept as comments so it is not mistaken for commands):
#   track llaPfuPrintC
#   shortLabel HS Microarray
#   longLabel Heat Shock full-genome P. furiosus microarray experiments
#   group regulation
#   priority 86.0
#   visibility dense
#   type expRatio
#   expScale 4.0
#   expStep 0.5
#   expTable llaPfuPrintCExps
#   chip printA
#   canPack off

# Matt's Operons

# Create the directory first, as every other track section does.
mkdir -p /cluster/data/pyrFur2/bed/llOperons
cd /cluster/data/pyrFur2/bed/llOperons
cp matt.bed .
hgLoadBed pyrFur2 llOperons matt.bed

# RNA genes

cd /cluster/data/pyrFur2/bed
mkdir rnaGenes
# Work inside the new directory (the cd was missing, so the files landed
# in bed/ and rnaGenes/ stayed empty).
cd rnaGenes
cp pfuAllRNA.bed .
cp ~/kent/src/hg/lib/rnaGenes.sql .
hgLoadBed -sqlTable=rnaGenes.sql pyrFur2 rnaGenes pfuAllRNA.bed

# EasyGene

cd /cluster/data/pyrFur2/bed
mkdir easyGene
cd easyGene
wget -O easyGene.gff "http://www.binf.ku.dk/cgi-bin/easygene/output?id=129&format=gff&desc=CDS&desc=ORF&desc=ALT&r_cutoff=2&start=&stop=&strand=plus&strand=minus&per_page=-1&250="
pernilleGffToBed easyGene.gff easyGene.bed
cp ~/kent/src/hg/lib/easyGene.sql .
hgLoadBed -sqlTable=easyGene.sql pyrFur2 easyGene easyGene.bed

# MULTIZ with P. horikoshii, P. abyssi, T. kodakarensis
# This is a redo of an earlier run, so some files are already in place
# DONE (10/8/05), kpollard

cd /cluster/data/pyrFur2/bed/conservation
mv Thermokoda.chr therKoda1.chr
#edit headers to be pyrFur2, pyrHor1, pyrAby1, therKoda1
/cluster/bin/i386/faToNib pyrFur2.chr pyrFur2.chr.nib
/cluster/bin/i386/faToNib pyrHor1.chr pyrHor1.chr.nib
/cluster/bin/i386/faToNib pyrAby1.chr pyrAby1.chr.nib
/cluster/bin/i386/faToNib therKoda1.chr therKoda1.chr.nib
faSize -detailed *.chr > chrom.sizes

#blastz
blastz pyrFur2.chr pyrAby1.chr Q=HoxD55.q > pyrFur2-pyrAby1.lav
blastz pyrFur2.chr pyrHor1.chr Q=HoxD55.q > pyrFur2-pyrHor1.lav
blastz pyrFur2.chr therKoda1.chr Q=HoxD55.q > pyrFur2-therKoda1.lav

#lavToAxt
/cluster/bin/i386/lavToAxt pyrFur2-pyrAby1.lav . . pyrFur2-pyrAby1.axt
/cluster/bin/i386/lavToAxt pyrFur2-pyrHor1.lav . . pyrFur2-pyrHor1.axt
/cluster/bin/i386/lavToAxt pyrFur2-therKoda1.lav . . pyrFur2-therKoda1.axt

#axtBest
axtBest pyrFur2-pyrAby1.axt pyrFur2.chr -winSize=500 -minScore=5000 pyrFur2-pyrAby1-best.axt
axtBest pyrFur2-pyrHor1.axt pyrFur2.chr -winSize=500 -minScore=5000 pyrFur2-pyrHor1-best.axt
axtBest pyrFur2-therKoda1.axt pyrFur2.chr -winSize=500 -minScore=5000 pyrFur2-therKoda1-best.axt

#axtToMaf
axtToMaf pyrFur2-pyrAby1-best.axt chrom.sizes chrom.sizes pyrFur2-pyrAby1.maf
axtToMaf pyrFur2-pyrHor1-best.axt chrom.sizes chrom.sizes pyrFur2-pyrHor1.maf
axtToMaf pyrFur2-therKoda1-best.axt chrom.sizes chrom.sizes pyrFur2-therKoda1.maf

#multiz
#delete extra header lines from maf files
multiz pyrFur2-pyrAby1.maf pyrFur2-pyrHor1.maf - > pyrFur2-pyrAby1-pyrHor1.maf
multiz pyrFur2-therKoda1.maf pyrFur2-pyrAby1-pyrHor1.maf - > pyrFur2-pyrAby1-pyrHor1-therKoda1.maf

#phyloHMM
/cluster/bin/phast/msa_view -i MAF -M pyrFur2.chr -o SS pyrFur2-pyrAby1-pyrHor1-therKoda1.maf > pyrFur2.ss
/cluster/bin/phast/phyloFit -i SS pyrFur2.ss -t "((pyrFur2,(pyrAby1,pyrHor1)),therKoda1)"
/cluster/bin/phast/msa_view -i SS pyrFur2.ss --summary-only
# First phastCons pass: --estimate-trees writes the pyr-tree.*.mod files
# that the second pass reads.
/cluster/bin/phast/phastCons pyrFur2.ss phyloFit.mod --gc 0.4496 \
    --target-coverage 0.7 --estimate-trees pyr-tree \
    --expected-lengths 25 --no-post-probs --ignore-missing \
    --nrates 1,1
# Second pass: scores to cons.dat, conserved elements to pyrFur2-elements.bed.
/cluster/bin/phast/phastCons pyrFur2.ss \
    pyr-tree.cons.mod,pyr-tree.noncons.mod \
    --target-coverage 0.7 --expected-lengths 25 \
    --viterbi pyrFur2-elements.bed --score \
    --require-informative 0 --seqname chr > cons.dat
wigEncode cons.dat phastCons.wig phastCons.wib
draw_tree phyloFit.mod > pyr-tree.ps
#make ai and jpg files in Illustrator
cp pyr-tree.jpg /usr/local/apache/htdocs/images/

#move data
mkdir wib
mv phastCons.wib wib/phastCons.wib
mv phastCons.wig wib/phastCons.wig
ln -s /cluster/data/pyrFur2/bed/conservation/wib/phastCons.wib /gbdb/pyrFur2/wib
mkdir -p /gbdb/pyrFur2/pwMaf
mkdir -p otherSpp/pyrAby1 otherSpp/pyrHor1 otherSpp/therKoda1
mv pyrFur2-pyrAby1.maf otherSpp/pyrAby1/chr.maf
mv pyrFur2-pyrHor1.maf otherSpp/pyrHor1/chr.maf
mv pyrFur2-therKoda1.maf otherSpp/therKoda1/chr.maf
ln -s /cluster/data/pyrFur2/bed/conservation/otherSpp/pyrAby1 /gbdb/pyrFur2/pwMaf/pyrAby1_pwMaf
ln -s /cluster/data/pyrFur2/bed/conservation/otherSpp/pyrHor1 /gbdb/pyrFur2/pwMaf/pyrHor1_pwMaf
ln -s /cluster/data/pyrFur2/bed/conservation/otherSpp/therKoda1 /gbdb/pyrFur2/pwMaf/therKoda1_pwMaf
mkdir multiz
mv pyrFur2-pyrAby1-pyrHor1-therKoda1.maf multiz/chr.maf
ln -s /cluster/data/pyrFur2/bed/conservation/multiz /gbdb/pyrFur2/multizPf2Pa1Ph1Tk1

#load
hgLoadWiggle pyrFur2 phastCons /cluster/data/pyrFur2/bed/conservation/wib/phastCons.wig
hgLoadMaf -warn pyrFur2 multizPf2Pa1Ph1Tk1
hgLoadMaf -warn pyrFur2 pyrAby1_pwMaf -pathPrefix=/gbdb/pyrFur2/pwMaf/pyrAby1_pwMaf
hgLoadMaf -warn pyrFur2 pyrHor1_pwMaf -pathPrefix=/gbdb/pyrFur2/pwMaf/pyrHor1_pwMaf
hgLoadMaf -warn pyrFur2 therKoda1_pwMaf -pathPrefix=/gbdb/pyrFur2/pwMaf/therKoda1_pwMaf
hgLoadBed pyrFur2 phastConsElements pyrFur2-elements.bed

#trackDb
cd ~/kent/src/hg/makeDb/trackDb/archae/pyrFur2
#trackDb.ra entry
# track multizPf2Pa1Ph1Tk1
# shortLabel Conservation
# longLabel Pyrococcus 4-way multiz alignments
# group compGeno
# priority 10.0
# visibility pack
# type wigMaf 0.0 1.0
# maxHeightPixels 100:40:11
# wiggle phastCons
# yLineOnOff Off
# autoScale Off
# pairwise pwMaf
# speciesOrder pyrAby1 pyrHor1 therKoda1
cvs commit -m "New multiz track" trackDb.ra

#html page for multizPf2Pa1Ph1Tk1
cd ~/kent/src/hg/makeDb/trackDb/archae/pyrFur2
mv multizPyro.html multizPf2Pa1Ph1Tk1.html
cvs remove multizPyro.html
cvs add multizPf2Pa1Ph1Tk1.html
cvs commit -m "Details page for multiz track" multizPf2Pa1Ph1Tk1.html

#KEGG
# NOTE(review): this works in a bed/ directory inside the trackDb source
# tree, whereas every other track uses /cluster/data/pyrFur2/bed -- confirm
# the intended location.
cd ~/kent/src/hg/makeDb/trackDb/archae/pyrFur2/bed
mkdir kegg
cd kegg
~/kent/src/hg/protein/KGpath.sh pyrFur2 pyrFur2 051019

### RNA LP FOLD (Daryl Thomas and Matthias Hoechsmann; September 7, 2006)
# This section is written in csh/tcsh syntax ("set name = value" and the
# ">!" overwrite redirection); it does not do the same thing under sh/bash.
set workDir = /cluster/data/pyrFur2/bed/rnaLpFold
mkdir -p $workDir
cd $workDir
set file = pyrFur2_dna.bed
sed 's/chr1/chr/' ~mhoechsm/transfer/${file} >! ${file}
# cp ~mhoechsm/transfer/${file} ${file}
# Print the largest (end - start) span found in the file.
awk '{if ($3-$2>max) max=$3-$2} END {print max}' ${file}
set db = pyrFur2
hgsql ${db} -e "drop table rnaLpFold"
hgsql ${db} < ${HOME}/kent/src/hg/lib/rnaLpFold.sql
hgLoadBed -noSort -oldTable -strict ${db} rnaLpFold ${file}
rm -f bed.tab
###