# for emacs: -*- mode: sh; -*-

# This file describes building the browser database for the archaeal
# species Sulfolobus tokodaii (database name: sulfToko1).
#
# It is a record of commands that were typed by hand, section by section;
# it is not meant to be executed top to bottom as one script.  The
# "ssh hgwdev" lines mean "run the following commands on hgwdev".
# Sections marked (TO DO) have not been run yet.
#
# if this is the first time you are making your own hgwdev browser, need to do
# cd ~/kent/src/, then a make

# DOWNLOAD SEQUENCE FROM GENBANK (DONE 10/05)

    mkdir /cluster/store5/archae/sulfToko1
    ln -s /cluster/store5/archae/sulfToko1 /cluster/data/sulfToko1
    cd /cluster/data/sulfToko1
    cp /projects/lowelab/db/Bacteria/Sulfolobus_tokodaii/Sulf_toko* .
    mv Sulf_toko.fa sulfToko1.fa
    grep ">" sulfToko1.fa
    # Edit header of sulfToko1.fa seqs to '>chr >plasmid_pNRC100 >plasmid_pNRC200'
    # NOTE(review): the plasmid names above look copied from another genome's
    # doc; the rest of this file only refers to a single sequence named "chr"
    # (dbDb defaultPos, phastCons --seqname chr) -- confirm.
    faToTwoBit sulfToko1.fa sulfToko1.2bit
    mkdir /gbdb/sulfToko1
    ln -s /cluster/data/sulfToko1/sulfToko1.2bit /gbdb/sulfToko1/sulfToko1.2bit

# CREATE DATABASES AND A BUNCH OF INITIAL STUFF (DONE 10/05)

    ssh hgwdev
    echo 'create database sulfToko1' | hgsql ''
    cd /cluster/data/sulfToko1
    faSize -detailed sulfToko1.fa > chrom.sizes
    # The grp table is copied from hg16.
    echo "create table grp (PRIMARY KEY(NAME)) select * from hg16.grp" \
      | hgsql sulfToko1
    # Register the new database in hgcentraltest (dbDb, defaultDb, genomeClade).
    echo 'INSERT INTO dbDb (name, description, nibPath, organism, defaultPos, active, orderKey, genome, scientificName, htmlPath, hgNearOk) values ("sulfToko1", "August 2001", "/gbdb/sulfToko1", "Sulfolobus tokodaii", "chr:500000-550000", 1, 222, "Sulfolobus tokodaii","Sulfolobus tokodaii str. 7", "/gbdb/sulfToko1/html/description.html", 0);' \
      | hgsql hgcentraltest
    echo 'INSERT INTO defaultDb (genome, name) values ("Sulfolobus tokodaii", "sulfToko1");' \
      | hgsql hgcentraltest
    echo 'INSERT INTO genomeClade (genome, clade, priority) values ("Sulfolobus tokodaii", "archaea",85);' \
      | hgsql hgcentraltest

# CREATE CHROMINFO TABLE (DONE 10/05)

    ssh hgwdev
    cd /cluster/data/sulfToko1
    cp ~baertsch/kent/src/hg/lib/chromInfo.sql .
    hgsql sulfToko1 < chromInfo.sql
    echo "load data local infile 'chrom.sizes' into table chromInfo" \
      | hgsql sulfToko1
    # chrom.sizes carries no file name column, so point every row at the 2bit.
    echo "update chromInfo set fileName = '/gbdb/sulfToko1/sulfToko1.2bit'" \
      | hgsql sulfToko1

    cd ~/kent/src/hg/makeDb/trackDb
    # add the trackDb directories
    mkdir -p archae/sulfToko1
    cvs add archae/sulfToko1
    cvs commit archae/sulfToko1
    make DBS=sulfToko1

# GC20BASE (DONE 10/05)

    mkdir -p /cluster/data/sulfToko1/bed/gc20Base
    cd /cluster/data/sulfToko1/bed/gc20Base
    hgGcPercent -wigOut -doGaps -file=stdout -overlap=10 -win=20 sulfToko1 /cluster/data/sulfToko1/ \
      | wigEncode stdin gc20Base.wig gc20Base.wib
    cd /cluster/data/sulfToko1/bed/gc20Base
    mkdir /gbdb/sulfToko1/wib
    ln -s `pwd`/gc20Base.wib /gbdb/sulfToko1/wib
    hgLoadWiggle -pathPrefix=/gbdb/sulfToko1/wib sulfToko1 gc20Base gc20Base.wig
    # verify index is correct:
    hgsql sulfToko1 -e "show index from gc20Base;"
    # should see good numbers in Cardinality column

# TANDEM REPEAT MASKER (DONE 10/05)

    ssh hgwdev
    mkdir -p /cluster/data/sulfToko1/bed/simpleRepeat
    cd /cluster/data/sulfToko1
    trfBig sulfToko1.fa /dev/null -bedAt=/cluster/data/sulfToko1/bed/simpleRepeat/chr.bed
    cd /cluster/data/sulfToko1/bed/simpleRepeat
    hgLoadBed sulfToko1 simpleRepeat *.bed -sqlTable=/cluster/home/lowe/kent/src/hg/lib/simpleRepeat.sql

# MULTIZ (DONE 10/7/05)

    cd /cluster/data/sulfToko1/bed
    mkdir conservation
    cd conservation
    # Reuse the score matrix and sequence files prepared for earlier builds.
    cp /cluster/data/pyrFur2/bed/conservation/HoxD55.q .
    cp /cluster/data/sulSol1/bed/conservation/sulAci1.chr sulfAcid1.chr
    cp /cluster/data/sulSol1/bed/conservation/sulTok1.chr sulfToko1.chr
    cp /cluster/data/sulSol1/bed/conservation/sulSol1.chr .
    #edit headers for Aci and Tok to be sulfAcid1, sulfToko1
    /cluster/bin/i386/faToNib sulfAcid1.chr sulfAcid1.chr.nib
    /cluster/bin/i386/faToNib sulfToko1.chr sulfToko1.chr.nib
    cp /cluster/data/sulSol1/bed/conservation/sulSol1.chr.nib .
    faSize -detailed *.chr > chrom.sizes

    #blastz for Tok-Aci
    blastz sulfToko1.chr sulfAcid1.chr Q=HoxD55.q > sulfToko1-sulfAcid1.lav
    /cluster/bin/i386/lavToAxt sulfToko1-sulfAcid1.lav . . sulfToko1-sulfAcid1.axt
    axtBest sulfToko1-sulfAcid1.axt sulfToko1.chr -winSize=500 -minScore=5000 sulfToko1-sulfAcid1-best.axt
    axtToMaf sulfToko1-sulfAcid1-best.axt chrom.sizes chrom.sizes sulfToko1-sulfAcid1.maf

    #blastz for Tok-Sol
    blastz sulfToko1.chr sulSol1.chr Q=HoxD55.q > sulfToko1-sulSol1.lav
    /cluster/bin/i386/lavToAxt sulfToko1-sulSol1.lav . . sulfToko1-sulSol1.axt
    axtBest sulfToko1-sulSol1.axt sulfToko1.chr -winSize=500 -minScore=5000 sulfToko1-sulSol1-best.axt
    axtToMaf sulfToko1-sulSol1-best.axt chrom.sizes chrom.sizes sulfToko1-sulSol1.maf

    #multiz
    #delete extra header lines from maf files
    multiz sulfToko1-sulfAcid1.maf sulfToko1-sulSol1.maf - > sulfToko1-sulfAcid1-sulSol1.maf

    #phyloHMM
    /cluster/bin/phast/msa_view -i MAF -M sulfToko1.chr -o SS sulfToko1-sulfAcid1-sulSol1.maf > sulfToko1.ss
    /cluster/bin/phast/phyloFit -i SS sulfToko1.ss -t "(sulSol1,(sulfToko1,sulfAcid1))"
    /cluster/bin/phast/msa_view -i SS sulfToko1.ss --summary-only
    # First phastCons pass estimates the sul-tree models ...
    /cluster/bin/phast/phastCons sulfToko1.ss phyloFit.mod --gc 0.3461 \
        --target-coverage 0.7 --estimate-trees sul-tree \
        --expected-lengths 25 --no-post-probs --ignore-missing \
        --nrates 1,1
    # ... second pass uses them to write the scores and conserved elements.
    /cluster/bin/phast/phastCons sulfToko1.ss \
        sul-tree.cons.mod,sul-tree.noncons.mod \
        --target-coverage 0.7 --expected-lengths 25 \
        --viterbi sulfToko1-elements.bed --score \
        --require-informative 0 --seqname chr > cons.dat
    wigEncode cons.dat phastCons.wig phastCons.wib
    draw_tree phyloFit.mod > sulfToko1-tree.ps
    #check that tree is similar to sul-tree.ps from sulSol1

    #move data
    mkdir wib
    mv phastCons.wib wib/phastCons.wib
    mv phastCons.wig wib/phastCons.wig
    ln -s /cluster/data/sulfToko1/bed/conservation/wib/phastCons.wib /gbdb/sulfToko1/wib
    mkdir -p /gbdb/sulfToko1/pwMaf
    mkdir -p otherSul/sulfAcid1 otherSul/sulSol1
    mv sulfToko1-sulfAcid1.maf otherSul/sulfAcid1/chr.maf
    mv sulfToko1-sulSol1.maf otherSul/sulSol1/chr.maf
    ln -s /cluster/data/sulfToko1/bed/conservation/otherSul/sulfAcid1 /gbdb/sulfToko1/pwMaf/sulfAcid1_pwMaf
    ln -s /cluster/data/sulfToko1/bed/conservation/otherSul/sulSol1 /gbdb/sulfToko1/pwMaf/sulSol1_pwMaf
    mkdir multiz
    mv sulfToko1-sulfAcid1-sulSol1.maf multiz/chr.maf
    ln -s /cluster/data/sulfToko1/bed/conservation/multiz /gbdb/sulfToko1/multizSt1Sa1Ss1

    #load
    hgLoadWiggle sulfToko1 phastCons /cluster/data/sulfToko1/bed/conservation/wib/phastCons.wig
    hgLoadMaf -warn sulfToko1 multizSt1Sa1Ss1
    # Pairwise table names follow the /gbdb/sulfToko1/pwMaf/ symlink names and
    # the "pairwise pwMaf" / "speciesOrder sulfAcid1 sulSol1" trackDb settings
    # below, i.e. <species>_pwMaf.
    hgLoadMaf -warn sulfToko1 sulfAcid1_pwMaf -pathPrefix=/gbdb/sulfToko1/pwMaf/sulfAcid1_pwMaf
    hgLoadMaf -warn sulfToko1 sulSol1_pwMaf -pathPrefix=/gbdb/sulfToko1/pwMaf/sulSol1_pwMaf
    hgLoadBed sulfToko1 phastConsElements sulfToko1-elements.bed

    #trackDb
    cd ~/kent/src/hg/makeDb/trackDb/archae
    mkdir sulfToko1
    cvs add sulfToko1
    cd sulfToko1
    #trackDb.ra entry
    # track multizSt1Sa1Ss1
    # shortLabel Conservation
    # longLabel Sulfolobus 3-way multiz alignments
    # group compGeno
    # priority 10.0
    # visibility pack
    # type wigMaf 0.0 1.0
    # maxHeightPixels 100:40:11
    # wiggle phastCons
    # yLineOnOff Off
    # autoScale Off
    # pairwise pwMaf
    # speciesOrder sulfAcid1 sulSol1
    cvs add trackDb.ra
    cvs commit -m "Added multiz track" trackDb.ra

    #html page for multizSt1Sa1Ss1
    cd ~/kent/src/hg/makeDb/trackDb/archae/sulfToko1
    cvs add multizSt1Sa1Ss1.html
    cvs commit -m "Details page for multiz track" multizSt1Sa1Ss1.html

# DESCRIPTION PAGE (DONE 10/20/05), kpollard

    # Write ~/kent/src/hg/makeDb/trackDb/archae/sulfToko1/description.html
    cd ~/kent/src/hg/makeDb/trackDb/archae/sulfToko1/
    chmod a+r description.html
    cvs add description.html
    cvs commit -m "description page" description.html
    mkdir -p /cluster/data/sulfToko1/html/
    cp ~/kent/src/hg/makeDb/trackDb/archae/sulfToko1/description.html \
        /cluster/data/sulfToko1/html/description.html
    # dbDb.htmlPath (set above) points at this /gbdb location.
    mkdir -p /gbdb/sulfToko1/html
    ln -s /cluster/data/sulfToko1/html/description.html /gbdb/sulfToko1/html/

# GENBANK PROTEIN-CODING GENES (TO DO)

    ssh hgwdev
    mkdir /cluster/data/sulfToko1/genbank
    cd /cluster/data/sulfToko1/genbank
    # NOTE(review): the cp below names a directory, not a file, so it will not
    # copy anything as written, and NC_003552 looks like an accession left over
    # from another genome's doc.  Confirm the S. tokodaii GenBank file name in
    # the source directory before running this section.
    cp /projects/lowelab/db/Bacteria/Sulfolobus_tokodaii/ .
    mv NC_003552.gbk sulfToko1.gbk

    # Create 3 files to assist parsing of the genbank
    # 1. for a bed file
    echo 'chr start end gene 1000 strand' > sulfToko1-params-bed.txt
    # 2. for the peptide parts
    echo 'gene translation' > sulfToko1-params-pep.txt
    # 3. for the other gene information
    echo 'gene product note' > sulfToko1-params-xra.txt

    # Now extract the genes and information:
    gbArchaeGenome sulfToko1.gbk sulfToko1-params-bed.txt sulfToko1-genbank-cds.bed
    gbArchaeGenome sulfToko1.gbk sulfToko1-params-pep.txt sulfToko1-genbank-cds.pep
    gbArchaeGenome sulfToko1.gbk sulfToko1-params-xra.txt sulfToko1-genbank-cds.xra
    hgLoadBed sulfToko1 gbProtCode sulfToko1-genbank-cds.bed
    # Create the generic tables, then rename them to the gbProtCode* names.
    hgsql sulfToko1 < ~/kent/src/hg/lib/pepPred.sql
    hgsql sulfToko1 < ~/kent/src/hg/lib/minGeneInfo.sql
    echo rename table pepPred to gbProtCodePep | hgsql sulfToko1
    echo rename table minGeneInfo to gbProtCodeXra | hgsql sulfToko1
    echo load data local infile \'sulfToko1-genbank-cds.pep\' into table gbProtCodePep | hgsql sulfToko1
    echo load data local infile \'sulfToko1-genbank-cds.xra\' into table gbProtCodeXra | hgsql sulfToko1

    #genbank to genePred
    csh
    tawk '{print $1,$2,$3,$4,$5,$6,$2,$3,0,1,$3-$2,0}' sulfToko1-genbank-cds.bed \
      | bedToGenePred stdin tmp.gp
    tawk '{print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,substr($1,3,4),name2,"cmpl","cmpl",0}' tmp.gp > tmp2.gp
    join -t " " -o 1.1,1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.10 1.11 2.3 1.13 1.14 1.15 tmp2.gp sulfToko1-genbank-cds.xra > sulfToko1.gp

# GENBANK rRNA GENES (TO DO)

    ssh hgwdev
    cd /cluster/data/sulfToko1/genbank
    gbArchaeGenome -kind=rRNA sulfToko1.gbk sulfToko1-params-bed.txt sulfToko1-rrnas.bed
    echo 'gene product NA' > sulfToko1-params-rrna-xra.txt
    gbArchaeGenome -kind=rRNA sulfToko1.gbk sulfToko1-params-rrna-xra.txt sulfToko1-rrnas-xra.txt
    hgLoadBed sulfToko1 gbRRNA sulfToko1-rrnas.bed
    hgsql sulfToko1 < ~/kent/src/hg/lib/minGeneInfo.sql
    echo rename table minGeneInfo to gbRRNAXra | hgsql sulfToko1
    echo load data local infile \'sulfToko1-rrnas-xra.txt\' into table gbRRNAXra | hgsql sulfToko1

# COG STUFF (TO DO)

    # Cut and paste http://www.ncbi.nlm.nih.gov/cgi-bin/COG/palox into emacs (COG list)
    # and save as cogpage.txt
    awk '{printf("%s\t%s\n",$6,$5)}' < cogpage.txt \
      | sed -e 's/\[//' -e 's/\]//' > cogs.txt
    rm cogpage.txt
    # Now we have the basic list of cogs and the letter code for each one.

# TODD LOWE tRNA GENES (TO DO)

    # This one is a bed 6+ file created by hand of 46 tRNAs and 1 pseudo tRNA by Todd
    # Lowe.  See ~/kent/src/hg/lib/loweTrnaGene.as for a description of the fields.
    # **Showing the tRNAScanSE instructions would be nice in the future.
    ssh hgwdev
    mkdir /cluster/data/sulfToko1/bed/loweTrnaGene
    cd /cluster/data/sulfToko1/bed/loweTrnaGene
    # $HOME rather than "~": the shell only expands a tilde at the start of a
    # word, so "-sqlTable=~/..." would be passed to hgLoadBed literally.
    hgLoadBed -tab sulfToko1 loweTrnaGene sulfToko1-lowe-trnas.bed -sqlTable=$HOME/kent/src/hg/lib/loweTrnaGene.sql

# TODD LOWE snoRNA GENES (TO DO)

    # This is a bed 6 file created by hand.
    ssh hgwdev
    mkdir /cluster/data/sulfToko1/bed/loweSnoGene
    cd /cluster/data/sulfToko1/bed/loweSnoGene
    hgLoadBed -tab sulfToko1 loweSnoGene sulfToko1-snos.bed

# TIGR GENES (TO DO)

    # First go to http://www.tigr.org/tigr-scripts/CMR2/gene_attribute_form.dbi
    # and fill out the web form as follows:
    #   - Pick "Retrieve attributes for the specified DNA feature within a specific
    #     organism and/or a specific role category".
    #       * Pick the Sulfolobus tokodaii entry, and "Primary and TIGR annotation ORFs"
    #         from the 1st and 3rd box.  (This note previously said
    #         "Pyrobaculum aerophilum IM2", which is a different genome.)
    #       * Select everything from "Choose TIGR Annotation Gene Attributes"
    #       * Select "Primary Locus Name" from "Choose Primary Annotation Gene Attributes"
    #       * Select everything from "Choose Other Gene Attributes"
    #   - Click submit, and click save as tab-delimited file.
    ssh hgwdev
    mkdir /cluster/data/sulfToko1/bed/tigrCmrORFs
    cp sulfToko1-tigr.tab /cluster/data/sulfToko1/bed/tigrCmrORFs
    cd /cluster/data/sulfToko1/bed/tigrCmrORFs
    /projects/lowelab/users/aamp/bin/i386/tigrCmrToBed sulfToko1-tigr.tab sulfToko1-tigr.bed
    # $HOME rather than "~": a tilde after "=" in an option is not expanded.
    hgLoadBed -tab sulfToko1 tigrCmrORFs sulfToko1-tigr.bed -sqlTable=$HOME/kent/src/hg/lib/tigrCmrGene.sql