# for emacs: -*- mode: sh; -*-

# This file describes building the browser database for the archaeal
# species Methanopyrus kandleri AV19 (database name: methKand1).
#
# It is a log of commands that were typed by hand on several hosts
# (each "ssh <host>" marks a change of machine); it is not meant to be
# executed top-to-bottom as a single script.


# DOWNLOAD SEQUENCE FROM GENBANK (DONE)

ssh eieio
mkdir /cluster/store5/archae/methKand1
ln -s /cluster/store5/archae/methKand1 /cluster/data/methKand1
cd /cluster/data/methKand1
cp /projects/lowelab/db/Bacteria/Methanopyrus_kandleri/NC_003551.fna .
mv NC_003551.fna NC_003551.fa
# Edit header of *.fa to '> methKand1 >smallextrachr >largeextrachr'
# NOTE(review): every later step in this doc refers to the single sequence
# as "chr" (defaultPos "chr:500000-550000", phastCons --seqname chr), so the
# header presumably has to be ">chr" -- confirm before re-running.
cat NC_003551.fa > methKand1.fa
faToTwoBit methKand1.fa methKand1.2bit


# CREATE DATABASES AND A BUNCH OF INITIAL STUFF (DONE)

ssh hgwdev
echo 'create database methKand1' | hgsql ''
cd /cluster/data/methKand1
faSize -detailed methKand1.fa > chrom.sizes
# The grp table is copied from hg16.
echo "create table grp (PRIMARY KEY(NAME)) select * from hg16.grp" \
    | hgsql methKand1
# Register the assembly in hgcentraltest (dbDb, defaultDb, genomeClade).
echo 'INSERT INTO dbDb \
    (name, description, nibPath, organism, \
    defaultPos, active, orderKey, genome, scientificName, \
    htmlPath, hgNearOk) values \
    ("methKand1", "April 2002", "/gbdb/methKand1", "Methanopyrus kandleri", \
    "chr:500000-550000", 1, 242, "Methanopyrus kandleri", \
    "Methanopyrus kandleri AV19", "/gbdb/methKand1/html/description.html", \
    0);' \
    | hgsql hgcentraltest
echo 'INSERT INTO defaultDb (genome, name) values ("Methanopyrus kandleri", "methKand1");' \
    | hgsql hgcentraltest
echo 'INSERT INTO genomeClade (genome, clade, priority) values ("Methanopyrus kandleri", "archaea",85);' \
    | hgsql hgcentraltest


# CREATE CHROMINFO TABLE (DONE)

ssh hgwdev
cd /cluster/data/methKand1
cp ~baertsch/kent/src/hg/lib/chromInfo.sql .
# (CREATE CHROMINFO TABLE, continued) Create the table from the schema file,
# load chrom.sizes into it, then point every row at the .2bit file.
hgsql methKand1 < chromInfo.sql
echo "load data local infile 'chrom.sizes' into table chromInfo" | hgsql methKand1
echo "update chromInfo set fileName = '/gbdb/methKand1/methKand1.2bit'" | hgsql methKand1
# NOTE(review): no step visible in this doc creates /gbdb/methKand1 or links
# methKand1.2bit into it -- confirm that was done separately.

# Add the trackDb directories.  (This step was recorded twice in the
# original notes; the first copy ran "cvs add archae/methKand1" before the
# parent "archae" directory had been added.  Only the complete sequence is
# kept here.)
cd ~/kent/src/hg/makeDb/trackDb
# edit the trackDb makefile
mkdir -p archae/methKand1
cvs add archae
cvs add archae/methKand1
cvs commit


# GC20BASE (DONE)

ssh kkstore02
mkdir -p /cluster/data/methKand1/bed/gc20Base
cd /cluster/data/methKand1/bed/gc20Base
hgGcPercent -wigOut -doGaps -file=stdout -win=20 methKand1 \
    /cluster/data/methKand1/ | wigEncode stdin gc20Base.wig gc20Base.wib
ssh hgwdev
cd /cluster/data/methKand1/bed/gc20Base
# -p: the parent /gbdb/methKand1 is not created by any earlier step here.
mkdir -p /gbdb/methKand1/wib
ln -s `pwd`/gc20Base.wib /gbdb/methKand1/wib
hgLoadWiggle -pathPrefix=/gbdb/methKand1/wib methKand1 gc20Base gc20Base.wig
# verify index is correct:
hgsql methKand1 -e "show index from gc20Base;"
# should see good numbers in Cardinality column


# TANDEM REPEAT MASKER (DONE)

ssh hgwdev
mkdir -p /cluster/data/methKand1/bed/simpleRepeat
cd /cluster/data/methKand1
trfBig methKand1.fa /dev/null -bedAt=/cluster/data/methKand1/bed/simpleRepeat/chr.bed
cd /cluster/data/methKand1/bed/simpleRepeat
hgLoadBed methKand1 simpleRepeat *.bed -sqlTable=/cluster/home/baertsch/kent/src/hg/lib/simpleRepeat.sql


# MULTIZ with methKand1, methJann1, methTher1, metMar1
# DONE (10/11/05), kpollard

cd /cluster/data/methKand1/bed/
mkdir conservation
cd conservation
# Reuse the scoring matrix and per-species sequence files already prepared
# for the metMar1 conservation track.
cp /cluster/data/metMar1/bed/conservation/HoxD55.q .
cp /cluster/data/metMar1/bed/conservation/metMar1.chr .
cp /cluster/data/metMar1/bed/conservation/methJann1.chr .
cp /cluster/data/metMar1/bed/conservation/methKand1.chr .
cp /cluster/data/metMar1/bed/conservation/methTher1.chr .
cp /cluster/data/metMar1/bed/conservation/metMar1.chr.nib .
cp /cluster/data/metMar1/bed/conservation/methKand1.chr.nib .
# (MULTIZ, continued) remaining sequence files copied from the metMar1 build
cp /cluster/data/metMar1/bed/conservation/methTher1.chr.nib .
cp /cluster/data/metMar1/bed/conservation/methJann1.2bit .
#chrom sizes
faSize -detailed *.chr > chrom.sizes
#blastz
# Pairwise alignments, always with methKand1 as the first (reference) sequence.
blastz methKand1.chr methJann1.chr Q=HoxD55.q > methKand1-methJann1.lav
blastz methKand1.chr methTher1.chr Q=HoxD55.q > methKand1-methTher1.lav
blastz methKand1.chr metMar1.chr Q=HoxD55.q > methKand1-metMar1.lav
# lav -> axt.  methJann1 is read from its .2bit file, the others from the
# .nib files in the current directory.
/cluster/bin/i386/lavToAxt methKand1-methJann1.lav . methJann1.2bit methKand1-methJann1.axt
/cluster/bin/i386/lavToAxt methKand1-methTher1.lav . . methKand1-methTher1.axt
/cluster/bin/i386/lavToAxt methKand1-metMar1.lav . . methKand1-metMar1.axt
axtBest methKand1-methJann1.axt methKand1.chr -winSize=500 -minScore=5000 methKand1-methJann1-best.axt
axtBest methKand1-methTher1.axt methKand1.chr -winSize=500 -minScore=5000 methKand1-methTher1-best.axt
axtBest methKand1-metMar1.axt methKand1.chr -winSize=500 -minScore=5000 methKand1-metMar1-best.axt
axtToMaf methKand1-methJann1-best.axt chrom.sizes chrom.sizes methKand1-methJann1.maf
axtToMaf methKand1-methTher1-best.axt chrom.sizes chrom.sizes methKand1-methTher1.maf
axtToMaf methKand1-metMar1-best.axt chrom.sizes chrom.sizes methKand1-metMar1.maf
#multiz
#remove extra header lines
multiz methKand1-methJann1.maf methKand1-metMar1.maf - > methKand1-methJann1-metMar1.maf
multiz methKand1-methTher1.maf methKand1-methJann1-metMar1.maf - > methKand1-methJann1-metMar1-methTher1.maf
#phyloHMM
/cluster/bin/phast/msa_view -i MAF -M methKand1.chr -o SS methKand1-methJann1-metMar1-methTher1.maf > methKand1.ss
/cluster/bin/phast/phyloFit -i SS methKand1.ss -t "(methKand1,(methTher1,(methJann1,metMar1)))" -o MkMjMmMt
/cluster/bin/phast/msa_view -i SS methKand1.ss --summary-only
#add GC content to next call
# First phastCons run estimates the models (written with prefix met-tree);
# the second run uses them to produce the scores and conserved elements.
/cluster/bin/phast/phastCons methKand1.ss MkMjMmMt.mod --gc 0.5354 \
    --target-coverage 0.7 --estimate-trees met-tree \
    --expected-lengths 25 --no-post-probs --ignore-missing \
    --nrates 1,1
/cluster/bin/phast/phastCons methKand1.ss \
    met-tree.cons.mod,met-tree.noncons.mod \
    --target-coverage 0.7 --expected-lengths 25 \
    --viterbi methKand1-elements.bed --score \
    --require-informative 0 --seqname chr > cons.dat
wigEncode cons.dat phastCons.wig phastCons.wib
/cluster/bin/phast/draw_tree MkMjMmMt.mod > met-tree.ps
#compare to metMar1 met-tree.ps
#move data
mkdir wib
mv phastCons.wib wib/phastCons.wib
mv phastCons.wig wib/phastCons.wig
ln -s /cluster/data/methKand1/bed/conservation/wib/phastCons.wib /gbdb/methKand1/wib
mkdir -p /gbdb/methKand1/pwMaf
mkdir -p otherSpp/methJann1 otherSpp/methTher1 otherSpp/metMar1
mv methKand1-methTher1.maf otherSpp/methTher1/chr.maf
mv methKand1-methJann1.maf otherSpp/methJann1/chr.maf
mv methKand1-metMar1.maf otherSpp/metMar1/chr.maf
ln -s /cluster/data/methKand1/bed/conservation/otherSpp/methTher1 /gbdb/methKand1/pwMaf/methTher1_pwMaf
ln -s /cluster/data/methKand1/bed/conservation/otherSpp/metMar1 /gbdb/methKand1/pwMaf/metMar1_pwMaf
ln -s /cluster/data/methKand1/bed/conservation/otherSpp/methJann1 /gbdb/methKand1/pwMaf/methJann1_pwMaf
mkdir multiz
mv methKand1-methJann1-metMar1-methTher1.maf multiz/chr.maf
ln -s /cluster/data/methKand1/bed/conservation/multiz /gbdb/methKand1/multizMkMjMmMt
#load
hgLoadWiggle methKand1 phastCons /cluster/data/methKand1/bed/conservation/wib/phastCons.wig
hgLoadMaf -warn methKand1 multizMkMjMmMt
hgLoadMaf -warn methKand1 methTher1_pwMaf -pathPrefix=/gbdb/methKand1/pwMaf/methTher1_pwMaf
hgLoadMaf -warn methKand1 metMar1_pwMaf -pathPrefix=/gbdb/methKand1/pwMaf/metMar1_pwMaf
hgLoadMaf -warn methKand1 methJann1_pwMaf -pathPrefix=/gbdb/methKand1/pwMaf/methJann1_pwMaf
hgLoadBed methKand1 phastConsElements methKand1-elements.bed
#trackDb
cd ~/kent/src/hg/makeDb/trackDb/archae/
# -p: this directory was already created in the CHROMINFO section.
mkdir -p methKand1
cvs add methKand1
cd methKand1
#trackDb.ra entry
# NOTE(review): the longLabel below names Thermoplasma/Ferroplasma/Picrophilus,
# but the species aligned above are methJann1, methTher1 and metMar1 -- it
# looks copied from another assembly's doc; confirm against trackDb.ra.
# track multizMkMjMmMt
# shortLabel Conservation
# longLabel Thermoplasma/Ferroplasma/Picrophilus multiz alignments
# group compGeno
# priority 10.0
# visibility pack
# type wigMaf 0.0 1.0
# maxHeightPixels 100:40:11
# wiggle phastCons
# yLineOnOff Off
# autoScale Off
# pairwise pwMaf
# speciesOrder methJann1 methTher1 metMar1
cvs add trackDb.ra
cvs commit -m "New multiz track" trackDb.ra
#html page
cvs add multizMkMjMmMt.html
cvs commit -m "Details page for multiz track" multizMkMjMmMt.html


# DESCRIPTION PAGE ()

ssh hgwdev
# Write ~/kent/src/hg/makeDb/trackDb/archae/methKand1/description.html
chmod a+r ~/kent/src/hg/makeDb/trackDb/archae/methKand1/description.html
# Check it in.
# NOTE(review): the symlink below points at
# /cluster/data/methKand1/html/description.html, which no step in this doc
# creates -- confirm the page is copied there.
mkdir -p /gbdb/methKand1/html
ln -s /cluster/data/methKand1/html/description.html /gbdb/methKand1/html/


# GENBANK PROTEIN-CODING GENES ()

ssh hgwdev
mkdir /cluster/data/methKand1/genbank
cd /cluster/data/methKand1/genbank
# Fetch the GenBank record for the same accession the assembly was built
# from (NC_003551, Methanopyrus kandleri).  The original notes fetched
# Methanosarcina_acetivorans/NC_003552.gbk, a different genome.
# NOTE(review): directory name assumed to mirror the local
# /projects/lowelab/db/Bacteria/Methanopyrus_kandleri path -- confirm.
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/Methanopyrus_kandleri/NC_003551.gbk
mv NC_003551.gbk methKand1.gbk
# Create 3 files to assist parsing of the genbank
# 1. for a bed file
echo 'chr start end gene 1000 strand' > methKand1-params-bed.txt
# 2. for the peptide parts
echo 'gene translation' > methKand1-params-pep.txt
# 3. for the other gene information
echo 'gene product note' > methKand1-params-xra.txt
# Now extract the genes and information:
gbArchaeGenome methKand1.gbk methKand1-params-bed.txt methKand1-genbank-cds.bed
gbArchaeGenome methKand1.gbk methKand1-params-pep.txt methKand1-genbank-cds.pep
gbArchaeGenome methKand1.gbk methKand1-params-xra.txt methKand1-genbank-cds.xra
hgLoadBed methKand1 gbProtCode methKand1-genbank-cds.bed
# The schema files create tables named pepPred / minGeneInfo; rename each
# one to its track-specific name before loading the data.
hgsql methKand1 < ~/kent/src/hg/lib/pepPred.sql
hgsql methKand1 < ~/kent/src/hg/lib/minGeneInfo.sql
echo rename table pepPred to gbProtCodePep | hgsql methKand1
echo rename table minGeneInfo to gbProtCodeXra | hgsql methKand1
echo load data local infile \'methKand1-genbank-cds.pep\' into table gbProtCodePep | hgsql methKand1
echo load data local infile \'methKand1-genbank-cds.xra\' into table gbProtCodeXra | hgsql methKand1
#genbank to genePred
csh
tawk '{print $1,$2,$3,$4,$5,$6,$2,$3,0,1,$3-$2,0}' methKand1-genbank-cds.bed | bedToGenePred stdin tmp.gp
tawk '{print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,substr($1,3,4),name2,"cmpl","cmpl",0}' tmp.gp > tmp2.gp
# The -t argument is a single literal TAB character (the files joined here
# are tab-separated).
join -t "	" -o 1.1,1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 1.10 1.11 2.3 1.13 1.14 1.15 tmp2.gp methKand1-genbank-cds.xra > methKand1.gp


# GENBANK rRNA GENES ()

ssh hgwdev
cd /cluster/data/methKand1/genbank
gbArchaeGenome -kind=rRNA methKand1.gbk methKand1-params-bed.txt methKand1-rrnas.bed
echo 'gene product NA' > methKand1-params-rrna-xra.txt
gbArchaeGenome -kind=rRNA methKand1.gbk methKand1-params-rrna-xra.txt methKand1-rrnas-xra.txt
hgLoadBed methKand1 gbRRNA methKand1-rrnas.bed
hgsql methKand1 < ~/kent/src/hg/lib/minGeneInfo.sql
echo rename table minGeneInfo to gbRRNAXra | hgsql methKand1
echo load data local infile \'methKand1-rrnas-xra.txt\' into table gbRRNAXra | hgsql methKand1


# COG STUFF ()

# Cut and paste http://www.ncbi.nlm.nih.gov/cgi-bin/COG/palox into emacs (COG list)
# and save as cogpage.txt
awk '{printf("%s\t%s\n",$6,$5)}' < cogpage.txt | sed -e 's/\[//' -e 's/\]//' > cogs.txt
rm cogpage.txt
# Now we have the basic list of cogs and the letter code for each one.


# TODD LOWE tRNA GENES ()

# This one is a bed 6+ file created by hand of 46 tRNAs and 1 pseudo tRNA by Todd
# Lowe.  See ~/kent/src/hg/lib/loweTrnaGene.as for a description of the fields.
# **Showing the tRNAScanSE instructions would be nice in the future.
ssh hgwdev
mkdir /cluster/data/methKand1/bed/loweTrnaGene
cd /cluster/data/methKand1/bed/loweTrnaGene
# $HOME rather than "~": a tilde in the middle of a word (after "=") is not
# expanded by the shell, so the loader would receive a literal "~/kent/...".
hgLoadBed -tab methKand1 loweTrnaGene methKand1-lowe-trnas.bed -sqlTable=$HOME/kent/src/hg/lib/loweTrnaGene.sql


# TODD LOWE snoRNA GENES ()

# This is a bed 6 file created by hand.
ssh hgwdev
mkdir /cluster/data/methKand1/bed/loweSnoGene
cd /cluster/data/methKand1/bed/loweSnoGene
hgLoadBed -tab methKand1 loweSnoGene methKand1-snos.bed


# TIGR GENES ()

# First go to http://www.tigr.org/tigr-scripts/CMR2/gene_attribute_form.dbi
# and fill out the web form as follows:
#   - Pick "Retrieve attributes for the specified DNA feature within a specific
#     organism and/or a specific role category".
#       * Pick "Pyrobaculum aerophilum IM2", and "Primary and TIGR annotation ORFs"
#         from the 1st and 3rd box.
#         NOTE(review): the organism named here looks copied from another
#         assembly's doc; this build is Methanopyrus kandleri AV19 -- confirm.
#       * Select everything from "Choose TIGR Annotation Gene Attributes"
#       * Select "Primary Locus Name" from "Choose Primary Annotation Gene Attributes"
#       * Select everything from "Choose Other Gene Attributes"
#   - Click submit, and click save as tab-delimited file.
ssh hgwdev
mkdir /cluster/data/methKand1/bed/tigrCmrORFs
cp methKand1-tigr.tab /cluster/data/methKand1/bed/tigrCmrORFs
cd /cluster/data/methKand1/bed/tigrCmrORFs
/projects/lowelab/users/aamp/bin/i386/tigrCmrToBed methKand1-tigr.tab methKand1-tigr.bed
# $HOME rather than "~" for the same reason as in the tRNA section: a
# mid-word tilde is passed through unexpanded.
hgLoadBed -tab methKand1 tigrCmrORFs methKand1-tigr.bed -sqlTable=$HOME/kent/src/hg/lib/tigrCmrGene.sql