# for emacs: -*- mode: sh; -*- ######################################################################### # hivmn1 DATABASE BUILD (STARTED, 10/16/07, DONE 10/19/07, Fan) ssh hiv1 mkdir -p /cluster/store12/medical/hiv/hivmn1 cd /cluster/store12/medical/hiv/hivmn1 # Get HIV MN base sequence (1,653 bases) from GSID # This sequence is re-created using the data sheets (page 7 131 - 7 133) # from VaxGene document, provided by Phil Berman. # The data sheets are manually read and entered into raw txt file first, then # manually checked and corrected. The base genome covers the DNA sequence # starting from the bases that has the initial AA translation and ending # at the codon of TAA. The final result is stored in file, hivmn1.fa. faToTab hivmn1.fa stdout|toUpper stdin hivmn1.tab # translate to nib ln -s /cluster/store12/medical/hiv/hivmn1 ~/hivmn1 cd ~/hivmn1 mkdir nib ln -s hivmn1.fa chr1.fa faToNib chr1.fa nib/chr1.nib # CREATING DATABASE # Create the hivmn1 database. echo 'create database hivmn1' | hgsql hiv1 # CREATING GRP TABLE FOR TRACK GROUPING (DONE 10/16/07) echo "create table grp (PRIMARY KEY(NAME)) select * from hiv1.grp" \ | hgsql hivmn1 # STORING O+O SEQUENCE AND ASSEMBLY INFORMATION # Make symbolic links from /gbdb/hivmn1/nib to the real nibs. mkdir -p /gbdb/hivmn1/nib ln -s /cluster/store12/medical/hiv/hivmn1/nib/chr1.nib /gbdb/hivmn1/nib # Load /gbdb/hivmn1/nib paths into database and save size info. hgsql hivmn1 < ~/src/hg/lib/chromInfo.sql cd ~/hivmn1 hgNibSeq -preMadeNib hivmn1 /gbdb/hivmn1/nib chr1.fa echo "select chrom,size from chromInfo" | hgsql -N hivmn1 > chrom.sizes # MAKE HGCENTRALHIV1 ENTRY AND TRACKDB TABLE FOR HIVMN echo 'insert into defaultDb values("HIV MN (GP120)", "hivmn1");' \ | hgsql -h localhost hgcentralhiv1 echo 'insert into dbDb values("hivmn1", "Oct. 2007", \ "/gbdb/hivmn1/nib", "HIV MN (GP120)", "chr1", 1, 2030, \ "HIV MN (GP120)","Human immunodeficiency virus 1", \ "/gbdb/hivmn1/html/description.html", 0, 0, " sequence as of Oct., 2007");' \ | hgsql hgcentralhiv1 -h localhost echo 'insert into genomeClade values("HIV MN (GP120)", "other", 110);'\ | hgsql hgcentralhiv1 -h localhost # Edit that makefile to add hivmn1 in all the right places cd ~src/hg/makeDb/trackDb vi makefile make update make alpha cvs update cvs commit makefile # MAKE HGCENTRALHIV1 BLATSERVERS ENTRY FOR HIVMN ssh hiv1 echo 'insert into blatServers values("hivmn1", "hiv1", "17786", "1", "0"); \ insert into blatServers values("hivmn1", "hiv1", "17787", "0", "0");' \ | hgsql hgcentralhiv1 -h localhost # CREATE TRACKDB TABLE hgsql hivmn1 < ~/src/hg/lib/trackDb.sql hgsql hivmn1 -e "insert into trackDb select * from hiv1.trackDb" # copy over tables from hiv1 mysqldump -d hiv1 dnaSeq -u medcat -p$HGPSWD|hgsql hivmn1 hgsql hivmn1 -e "insert into dnaSeq select * from hiv1.dnaSeq" mysqldump -d hiv1 aaSeq -u medcat -p$HGPSWD|hgsql hivmn1 hgsql hivmn1 -e "insert into aaSeq select * from hiv1.aaSeq" mysqldump -d hiv1 gsidSubjInfo -u medcat -p$HGPSWD|hgsql hivmn1 hgsql hivmn1 -e "insert into gsidSubjInfo select * from hiv1.gsidSubjInfo" mysqldump -d hiv1 gsidClinicRec -u medcat -p$HGPSWD|hgsql hivmn1 hgsql hivmn1 -e "insert into gsidClinicRec select * from hiv1.gsidClinicRec" mysqldump -d hiv1 gsIdXref -u medcat -p$HGPSWD|hgsql hivmn1 hgsql hivmn1 -e "insert into gsIdXref select * from hiv1.gsIdXref" mysqldump -d hiv1 vax004Msa -u medcat -p$HGPSWD|hgsql hivmn1 hgsql hivmn1 -e "insert into vax004Msa select * from hiv1.vax004Msa" # CREATE VAX004 TRACK # get vax004 sequences hgsql hivmn1 -N -e 'select * from dnaSeq where id like "%U%"' >vax004.tab # create .fa file tabToFa vax004 mkdir -p /gbdb/hivmn1/vax004 cp -p vax004.fa /gbdb/hivmn1/vax004/vax004.fa hgLoadSeq -replace hivmn1 /gbdb/hivmn1/vax004/vax004.fa # BLAT gfClient -minScore=200 -minIdentity=80 -nohead hiv1.cse.ucsc.edu 17785 /gbdb/hivmn1/nib \ -out=psl -t=dna -q=dna vax004.fa vax004.psl # count the result wc *.psl cut -f 10 vax004.psl |wc cut -f 10 vax004.psl |sort -u |wc # load the psl result into vax004 table hgLoadPsl hivmn1 vax004.psl # hgLoadPsl has some file permission problem. Finish this by manually load the psl.tab file. Hgsql hivmn -e 'load data local infile "psl.tab" into table vax004' # CREATE HIVGENE TRACK mkdir -p /cluster/store12/medical/hiv/hivmn1/hivGene cd /cluster/store12/medical/hiv/hivmn1/hivGene # clean up files from previous build # rm *.psl *.bed *.tab *.fa *.tmp # Get corresponding DNA gemomic sequences from hiv1 base genome. hgsql hiv1 -N -e \ 'select h.name, substring(b.seq, h.chromStart, h.chromEnd-h.chromStart) from baseSeq b, hivGene h where b.id="hiv1"' >hSeq.tmp # keep only the genes/regions within the gp120 region fgrep V hSeq.tmp >hSeq.tab fgrep vpu hSeq.tmp >>hSeq.tab fgrep gp120 hSeq.tmp >>hSeq.tab fgrep RRE hSeq.tmp >>hSeq.tab # create .fa file from .tab file tabToFa hSeq # BLAT the sequences agains hivmn1 base genome gfClient -minScore=10 -minIdentity=35 -nohead hiv1.cse.ucsc.edu 17787 \ /gbdb/hivmn1/nib -out=psl -t=dna -q=dna hSeq.fa jtemp.psl # Keep only the best ones pslReps -singleHit -minAli=0.35 -nohead jtemp.psl hSeq.psl jtemp.psr # load the BLAT result into a temp psl track hgLoadPsl hivmn1 hSeq.psl # NEED TO MANUALLY LOAD THE psl.tab, BECAUSE hgLoadPsl HAS PERMISSION PROBLEM ON hiv1. hgsql hivmn1 -e 'delete from hSeq' hgsql hivmn1 -e 'load data local infile "psl.tab" into table hSeq' # create .bed file for hivGene hgsql hivmn1 -N -e 'select "chr1", tStart, tEnd, qName from hSeq ' >hivGene.bed # load the bed file into hivGene table, needs to load manually bed.tab manually due # to permission problem. hgLoadBed hivmn1 hivGene hivGene.bed # remove temp files rm jtemp.* *.tmp psl.tab # CREATE INTERPRO TRACK cd /cluster/store12/medical/hiv/hivmn1 mkdir interPro cd interPro # get all HIV-1 domain sequences getInterProFa interProXrefHiv1 interProHiv1.fa # BLAT against base genome gfClient localhost 17786 /gbdb/hivmn1/nib -out=psl -t=dnax -q=prot interProHiv1.fa interProHiv1.psl # load it into a temp table hgLoadPsl hivmn1 table=testPsl interProHiv1.psl # finish above manually. # create bed file from this temp table hgsql hivmn1 -N -e \ 'select "chr1", tStart, tEnd, qName from testPsl where (tEnd-tStart)/3/qSize>0.42' \ > interProHiv1.bed # load the bed file into the track table hgLoadBed hivmn1 interPro interProHiv1.bed # drop the temp table. hgsql hivmn1 -e 'drop table testPsl' # CREATE CONSERVATION TRACKS mkdir -p /cluster/store12/medical/hiv/hivmn1/conservation cd /cluster/store12/medical/hiv/hivmn1/conservation # create the .wig file and .fa file of the consensus sequence. gsidMsa hivmn1 vax004Msa MN 166 vax004Cons.wig vax004Consensus.fa # encode and load the wig file wigEncode vax004Cons.wig stdout vax004Cons.wib \ | hgLoadWiggle hivmn1 vax004Cons stdin # copy .wib file to /gbdb mkdir -p /gbdb/hivmn1/wib cp vax004Cons.wib /gbdb/hivmn1/wib # do the same for protein conservation track mkdir aa cd aa # create .wig file gsidAaMsa2 hivmn1 vax004Msa MN 166 vax004AaCons.wig vax004AaConsensus.fa # encode and load the .wib file wigEncode vax004AaCons.wig stdout vax004AaCons.wib \ | hgLoadWiggle hivmn1 vax004AaCons stdin cp vax004AaCons.wib /gbdb/hivmn1/wib # CREATE MAF TRACKS mkdir -p /cluster/store12/medical/hiv/hivmn1/msa cd /cluster/store12/medical/hiv/hivmn1/msa # create a script file, doall hgsql hivmn1 -N -e \ 'select id from dnaSeq where id like "%U%"'\ |sed -e 's/ss/do1 ss/g' >doall # create one line script file, do1, with the following line in it: hgsql hivmn1 -N -e "select id, seq from vax004Msa where id='${1}'" chmod +x do* # run the script to get the .tab file with all MSA sequences of VAX004 doall >mn1.tab # convert .tab into .fa file tabToFa mn1 # grab the base alignment sequence echo ">hivmn1" >mn1.aln hgsql hivmn1 -N -e 'select seq from vax004Msa where id="MN"' >> mn1.aln # prepare an interium file, jjAll.mfa cat mn1.aln mn1.fa >jjAll.mfa echo = >>jjAll.mfa # Run xmfaToMafMn1 to create a precursor file for the final .maf xmfaToMafMn1 jjAll.mfa j.out org1=hivmn1 cat j.out|sed -e 's/\./_/g'|sed -e 's/_chr/\.chr/g' >chr1.tmp rm jjAll.mfa j.out cat chr1.tmp |sed -e 's/ss_U/U/g' >chr1.maf # copy .maf to /gbdb. mkdir -p /gbdb/hivmn1/vax004Maf cp chr1.maf /gbdb/hivmn1/vax004Maf -p echo before load hgLoadMaf hivmn1 vax004Maf # create another copy for protein MAF. mkdir -p /gbdb/hivmn1/vax004AaMaf cp -p chr1.maf /gbdb/hivmn1/vax004AaMaf hgLoadMaf hivmn1 vax004AaMaf