# for emacs: -*- mode: sh; -*-

#########################################################################
# hivVax003Vax004 DATABASE BUILD (DONE 5/20/08, Fan)

# interactive step: the rest of this log is run on hiv1
ssh hiv1
mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004
cd /cluster/store12/medical/hiv/hivVax003Vax004

#########################################################################
# Create hivVax003Vax004 DB

# the option must be spelled with an ASCII hyphen ("-e"); an en dash is
# not recognized as an option prefix
hgsql -e 'create database hivVax003Vax004'

# Ask admin to copy over all tables from hiv1 to hivVax003Vax004

#########################################################################
# CREATE MAF TRACKS FOR VAX004

mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa

# create a script file, doall: one "do1 <id>" line per dnaSeq id
# containing "U" (the sed rewrites every "ss" in the id to "do1 ss")
hgsql hivVax003Vax004 -N -e \
  'select id from dnaSeq where id like "%U%"' \
  | sed -e 's/ss/do1 ss/g' >doall

# create the one-line script file, do1, which selects id and seq from
# vax004Msa for the id passed as $1; the quoted delimiter keeps ${1}
# literal in the generated file
cat <<'_EOF_' >do1
hgsql hivVax003Vax004 -N -e "select id, seq from vax004Msa where id='${1}'"
_EOF_

chmod +x do*

# run the script to get the .tab file with all MSA sequences of VAX004
# (doall and do1 are invoked by bare name, so "." must be on PATH)
doall >Vax003Vax004.tab

# convert .tab into .fa file
tabToFa Vax003Vax004

# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax004Msa where id="HXB2"' >> Vax003Vax004.aln

# prepare an interim file, jjAll.mfa: base sequence first, then all
# subject sequences, terminated by a line holding "="
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa

# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
# (the recorded command name carried a doubled "Vax004" suffix; every other
# section of this log, and the comment above, use xmfaToMafVax003Vax004)
xmfaToMafVax003Vax004 jjAll.mfa j.out org1=hivVax003Vax004

# turn every "." into "_", then restore the "." in front of "chr"
sed -e 's/\./_/g' -e 's/_chr/\.chr/g' j.out >chr1.tmp
rm jjAll.mfa j.out

# strip the "ss_" prefix from the U ids
sed -e 's/ss_U/U/g' chr1.tmp >chr1.maf

# copy .maf to /gbdb.
mkdir -p /gbdb/hivVax003Vax004/vax004Maf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax004Maf
hgLoadMaf hivVax003Vax004 vax004Maf

# create another copy for protein MAF.
# protein MAF for VAX004: same chr1.maf, loaded under a second table name
mkdir -p /gbdb/hivVax003Vax004/vax004AaMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax004AaMaf
hgLoadMaf hivVax003Vax004 vax004AaMaf

#########################################################################
# CREATE CONSERVATION TRACKS FOR VAX003 AE STRAIN

mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE
cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE

# create the .wig file and .fa file of the consensus sequence.
gsidMsa hivVax003Vax004 vax003AEMsa HXB2 6228 vax003AECons.wig vax003AEConsensus.fa

# encode and load the wig file
wigEncode vax003AECons.wig stdout vax003AECons.wib \
  | hgLoadWiggle hivVax003Vax004 vax003AECons stdin

# copy .wib file to /gbdb
mkdir -p /gbdb/hivVax003Vax004/wib
cp vax003AECons.wib /gbdb/hivVax003Vax004/wib

# do the same for protein conservation track
# (-p so that a rerun does not stop on the existing directory)
mkdir -p aa
cd aa

# create .wig file
gsidAaMsa2 hivVax003Vax004 vax003AEMsa HXB2 6228 vax003AEAaCons.wig vax003AEAaConsensus.fa

# encode and load the .wib file
wigEncode vax003AEAaCons.wig stdout vax003AEAaCons.wib \
  | hgLoadWiggle hivVax003Vax004 vax003AEAaCons stdin
cp vax003AEAaCons.wib /gbdb/hivVax003Vax004/wib

#########################################################################
# CREATE MAF TRACKS FOR VAX003 AE STRAIN

mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE

# create a script file, doall: one "do1 <id>" line per dnaSeq id
# containing "T"
hgsql hivVax003Vax004 -N -e \
  'select id from dnaSeq where id like "%T%"' \
  | sed -e 's/ss/do1 ss/g' >doall

# create the one-line script file, do1, which selects id and seq from
# vax003AEMsa for the id passed as $1
cat <<'_EOF_' >do1
hgsql hivVax003Vax004 -N -e "select id, seq from vax003AEMsa where id='${1}'"
_EOF_

chmod +x do*

# run the script to get the .tab file with all MSA sequences of VAX003 AE
doall >Vax003Vax004.tab

# convert .tab into .fa file
tabToFa Vax003Vax004

# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax003AEMsa where id="HXB2"' >> Vax003Vax004.aln

# prepare an interim file, jjAll.mfa
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa

# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
xmfaToMafVax003Vax004 jjAll.mfa j.out org1=hivVax003Vax004
sed -e 's/\./_/g' -e 's/_chr/\.chr/g' j.out >chr1.tmp
# rm jjAll.mfa j.out
sed -e 's/ss_T/T/g' chr1.tmp >chr1.maf

# copy .maf to /gbdb.
# The AE alignment goes into its own vax003AE* tables. The recorded commands
# reused vax004Maf/vax004AaMaf here, which would have replaced the VAX004
# tracks loaded earlier in this log.
mkdir -p /gbdb/hivVax003Vax004/vax003AEMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003AEMaf
hgLoadMaf hivVax003Vax004 vax003AEMaf

# create another copy for protein MAF.
mkdir -p /gbdb/hivVax003Vax004/vax003AEAaMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003AEAaMaf
hgLoadMaf hivVax003Vax004 vax003AEAaMaf

#########################################################################
# COPY OVER MSA TABLES FOR VAX003 B STRAIN

mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B

# NOTE(review): as recorded, both the dump and the insert named
# hivVax003Vax004 as the source, i.e. the table was copied onto itself
# (and mysqldump output normally drops and recreates the table first).
# The source is presumably hiv1, the database the other tables were copied
# from -- confirm, or override with MSA_SRC_DB.
msaSrcDb=${MSA_SRC_DB:-hiv1}

# get table definition (-d: no row data)
# NB: passing the password as an argument exposes it in the process list
mysqldump -d "$msaSrcDb" vax003BMsa -u medcat -p"$HGPSWD" | hgsql hivVax003Vax004

# load the table
hgsql hivVax003Vax004 -e "insert into vax003BMsa select * from ${msaSrcDb}.vax003BMsa"

#########################################################################
# CREATE CONSERVATION TRACKS FOR VAX003 B STRAIN

mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B
cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B

# create the .wig file and .fa file of the consensus sequence.
# nucleotide conservation (.wig) and consensus (.fa) for the VAX003 B MSA
gsidMsa hivVax003Vax004 vax003BMsa HXB2 6228 vax003BCons.wig vax003BConsensus.fa

# encode and load the wig file
wigEncode vax003BCons.wig stdout vax003BCons.wib \
  | hgLoadWiggle hivVax003Vax004 vax003BCons stdin

# copy .wib file to /gbdb
mkdir -p /gbdb/hivVax003Vax004/wib
cp vax003BCons.wib /gbdb/hivVax003Vax004/wib

# do the same for protein conservation track
# (-p so that a rerun does not stop on the existing directory)
mkdir -p aa
cd aa

# create .wig file
gsidAaMsa2 hivVax003Vax004 vax003BMsa HXB2 6228 vax003BAaCons.wig vax003BAaConsensus.fa

# encode and load the .wib file
wigEncode vax003BAaCons.wig stdout vax003BAaCons.wib \
  | hgLoadWiggle hivVax003Vax004 vax003BAaCons stdin
cp vax003BAaCons.wib /gbdb/hivVax003Vax004/wib

#########################################################################
# CREATE MAF TRACKS FOR VAX003 B STRAIN

mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B

# create a script file, doall: one "do1 <id>" line per dnaSeq id
# containing "T"
hgsql hivVax003Vax004 -N -e \
  'select id from dnaSeq where id like "%T%"' \
  | sed -e 's/ss/do1 ss/g' >doall

# create the one-line script file, do1, which selects id and seq from
# vax003BMsa for the id passed as $1; the quoted delimiter keeps ${1}
# literal in the generated file
cat <<'_EOF_' >do1
hgsql hivVax003Vax004 -N -e "select id, seq from vax003BMsa where id='${1}'"
_EOF_

chmod +x do*

# run the script to get the .tab file with all MSA sequences of VAX003 B
# (doall and do1 are invoked by bare name, so "." must be on PATH)
doall >Vax003Vax004.tab

# convert .tab into .fa file
tabToFa Vax003Vax004

# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax003BMsa where id="HXB2"' >> Vax003Vax004.aln

# prepare an interim file, jjAll.mfa
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa

# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
xmfaToMafVax003Vax004 jjAll.mfa j.out org1=hivVax003Vax004

# turn every "." into "_", then restore the "." in front of "chr"
sed -e 's/\./_/g' -e 's/_chr/\.chr/g' j.out >chr1.tmp
rm jjAll.mfa j.out

# strip the "ss_" prefix from the T ids
sed -e 's/ss_T/T/g' chr1.tmp >chr1.maf

# copy .maf to /gbdb.
# load the VAX003 B alignment (chr1.maf in the current directory)
mkdir -p /gbdb/hivVax003Vax004/vax003BMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003BMaf
hgLoadMaf hivVax003Vax004 vax003BMaf

# create another copy for protein MAF.
mkdir -p /gbdb/hivVax003Vax004/vax003BAaMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003BAaMaf
hgLoadMaf hivVax003Vax004 vax003BAaMaf

#########################################################################
# Process, check, correct and load VAX003 clinical tables

mkdir -p /data/home/fanhsu/medical/hiv/hivVax003Vax004/clinical
cd /data/home/fanhsu/medical/hiv/hivVax003Vax004/clinical

# copy over original raw data files
cp -p /cluster/store12/medical/vaxGen/fromEvie/VAX003/*.txt .
ls -l *.txt

# shorten the file name and run processRaw to generate .sql def
cp "VAX003 RNACD4 match with sequence ID_20080501_EMZ18Jun.txt" VAX003_RNACD4080501.txt
processRaw VAX003_RNACD4080501.txt
# NOTE(review): GSID_DEMOG_SEQNO_003Raw.sql is loaded below but no step in
# this log generates it -- presumably processRaw was also run on
# GSID_DEMOG_SEQNO_003.txt; confirm.

# (re)create hivVax003Vax004Build DB to be used in this build process.
# "if exists" lets the first run proceed when there is nothing to drop; the
# database is created once (the log recorded the create statement twice, and
# the second one can only fail).
hgsql hiv1 -e 'drop database if exists hivVax003Vax004Build'
hgsql hiv1 -e 'create database hivVax003Vax004Build'

# load raw demographic and RNACD4 data
hgsql hivVax003Vax004Build < GSID_DEMOG_SEQNO_003Raw.sql
hgsql hivVax003Vax004Build < VAX003_RNACD4080501Raw.sql

# "ignore 1 lines" skips the header row of each raw file
hgsql hivVax003Vax004Build -e \
  'load data local infile "GSID_DEMOG_SEQNO_003.txt" into table GSID_DEMOG_SEQNO_003Raw ignore 1 lines'
hgsql hivVax003Vax004Build -e \
  'load data local infile "VAX003_RNACD4080501.txt" into table VAX003_RNACD4080501Raw ignore 1 lines'

# build initial gsidClinicRecTemp table ...
# "specId", "rna" and "cd4" are literal placeholder strings; they are
# overwritten by the update statements that follow in this log
hgsql hivVax003Vax004Build -N -e \
  'select "specId",GSID, MBLabcd, DRNACD4, "rna","cd4" from VAX003_RNACD4080501Raw' \
  >gsidClinicRecTemp.tab

hgsql hivVax003Vax004Build -e 'drop table if exists gsidClinicRecTemp'
getDbTableDef hiv1 gsidClinicRecTemp >gsidClinicRecTemp.sql
hgsql hivVax003Vax004Build < gsidClinicRecTemp.sql
hgsql hivVax003Vax004Build -e \
  'load data local infile "gsidClinicRecTemp.tab" into table gsidClinicRecTemp'

# build subjLabcode table ...
# distinct (GSID, MBLabcd) pairs from the raw RNA/CD4 table, skipping rows
# with an empty lab code
hgsql hivVax003Vax004Build -N -e \
  'select GSID, MBLabcd from VAX003_RNACD4080501Raw where MBLabcd!=""' \
  | sort -u > subjLabcode.tab

# "if exists" lets the first run proceed when there is nothing to drop
hgsql hivVax003Vax004Build -e "drop table if exists subjLabcode"
getDbTableDef hiv1 subjLabcode > subjLabcode.sql
hgsql hivVax003Vax004Build < subjLabcode.sql
hgsql hivVax003Vax004Build -e \
  'load data local infile "subjLabcode.tab" into table subjLabcode'

# fill in labCode in gsidClinicRecTemp
hgsql hivVax003Vax004Build -e \
  'update gsidClinicRecTemp t, subjLabcode l set t.labCode=l.labCode where t.subjId=l.subjId'

# fill in specimenId
hgsql hivVax003Vax004Build -e \
  'update gsidClinicRecTemp t, GSID_DEMOG_SEQNO_003Raw r set t.specimenId=r.SpecimenNumber where t.subjId=r.subjId and r.SpecimenNumber !=""'

# fill in RNA and CD4 (both come from the same raw row, matched on subject
# and collection day, so one joined update sets both columns)
hgsql hivVax003Vax004Build -e \
  'update gsidClinicRecTemp t, VAX003_RNACD4080501Raw r set t.hivQuan=r.RNA, t.cd4Count=r.CD4ABS where t.subjId=r.GSID and t.daysCollection=r.DRNACD4'

# change RNA "399" to "200" (which will be displayed as "<400")
hgsql hivVax003Vax004Build -e \
  'update gsidClinicRecTemp set hivQuan="200" where hivQuan = "399"'

# update cd4 NULL ...
# ("." in the raw data becomes the string "NULL" here; the next step of this
# log rewrites NULL to -1 in the dump and then sets real SQL NULLs)
hgsql hivVax003Vax004Build -e \
  'update gsidClinicRecTemp set cd4Count="NULL" where cd4Count="."'

# update daysCollection NULL ...
# "." in daysCollection becomes the string "NULL"
hgsql hivVax003Vax004Build -e \
  'update gsidClinicRecTemp set daysCollection="NULL" where daysCollection="."'

# build gsidClinicRecNew table
# sort -u rather than a bare uniq: uniq only collapses ADJACENT duplicate
# lines, and the select output is not sorted. NULL is written as -1 so the
# file loads into the typed columns; the -1 values are turned back into SQL
# NULLs right after the load.
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecTemp ' \
  | sort -u | sed -e 's/NULL/-1/g' > gsidClinicRecNew.tab

# NOTE(review): gsidClinicRecNew.sql and gsidClinicRecWithSeqNew.sql are used
# below but are not created anywhere in this log -- presumably hand-made
# copies of the gsidClinicRec definitions; confirm.
hgsql hivVax003Vax004Build -e 'drop table if exists gsidClinicRecNew'
hgsql hivVax003Vax004Build < gsidClinicRecNew.sql
hgsql hivVax003Vax004Build -e \
  'load data local infile "gsidClinicRecNew.tab" into table gsidClinicRecNew'

# set NULLs
hgsql hivVax003Vax004Build -e \
  'update gsidClinicRecNew set hivQuan=NULL where hivQuan=-1'
hgsql hivVax003Vax004Build -e \
  'update gsidClinicRecNew set cd4Count=NULL where cd4Count=-1'
hgsql hivVax003Vax004Build -e \
  'update gsidClinicRecNew set daysCollection=NULL where daysCollection=-1'
# rows whose specimenId still holds the "specId" placeholder have no specimen
hgsql hivVax003Vax004Build -e \
  'update gsidClinicRecNew set specimenId=NULL where specimenId="specId"'

# build gsidClinicRecWithSeqNew table: clinical rows whose subject/lab code
# is flagged "Sequence data exist" in the demographic table
hgsql hivVax003Vax004Build -N -e \
  'select c.* from GSID_DEMOG_SEQNO_003Raw r,gsidClinicRecNew c where SequenceDataStatus="Sequence data exist" and r.subjId=c.subjId and r.labCode=c.labCode' \
  | sort -u | sed -e 's/NULL/-1/g' >gsidClinicRecWithSeqNew.tmp

hgsql hivVax003Vax004Build -e 'drop table if exists gsidClinicRecWithSeqNew'
hgsql hivVax003Vax004Build < gsidClinicRecWithSeqNew.sql
hgsql hivVax003Vax004Build -e \
  'load data local infile "gsidClinicRecWithSeqNew.tmp" into table gsidClinicRecWithSeqNew'
rm gsidClinicRecWithSeqNew.tmp

hgsql hivVax003Vax004Build -e \
  'update gsidClinicRecWithSeqNew set hivQuan=NULL where hivQuan=-1'
hgsql hivVax003Vax004Build -e \
  'update gsidClinicRecWithSeqNew set cd4Count=NULL where cd4Count=-1'

# compare with previous old data (diff lines mentioning GSID4 are filtered
# out; the result is left in j.diff for manual inspection)
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecWithSeqNew' | sort -u >j.n
hgsql hivVax003Vax004 -N -e 'select * from gsidClinicRecWithSeq' | sort -u >j.o
diff j.o j.n | grep -v "GSID4" >j.diff

# load the newly built data into hivVax003Vax004 tables, replacing the
# existing GSID3* rows
hgsql hivVax003Vax004 -e 'delete from gsidClinicRec where subjId like "GSID3%"'
hgsql hivVax003Vax004 -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'

hgsql hivVax003Vax004 -e \
  "insert into gsidClinicRec select * from hivVax003Vax004Build.gsidClinicRecNew"
hgsql hivVax003Vax004 -e \
  "insert into gsidClinicRecWithSeq select * from hivVax003Vax004Build.gsidClinicRecWithSeqNew"

#########################################################################
# Build the gsidSubjSeq table (used by Table View).

gsidSubjSeq hivVax003Vax004 dnaSeqId > j.dna
gsidSubjSeq hivVax003Vax004 aaSeqId > j.aa

# column 1 of j.dna, then column 2 of j.dna and column 2 of j.aa, are
# pasted side by side; j.2 (column 1 of j.aa) is written but not pasted
cut -f 1 j.dna >j.1
cut -f 1 j.aa >j.2
cut -f 2 j.dna >j.3
cut -f 2 j.aa >j.4
paste j.1 j.3 j.4 > gsidSubjSeq.tab

hgsql hivVax003Vax004 -e 'delete from gsidSubjSeq'
hgsql hivVax003Vax004 -e \
  'load data local infile "gsidSubjSeq.tab" into table gsidSubjSeq'

rm j.1 j.2 j.3 j.4 j.dna j.aa

#################################################################################
# RE-BUILD CONSERVATION AND MAF TRACKS FOR VAX003 AE STRAIN (DONE, 7/10/08, Fan)

# First cut the vax003AEMsa sequences so that they start with VPV and end with REKR

# rename existing vax003AEMsa table as vax003AEMsaOld
# (the option must be spelled with an ASCII hyphen, not an en dash)
hgsql hivVax003Vax004 -e 'rename table vax003AEMsa to vax003AEMsaOld'

# use BLAT to visually decide what are the appropriate starting and ending positions to cut.
# substring(seq, 124, 1743): 1743 characters starting at position 124
hgsql hivVax003Vax004 -N -e \
  'select id,substring(seq, 124, 1743) from vax003AEMsaOld' >vax003AEMsa.tab

tabToFa vax003AEMsa

# use resulting vax003AEMsa.fa to check that the cut is correct, and then load the new MSA sequences.
hgsql hivVax003Vax004 < ~/src/hg/lib/vax003AEMsa.sql
hgsql hivVax003Vax004 -N -e 'load data local infile "vax003AEMsa.tab" into table vax003AEMsa'

# RE-BUILD CONSERVATION TRACKS FOR VAX003 AE STRAIN

mkdir -p \
  /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE/rebuild
cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/AE/rebuild

# create the .wig file and .fa file of the consensus sequence.
# nucleotide conservation (.wig) and consensus (.fa) for the trimmed
# VAX003 AE MSA
gsidMsa hivVax003Vax004 vax003AEMsa HXB2 6348 vax003AECons.wig vax003AEConsensus.fa

# encode and load the wig file
wigEncode vax003AECons.wig stdout vax003AECons.wib \
  | hgLoadWiggle hivVax003Vax004 vax003AECons stdin

# copy .wib file to /gbdb
mkdir -p /gbdb/hivVax003Vax004/wib
cp vax003AECons.wib /gbdb/hivVax003Vax004/wib

# do the same for protein conservation track
# (-p so that a rerun does not stop on the existing directory)
mkdir -p aa
cd aa

# create .wig file
gsidAaMsa2 hivVax003Vax004 vax003AEMsa HXB2 6348 vax003AEAaCons.wig vax003AEAaConsensus.fa

# encode and load the .wib file
wigEncode vax003AEAaCons.wig stdout vax003AEAaCons.wib \
  | hgLoadWiggle hivVax003Vax004 vax003AEAaCons stdin
cp vax003AEAaCons.wib /gbdb/hivVax003Vax004/wib

# CREATE MAF TRACKS FOR VAX003 AE STRAIN

mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE/rebuild
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/AE/rebuild

# create a script file, doall: one "do1 <id>" line per dnaSeq id
# containing "T"
hgsql hivVax003Vax004 -N -e \
  'select id from dnaSeq where id like "%T%"' \
  | sed -e 's/ss/do1 ss/g' >doall

# create the one-line script file, do1, which selects id and seq from
# vax003AEMsa for the id passed as $1; the quoted delimiter keeps ${1}
# literal in the generated file
cat <<'_EOF_' >do1
hgsql hivVax003Vax004 -N -e "select id, seq from vax003AEMsa where id='${1}'"
_EOF_

chmod +x do*

# run the script to get the .tab file with all MSA sequences of VAX003 AE
# (doall and do1 are invoked by bare name, so "." must be on PATH)
doall >Vax003Vax004.tab

# convert .tab into .fa file
tabToFa Vax003Vax004

# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax003AEMsa where id="HXB2"' >> Vax003Vax004.aln

# prepare an interim file, jjAll.mfa
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa

# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
xmfaToMafVax003Vax004 jjAll.mfa j.out org1=hivVax003Vax004

# turn every "." into "_", then restore the "." in front of "chr"
sed -e 's/\./_/g' -e 's/_chr/\.chr/g' j.out >chr1.tmp
# rm jjAll.mfa j.out

# strip the "ss_" prefix from the T ids
sed -e 's/ss_T/T/g' chr1.tmp >chr1.maf

# copy .maf to /gbdb.
# load the rebuilt VAX003 AE alignment (chr1.maf in the current directory)
mkdir -p /gbdb/hivVax003Vax004/vax003AEMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003AEMaf
hgLoadMaf hivVax003Vax004 vax003AEMaf

# create another copy for protein MAF.
# The directory created must be the one the copy targets (vax003AEAaMaf);
# with the wrong mkdir, cp would create a plain FILE named vax003AEAaMaf.
mkdir -p /gbdb/hivVax003Vax004/vax003AEAaMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003AEAaMaf
hgLoadMaf hivVax003Vax004 vax003AEAaMaf

#################################################################################
# RE-BUILD CONSERVATION AND MAF TRACKS FOR VAX003 B STRAIN (DONE, 7/10/08, Fan)

# First cut the vax003BMsa sequences so that they start with VPV and end with REKR

# rename existing vax003BMsa table as vax003BMsaOld
# (the option must be spelled with an ASCII hyphen, not an en dash)
hgsql hivVax003Vax004 -e 'rename table vax003BMsa to vax003BMsaOld'

# use BLAT to visually decide what are the appropriate starting and ending positions to cut.
# substring(seq, 121, 1620): 1620 characters starting at position 121
hgsql hivVax003Vax004 -N -e \
  'select id,substring(seq, 121, 1620) from vax003BMsaOld' >vax003BMsa.tab

tabToFa vax003BMsa

# use resulting vax003BMsa.fa to check that the cut is correct, then load the new MSA sequences.
# "if exists": after the rename above there is normally no vax003BMsa left,
# so an unconditional drop would just report an error
hgsql hivVax003Vax004 -e 'drop table if exists vax003BMsa'
hgsql hivVax003Vax004 < ~/src/hg/lib/vax003BMsa.sql
hgsql hivVax003Vax004 -N -e 'load data local infile "vax003BMsa.tab" into table vax003BMsa'

# RE-BUILD CONSERVATION TRACKS FOR VAX003 B STRAIN

mkdir -p \
  /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B/rebuild
cd /cluster/store12/medical/hiv/hivVax003Vax004/conservation/B/rebuild

# create the .wig file and .fa file of the consensus sequence.
# nucleotide conservation (.wig) and consensus (.fa) for the trimmed
# VAX003 B MSA
gsidMsa hivVax003Vax004 vax003BMsa HXB2 6348 vax003BCons.wig vax003BConsensus.fa

# encode and load the wig file
wigEncode vax003BCons.wig stdout vax003BCons.wib \
  | hgLoadWiggle hivVax003Vax004 vax003BCons stdin

# copy .wib file to /gbdb
mkdir -p /gbdb/hivVax003Vax004/wib
cp vax003BCons.wib /gbdb/hivVax003Vax004/wib

# do the same for protein conservation track
# (-p so that a rerun does not stop on the existing directory)
mkdir -p aa
cd aa

# create .wig file
# (table name is vax003BMsa; the recorded command had a stray space and
# quote inside the name, which leaves an unterminated quote)
gsidAaMsa2 hivVax003Vax004 vax003BMsa HXB2 6348 vax003BAaCons.wig vax003BAaConsensus.fa

# encode and load the .wib file
wigEncode vax003BAaCons.wig stdout vax003BAaCons.wib \
  | hgLoadWiggle hivVax003Vax004 vax003BAaCons stdin
cp vax003BAaCons.wib /gbdb/hivVax003Vax004/wib

# CREATE MAF TRACKS FOR VAX003 B STRAIN

mkdir -p /cluster/store12/medical/hiv/hivVax003Vax004/msa/B/rebuild
cd /cluster/store12/medical/hiv/hivVax003Vax004/msa/B/rebuild

# create a script file, doall: one "do1 <id>" line per dnaSeq id
# containing "T"
hgsql hivVax003Vax004 -N -e \
  'select id from dnaSeq where id like "%T%"' \
  | sed -e 's/ss/do1 ss/g' >doall

# create the one-line script file, do1, which selects id and seq from
# vax003BMsa for the id passed as $1; the quoted delimiter keeps ${1}
# literal in the generated file
cat <<'_EOF_' >do1
hgsql hivVax003Vax004 -N -e "select id, seq from vax003BMsa where id='${1}'"
_EOF_

chmod +x do*

# run the script to get the .tab file with all MSA sequences of VAX003 B
# (doall and do1 are invoked by bare name, so "." must be on PATH)
doall >Vax003Vax004.tab

# convert .tab into .fa file
tabToFa Vax003Vax004

# grab the base alignment sequence
echo ">hivVax003Vax004" >Vax003Vax004.aln
hgsql hivVax003Vax004 -N -e 'select seq from vax003BMsa where id="HXB2"' >> Vax003Vax004.aln

# prepare an interim file, jjAll.mfa
cat Vax003Vax004.aln Vax003Vax004.fa >jjAll.mfa
echo = >>jjAll.mfa

# Run xmfaToMafVax003Vax004 to create a precursor file for the final .maf
xmfaToMafVax003Vax004 jjAll.mfa j.out org1=hivVax003Vax004

# turn every "." into "_", then restore the "." in front of "chr"
sed -e 's/\./_/g' -e 's/_chr/\.chr/g' j.out >chr1.tmp
# rm jjAll.mfa j.out

# strip the "ss_" prefix from the T ids
sed -e 's/ss_T/T/g' chr1.tmp >chr1.maf

# copy .maf to /gbdb.
# load the rebuilt VAX003 B alignment (chr1.maf in the current directory)
mkdir -p /gbdb/hivVax003Vax004/vax003BMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003BMaf
hgLoadMaf hivVax003Vax004 vax003BMaf

# create another copy for protein MAF.
# The directory created must be the one the copy targets (vax003BAaMaf);
# with the wrong mkdir, cp would create a plain FILE named vax003BAaMaf.
mkdir -p /gbdb/hivVax003Vax004/vax003BAaMaf
cp -p chr1.maf /gbdb/hivVax003Vax004/vax003BAaMaf
hgLoadMaf hivVax003Vax004 vax003BAaMaf

########################################################################################
# REBUILD THE gsidClinicRecWithSeq TABLE (DONE 11/03/08, Fan)

mkdir -p /hive/groups/gsid/medical/hiv/hivVax003Vax004/clinical/novRebuild
cd /hive/groups/gsid/medical/hiv/hivVax003Vax004/clinical/novRebuild

# copy table gsidClinicRecNew into gsidClinicRecNew2
cp ~/hg/lib/gsidClinicRec.sql gsidClinicRecNew2.sql
# manual step. NOTE(review): presumably the edit renames the table in the
# definition to gsidClinicRecNew2 (that is the table dropped and loaded
# below) -- confirm whether anything else was changed.
vi gsidClinicRecNew2.sql

# "if exists" lets the first run proceed when there is nothing to drop
hgsql hivVax003Vax004Build -e 'drop table if exists gsidClinicRecNew2'
hgsql hivVax003Vax004Build < gsidClinicRecNew2.sql

hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew' >gsidClinicRecNew2.tab
hgsql hivVax003Vax004Build -e \
  'load data local infile "gsidClinicRecNew2.tab" into table gsidClinicRecNew2'

# sanity check: dumping the copy should reproduce the file it was loaded from
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew2' >j.tab
diff j.tab gsidClinicRecNew2.tab

# change 200 to 399 so that they are consistent between two table
hgsql hivVax003Vax004Build -e 'update gsidClinicRecNew2 set hivQuan=399 where hivQuan=200'
hgsql hivVax003Vax004Build -N -e 'select * from gsidClinicRecNew2' >jj.tab
diff j.tab jj.tab

# rebuild the gsidClinicRecWithSeq table for VAX003 subjects: clinical rows
# that match a raw RNA/CD4 row on specimen, collection day, subject, RNA and
# CD4, and whose raw specimen number is not empty
hgsql hivVax003Vax004Build -N -e \
  'select c.specimenId, c.subjId, c.labCode, c.daysCollection, r.RNA, r.CD4ABS from gsidClinicRecNew2 c, VAX003_RNACD4080501Raw r where c.specimenId=r.SpecimenNo and c.daysCollection=r.DRNACD4 and c.subjId=r.GSID and r.RNA=c.hivQuan and r.CD4ABS=c.cd4Count and r.SpecimenNo != ""' >j.out

cut -f 1-4 j.out >j.1

# revert back from 399 to 200
# The pattern is anchored at line start so only an RNA value of exactly 399
# is rewritten; unanchored, "399<tab>" also matches the tail of values such
# as 1399 and would turn them into 1200.
cut -f 5-6 j.out | sed -e 's/^399\t/200\t/' >j.2

paste j.1 j.2 >gsidClinicRecWithSeq.vax003.tab

# replace the GSID3* rows in hivVax003Vax004, then update the same table for
# the other 3 genomes
for db in hivVax003Vax004 hivgne8v2 hivmn2 hiva244; do
  hgsql "$db" -e 'delete from gsidClinicRecWithSeq where subjId like "GSID3%"'
  hgsql "$db" -e 'load data local infile "gsidClinicRecWithSeq.vax003.tab" into table gsidClinicRecWithSeq'
done

######################################################################################
# Create VAX003 subtype B Positive Selection tracks for hivVax003Vax004

cd /hive/groups/gsid/medical/hiv/hivVax003Vax004
mkdir -p posSelection
cd posSelection

# BLAT /hive/groups/gsid/medical/hiv/hiva244/posSelection/BMsaAaConsensus.fa
# against hivVax003Vax004 base genome, select psl without header option
# cut and paste the result into the file BMsa.psl

hgLoadPsl -keep -table=BMsaPsl -nobin hivVax003Vax004 BMsa.psl

# will get the following error:

#Processing BMsa.psl
#Can't start query:
#LOAD DATA CONCURRENT INFILE '/cluster/hive/groups/gsid/medical/hiv/hivVax003Vax004/posSelection/BMsa.psl' INTO TABLE BMsaPsl
#mySQL error 13: Can't get stat of '/cluster/hive/groups/gsid/medical/hiv/hivVax003Vax004/posSelection/BMsa.psl' (Errcode: 13)

# load manually then ("local infile" reads the file on the client side; the
# log recorded this as an interactive hgsql session, given here as a single
# -e statement)
hgsql hivVax003Vax004 -e 'load data local infile "BMsa.psl" into table BMsaPsl'

# build the positive selection tracks for model 2 and model 8.
# VAX003 subtype B: one bed file and one track per model, built from the
# BMsaPsl alignment and the pSelectBModel* tables in posSelBuild
gsidPosSelect hivVax003Vax004 BMsaPsl posSelBuild pSelectBModel2 posSelModel2.bed
hgLoadBed hivVax003Vax004 posSelModel2 posSelModel2.bed

gsidPosSelect hivVax003Vax004 BMsaPsl posSelBuild pSelectBModel8 posSelModel8.bed
hgLoadBed hivVax003Vax004 posSelModel8 posSelModel8.bed

##########################################################################
# BUILD THE POSITIVE SELECTION TRACKS FOR VAX003 SUBTYPE AE

# interactive step: the commands below are run on hiv1
ssh hiv1
mkdir -p /hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004
cd /hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004

# BLAT
# /cluster/hive/groups/gsid/medical/hiv/posSelection/AE/AEMsaAaConsensus.fa
# against hivVax003Vax004 base genome, select psl without header option
# cut and paste the result into the file AEMsa.psl

hgLoadPsl -keep -table=AEMsaPsl -nobin hivVax003Vax004 AEMsa.psl

# will get the following error:

#Processing AEMsa.psl
#Can't start query:
#LOAD DATA CONCURRENT INFILE
#'/cluster/hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004/AEMsa.psl' INTO TABLE AEMsaPsl
#mySQL error 13: Can't get stat of
#'/cluster/hive/groups/gsid/medical/hiv/posSelection/AE/hivVax003Vax004/AEMsa.psl' (Errcode: 13)

# load manually then ("local infile" reads the file on the client side; the
# log recorded this as an interactive hgsql session, given here as a single
# -e statement)
hgsql hivVax003Vax004 -e 'load data local infile "AEMsa.psl" into table AEMsaPsl'

# build positive selection tracks for model 2 and model 8.
gsidPosSelect hivVax003Vax004 AEMsaPsl posSelBuild pSelectAEModel2 posSelAEModel2.bed
hgLoadBed hivVax003Vax004 posSelAEModel2 posSelAEModel2.bed

gsidPosSelect hivVax003Vax004 AEMsaPsl posSelBuild pSelectAEModel8 posSelAEModel8.bed
hgLoadBed hivVax003Vax004 posSelAEModel8 posSelAEModel8.bed

##########################################################################
# BUILD THE POSITIVE SELECTION TRACKS FOR VAX004 (Done Fan, 3/2/09)

cd /cluster/hive/groups/gsid/medical/hiv/posSelection
# -p so that a rerun does not stop on the existing directory
mkdir -p vax004
cd vax004

# Since there are large number (12) of subclasses and 4 HIV genomes,
# this has to be automated. So create the do1, do2, do3 script first.
# Please note that the do3 script works on all 4 HIV genomes.

# The three helper scripts are written with a quoted here-document delimiter
# so that $1, $2 and ${...} stay literal in the generated files. In sh/bash
# the quotes are stripped from the delimiter word, so the terminating line
# is the bare word _EOF_ (a line reading '_EOF_' with the quotes would never
# end the here-document).
#
# NOTE(review): in the recorded do1 the runs of "s/ / /g" replace one space
# with one space, i.e. do nothing; presumably they were "two spaces -> one"
# before whitespace was collapsed. They are written here as tr -s ' ',
# which squeezes every run of spaces to a single space -- confirm against
# the PAML and .nex column layout.

cat << '_EOF_' >do1
#!/bin/sh
# usage: do1 <subclass>
# Loads the PAML model 2 / model 8 site tables and the MSA of one VAX004
# subclass into hgFixed, then runs do2 on the first MSA id.

# refuse to run without a subclass: "rm $1/*" with an empty $1 is "rm /*"
: "${1:?usage: do1 <subclass>}"

# pamlToTab: stdin -> stdout. Drops "+-", keeps a "*" that stands alone
# between spaces but removes every other "*", squeezes runs of spaces,
# removes the first remaining space on the line and turns the other
# spaces into tabs.
pamlToTab() {
  sed -e 's/+-//g' -e 's/ \* / xxx /g' -e 's/\*//g' -e 's/xxx/\*/g' \
    | tr -s ' ' \
    | sed -e 's/ //' -e 's/ /\t/g'
}

#do1.1
mkdir -p "$1"

# start with clean slate (-f: an already-empty directory is not an error)
rm -f "$1"/*

cp -p /hive/groups/gsid/medical/vaxGen/fromKeith/posSelection/073008/PAML-outfiles/VAX004-$1-sites.paml $1
cp /hive/groups/gsid/medical/vaxGen/fromKeith/posSelection/073008/data/$1.nex $1

#do1.2
cd $1
grep "+-" VAX004-$1-sites.paml >j.tmp

get1stHalf j.tmp >$1Model2.paml
pamlToTab <$1Model2.paml > vax004$1Model2.tab
hgLoadSqlTab -notOnServer hgFixed vax004$1Model2 ~/src/hg/lib/posSelectModel.sql vax004$1Model2.tab

get2ndHalf j.tmp >$1Model8.paml
pamlToTab <$1Model8.paml > vax004$1Model8.tab
hgLoadSqlTab -notOnServer hgFixed vax004$1Model8 ~/src/hg/lib/posSelectModel.sql vax004$1Model8.tab

rm j.tmp

#do1.3
# MSA rows are the .nex lines containing "U."; squeeze spaces, then tabs
grep 'U\.' $1.nex \
  | tr -s ' ' \
  | sed -e 's/ /\t/g' >vax004$1Msa.tab

chmod +rx *.tab
hgLoadSqlTab -notOnServer hgFixed vax004$1Msa /hive/groups/gsid/medical/hiv/posSelection/vax004/dnaSeq.sql vax004$1Msa.tab
cd ..

#do1.4
# generate and run "do2 <subclass> <id>" for the first id in the MSA table
hgsql -N -e "select concat('do2 ${1} ', id) from hgFixed.vax004${1}Msa limit 1" >doit
chmod +x doit
doit
_EOF_
chmod +x do1

cat << '_EOF_' >do2
#!/bin/sh
# usage: do2 <subclass> <id>
gsidAaMsa2 hgFixed vax004$1Msa $2 1 $1/$1Msa.wig $1/$1MsaAaConsensus.fa
_EOF_
chmod +x do2

cat << '_EOF_' >do3
#!/bin/sh
# usage: do3 <subclass>
# For each of the 4 HIV genomes: generate and run a "blatit <subclass> <db>
# <port>" command from the translated blat server entry of that db, then
# build and load the model 2 and model 8 positive selection tracks.
for db in hivVax003Vax004 hivmn2 hivgne8v2 hiva244
do
  # process $db
  hgsql hgcentralhiv1 -N -e "select concat('blatit ${1} ${db} ', port) from blatServers where db='${db}' and isTrans=1" >doBlat
  chmod +x doBlat
  ./doBlat

  cd $1
  for model in Model2 Model8
  do
    gsidPosSelect $db vax004$1MsaPsl hgFixed vax004$1$model posSelVax004$1$model.bed
    hgLoadBed $db posSelVax004$1$model posSelVax004$1$model.bed
  done
  cd ..
done
_EOF_
chmod +x do3

# Now run the scripts for all subclasses.
# (do1, do2, do3, doit and blatit are invoked by bare name, so "." must be
# on PATH)
subclasses="Hispanic Midwest Northeast Other South Southwest Westcoast White Asian Black pla vac"

for subclass in $subclasses; do
  do1 $subclass
done

# BTW, do1 calls do2

for subclass in $subclasses; do
  do3 $subclass
done

##########################################################################
# BUILD IMMUNO TABLE FOR IMMUNOGENICITY DATA. (DONE 9/29/09, Fan)

cd ~/kent/src/hg/lib
# "if exists" lets the first run proceed when there is nothing to drop
hgsql hgFixed -e 'drop table if exists immunoRaw'
hgsql hgFixed < immunoRaw.sql

cd /cluster/hive/groups/gsid/medical/hiv/hivmnV3

# copy over raw data file.
cp -p /cluster/hive/groups/gsid/medical/hiv/immunogenicityData/orig/Antibody_Inf_LPLT_31Jul_forFH.txt jRaw.txt

# replace empty field with "-3", "N/A" with "-1", and "N/D" with "-2"
# (the tab-tab rule is applied five times because one pass cannot fill two
# adjacent empty fields: the tab that ends one match is consumed by it)
sed -e 's/\t\t/\t-3\t/g' \
    -e 's/\t\t/\t-3\t/g' \
    -e 's/\t\t/\t-3\t/g' \
    -e 's/\t\t/\t-3\t/g' \
    -e 's/\t\t/\t-3\t/g' \
    -e 's/N\/A/-1/g' \
    -e 's/N\/D/-2/g' jRaw.txt >jRaw2.txt

# load data into immunoRaw table
hgsql hgFixed -e 'load data local infile "jRaw2.txt" into table immunoRaw ignore 1 lines'
hgsql hgFixed -e 'update immunoRaw set LastTrAntiGP120="-3" where LastTrAntiGP120=""'

# -N: no column-name header, because immuno.tab is loaded below without
# "ignore 1 lines" and a header line would be loaded as a data row
hgsql hgFixed -N -e 'select * from immunoRaw' >immuno.tab

# load data into immuno table
hgsql hgFixed -e 'drop table if exists immuno'
# NOTE(review): immuno.sql is read from the current directory (hivmnV3),
# whereas immunoRaw.sql was read from ~/kent/src/hg/lib -- confirm where
# immuno.sql lives.
hgsql hgFixed < immuno.sql
hgsql hgFixed -e 'load data local infile "immuno.tab" into table immuno'

# replace -1 with NULL for SDayLastPTest.
hgsql hgFixed -e 'update immuno set SDayLastPTest = NULL where SDayLastPTest = -1'

#######################################################################################