# for emacs: -*- mode: sh; -*-

# This file describes how we make the BlastTab tables for knownGenes
#       sgdGenes flyBaseGenes wormGenes

# Run on the occasion of Ensembl v59 release to update danRer6 ensembl
#       blast tabs on all other organisms.  In the process, discovered
#       some confusion in previous builds of blastTabs, therefore everything
#       rebuilt

# Conventions used in this document:
#   - commands are sh syntax; lines of recorded program output are kept
#     as comments so that pasting a section never executes them
#   - here-documents are opened with a quoted delimiter ('_EOF_') to stop
#     variable expansion, and are closed with the bare word _EOF_, which is
#     what sh requires

############################################################################
# DONE 2010-08-18 in this case in this danRer6 directory
#       where you can find this procedure encapsulated in scripts.
#       the copy here is merely the contents of the scripts that run
#       these procedures.
mkdir /hive/data/genomes/danRer6/bed/ensGene.59/blastTab
cd /hive/data/genomes/danRer6/bed/ensGene.59/blastTab

export runDir="/hive/data/genomes/danRer6/bed/ensGene.59/blastTab"

############################################################################
# prepare peptide fasta files for all

mkdir -p ${runDir}
cd ${runDir}

# get all the protein sets to be used here
pepPredToFa danRer6 ensPep danRer6.ensembl.faa
pepPredToFa sacCer2 sgdPep sacCer2.sgd.faa
pepPredToFa dm3 flyBasePep dm3.flyBase.faa
pepPredToFa ce6 sangerPep ce6.sanger.faa
for D in hg19 mm9 rn4
do
    pepPredToFa $D knownGenePep $D.known.faa
done

############################################################################
# construct each *.config.ra file
# (one "key value" pair per line; the multi-line echo strings below supply
#  those line breaks, so their continuation lines must stay at column 0)

# known gene tables
for D in hg19 mm9 rn4
do
    mkdir -p $D
    echo "targetGenesetPrefix known
targetDb $D" > ${D}/${D}.config.ra
    # the sed removes the target itself from the list of query databases
    echo "queryDbs hg19 mm9 rn4 ce6 dm3 danRer6 sacCer2" \
        | sed -e "s/ ${D}//" >> ${D}/${D}.config.ra
    echo "hg19Fa ${runDir}/hg19.known.faa
mm9Fa ${runDir}/mm9.known.faa
rn4Fa ${runDir}/rn4.known.faa
sacCer2Fa ${runDir}/sacCer2.sgd.faa
ce6Fa ${runDir}/ce6.sanger.faa
dm3Fa ${runDir}/dm3.flyBase.faa
danRer6Fa ${runDir}/danRer6.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" >> ${D}/${D}.config.ra
done

# sgd gene tables
for D in sacCer2
do
    mkdir -p $D
    echo "targetGenesetPrefix sgd
targetDb $D
queryDbs hg19 mm9 rn4 ce6 dm3 danRer6
hg19Fa ${runDir}/hg19.known.faa
mm9Fa ${runDir}/mm9.known.faa
rn4Fa ${runDir}/rn4.known.faa
sacCer2Fa ${runDir}/sacCer2.sgd.faa
ce6Fa ${runDir}/ce6.sanger.faa
dm3Fa ${runDir}/dm3.flyBase.faa
danRer6Fa ${runDir}/danRer6.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done

# flyBase table
for D in dm3
do
    mkdir -p $D
    echo "targetGenesetPrefix flyBase
targetDb $D
queryDbs hg19 mm9 rn4 ce6 sacCer2 danRer6
hg19Fa ${runDir}/hg19.known.faa
mm9Fa ${runDir}/mm9.known.faa
rn4Fa ${runDir}/rn4.known.faa
sacCer2Fa ${runDir}/sacCer2.sgd.faa
ce6Fa ${runDir}/ce6.sanger.faa
dm3Fa ${runDir}/dm3.flyBase.faa
danRer6Fa ${runDir}/danRer6.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done

# sanger table
for D in ce6
do
    mkdir -p $D
    echo "targetGenesetPrefix sanger
targetDb $D
queryDbs hg19 mm9 rn4 dm3 sacCer2 danRer6
hg19Fa ${runDir}/hg19.known.faa
mm9Fa ${runDir}/mm9.known.faa
rn4Fa ${runDir}/rn4.known.faa
sacCer2Fa ${runDir}/sacCer2.sgd.faa
ce6Fa ${runDir}/ce6.sanger.faa
dm3Fa ${runDir}/dm3.flyBase.faa
danRer6Fa ${runDir}/danRer6.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done

###########################################################################
# in each directory ce6 rn4 dm3 mm9 sacCer2 hg19
#  running the kluster job

# the yeast directory created above is sacCer2 (it holds sacCer2.config.ra)
cd sacCer2
time nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=pk \
    sacCer2.config.ra > do.log 2>&1
# real    39m3.306s
# user    0m0.589s
# sys     0m0.628s

cd ../dm3
nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=swarm \
    dm3.config.ra > do.log 2>&1

cd ../ce6
nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=pk \
    ce6.config.ra > do.log 2>&1

cd ../mm9
nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=swarm \
    mm9.config.ra > do.log 2>&1

cd ../rn4
nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=swarm \
    rn4.config.ra > do.log 2>&1

cd ../hg19
nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=pk \
    hg19.config.ra > do.log 2>&1

###########################################################################
# running syntenic best on human, mouse, rat combinations

synBlastp.csh mm9 rn4
# db=mm9
# otherDb=rn4
# genePredToFakePsl:
# pslMap via /gbdb/mm9/liftOver/mm9ToRn4.over.chain.gz :
# hgLoadPsl:
# Processing mm9.rn4.kg.psl
# hgMapToGene:
# mm9.rnBlastTab:
# old number of unique query values:
# 33419
# old number of unique target values
# 7090
# new number of unique query values:
# 15038
# new number of unique target values
# 6915

synBlastp.csh mm9 hg19
# db=mm9
# otherDb=hg19
# genePredToFakePsl:
# pslMap via /gbdb/mm9/liftOver/mm9ToHg19.over.chain.gz :
# hgLoadPsl:
# Processing mm9.hg19.kg.psl
# hgMapToGene:
# mm9.hgBlastTab:
# old number of unique query values:
# 42100
# old number of unique target values
# 22528
# new number of unique query values:
# 38924
# new number of unique target values
# 21877

synBlastp.csh hg19 mm9
# db=hg19
# otherDb=mm9
# genePredToFakePsl:
# pslMap via /gbdb/hg19/liftOver/hg19ToMm9.over.chain.gz :
# hgLoadPsl:
# Processing hg19.mm9.kg.psl
# hgMapToGene:
# hg19.mmBlastTab:
# old number of unique query values:
# 60635
# old number of unique target values
# 22100
# new number of unique query values:
# 57302
# new number of unique target values
# 21627

synBlastp.csh hg19 rn4
# db=hg19
# otherDb=rn4
# genePredToFakePsl:
# pslMap via /gbdb/hg19/liftOver/hg19ToRn4.over.chain.gz :
# hgLoadPsl:
# Processing hg19.rn4.kg.psl
# hgMapToGene:
# hg19.rnBlastTab:
# old number of unique query values:
# 48744
# old number of unique target values
# 6894
# new number of unique query values:
# 25086
# new number of unique target values
# 6669

synBlastp.csh rn4 mm9
# db=rn4
# otherDb=mm9
# genePredToFakePsl:
# pslMap via /gbdb/rn4/liftOver/rn4ToMm9.over.chain.gz :
# hgLoadPsl:
# Processing rn4.mm9.kg.psl
# hgMapToGene:
# rn4.mmBlastTab:
# old number of unique query values:
# 8024
# old number of unique target values
# 7095
# new number of unique query values:
# 7751
# new number of unique target values
# 6949

synBlastp.csh rn4 hg19
# db=rn4
# otherDb=hg19
# genePredToFakePsl:
# pslMap via /gbdb/rn4/liftOver/rn4ToHg19.over.chain.gz :
# hgLoadPsl:
# Processing rn4.hg19.kg.psl
# hgMapToGene:
# rn4.hgBlastTab:
# old number of unique query values:
# 7993
# old number of unique target values
# 6915
# new number of unique query values:
# 7489
# new number of unique target values
# 6636

###########################################################################
# running recip best to ce6 dm3 and sacCer2
#       on human, mouse and rat
#
# NOTE(review): the ../${DB} paths below are relative; as recorded here the
#       working directory is still ${runDir}/hg19 from the last cluster run
#       above, and no cd to a dedicated directory is shown -- confirm where
#       this was actually run (the 2012 section below uses a "recip" dir).

# loadRecipBest queryDb...
#   The same procedure was run for every target, so it is written once here.
#   Globals read:    TARGET     table-name prefix of the target (e.g. hg)
#                    TARGET_DB  target database (e.g. hg19)
#   Globals written: DB aToB bToA dbBlastTab
#   Arguments:       the query databases to pair with TARGET_DB
#   For each query database: collect the out/*.tab files of both blast
#   directions from ../${DB}, run blastRecipBest on them, then load
#   ${TARGET_DB}.<xx>BlastTab and ${DB}.${TARGET}BlastTab with hgLoadBlastTab.
loadRecipBest() {
    for DB in "$@"
    do
        echo $DB
        aToB=run.${TARGET_DB}.$DB
        bToA=run.$DB.${TARGET_DB}
        mkdir $aToB $bToA
        cat ../${DB}/$aToB/out/*.tab > $aToB/all.tab
        cat ../${DB}/$bToA/out/*.tab > $bToA/all.tab
        echo blastRecipBest $aToB/all.tab $bToA/all.tab \
            $aToB/recipBest.tab $bToA/recipBest.tab
        blastRecipBest $aToB/all.tab $bToA/all.tab \
            $aToB/recipBest.tab $bToA/recipBest.tab
        # xxBlastTab is the fall-through name for a database with no case arm
        dbBlastTab=xxBlastTab
        case $DB in
            ce6) dbBlastTab=ceBlastTab;;
            dm3) dbBlastTab=dmBlastTab;;
            danRer6) dbBlastTab=drBlastTab;;
            sacCer2) dbBlastTab=scBlastTab;;
        esac
        echo hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab
        hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab
        echo hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab
        hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab
    done
}

export TARGET=hg
export TARGET_DB=hg19
loadRecipBest ce6 dm3 sacCer2

export TARGET=mm
export TARGET_DB=mm9
loadRecipBest ce6 dm3 sacCer2

export TARGET=rn
export TARGET_DB=rn4
loadRecipBest ce6 dm3 sacCer2

export TARGET=ce
export TARGET_DB=ce6
loadRecipBest dm3 sacCer2

export TARGET=sc
export TARGET_DB=sacCer2
loadRecipBest dm3 ce6

export TARGET=dm
export TARGET_DB=dm3
loadRecipBest sacCer2 ce6

# danRer6 against everything else; only the drBlastTab tables in the other
# databases are loaded here
DB=danRer6
for tDB in hg19 mm9 rn4 ce6 dm3 sacCer2
do
    echo $DB
    aToB=run.${DB}.${tDB}
    bToA=run.${tDB}.${DB}
    # both directories receive an all.tab below, so both must exist
    mkdir -p $aToB $bToA
    cat ../${tDB}/$aToB/out/*.tab > $aToB/all.tab
    cat ../${tDB}/$bToA/out/*.tab > $bToA/all.tab
    blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    dbBlastTab=xxBlastTab
    case $DB in
        ce6) dbBlastTab=ceBlastTab;;
        dm3) dbBlastTab=dmBlastTab;;
        danRer6) dbBlastTab=drBlastTab;;
        sacCer2) dbBlastTab=scBlastTab;;
    esac
    echo "hgLoadBlastTab ${tDB} $dbBlastTab $aToB/recipBest.tab"
    hgLoadBlastTab ${tDB} $dbBlastTab $aToB/recipBest.tab
done
# hgLoadBlastTab hg19 drBlastTab run.danRer6.hg19/recipBest.tab
# Loading database with 12146 rows
# hgLoadBlastTab mm9 drBlastTab run.danRer6.mm9/recipBest.tab
# Loading database with 11997 rows
# hgLoadBlastTab rn4 drBlastTab run.danRer6.rn4/recipBest.tab
# Loading database with 5142 rows
# hgLoadBlastTab ce6 drBlastTab run.danRer6.ce6/recipBest.tab
# Loading database with 4865 rows
# hgLoadBlastTab dm3 drBlastTab run.danRer6.dm3/recipBest.tab
# Loading database with 5765 rows
# hgLoadBlastTab sacCer2 drBlastTab run.danRer6.sacCer2/recipBest.tab
# Loading database with 2174 rows

###########################################################################
# updating drBlastTab for new EnsGene release (DONE - 2012-10-12 - Hiram)
mkdir /hive/data/genomes/danRer7/bed/ensGene.68/blastTab
cd /hive/data/genomes/danRer7/bed/ensGene.68/blastTab

# prepFasta.sh: dump the peptide fasta files and write one config.ra per
# target database (mm10 and sacCer3 in this first pass)
cat << '_EOF_' > prepFasta.sh
#!/bin/sh

export sacCer="sacCer3"
export ce="ce6"
export danRer="danRer7"
export dm="dm3"
export rn="rn4"
export hg="hg19"
export mm="mm10"
export rnGene="rgdGene2"

export runDir="/hive/data/genomes/danRer7/bed/ensGene.68/blastTab"

mkdir -p ${runDir}
cd ${runDir}

# get all the protein sets to be used here
pepPredToFa ${danRer} ensPep ${danRer}.ensembl.faa
pepPredToFa ${sacCer} sgdPep ${sacCer}.sgd.faa
pepPredToFa ${dm} flyBasePep ${dm}.flyBase.faa
pepPredToFa ${ce} sangerPep ${ce}.sanger.faa
pepPredToFa ${rn} ${rnGene}Pep ${rn}.${rnGene}.faa
for D in ${hg} ${mm}
do
    pepPredToFa $D knownGenePep $D.known.faa
done

# known gene tables
for D in ${hg} ${mm}
do
    mkdir -p $D
    echo "targetGenesetPrefix known
targetDb $D" > ${D}/${D}.config.ra
    echo "queryDbs ${hg} ${mm} ${rn} ${ce} ${dm} ${danRer} ${sacCer}" \
        | sed -e "s/ ${D}//" >> ${D}/${D}.config.ra
    echo "${hg}Fa ${runDir}/${hg}.known.faa
${mm}Fa ${runDir}/${mm}.known.faa
${rn}Fa ${runDir}/${rn}.${rnGene}.faa
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${ce}Fa ${runDir}/${ce}.sanger.faa
${dm}Fa ${runDir}/${dm}.flyBase.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" >> ${D}/${D}.config.ra
done

for D in ${rn}
do
    mkdir -p $D
    echo "targetGenesetPrefix ${rnGene}
targetDb $D" > ${D}/${D}.config.ra
    echo "queryDbs ${hg} ${mm} ${rn} ${ce} ${dm} ${danRer} ${sacCer}" \
        | sed -e "s/ ${D}//" >> ${D}/${D}.config.ra
    echo "${hg}Fa ${runDir}/${hg}.known.faa
${mm}Fa ${runDir}/${mm}.known.faa
${rn}Fa ${runDir}/${rn}.${rnGene}.faa
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${ce}Fa ${runDir}/${ce}.sanger.faa
${dm}Fa ${runDir}/${dm}.flyBase.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" >> ${D}/${D}.config.ra
done

for D in ${sacCer}
do
    mkdir -p $D
    echo "targetGenesetPrefix sgd
targetDb $D
queryDbs ${hg} ${mm} ${rn} ${ce} ${dm} ${danRer}
${hg}Fa ${runDir}/${hg}.known.faa
${mm}Fa ${runDir}/${mm}.known.faa
${rn}Fa ${runDir}/${rn}.${rnGene}.faa
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${ce}Fa ${runDir}/${ce}.sanger.faa
${dm}Fa ${runDir}/${dm}.flyBase.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done

# flyBase table
for D in ${dm}
do
    mkdir -p $D
    echo "targetGenesetPrefix flyBase
targetDb $D
queryDbs ${hg} ${mm} ${rn} ${ce} ${sacCer} ${danRer}
${hg}Fa ${runDir}/${hg}.known.faa
${mm}Fa ${runDir}/${mm}.known.faa
${rn}Fa ${runDir}/${rn}.${rnGene}.faa
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${ce}Fa ${runDir}/${ce}.sanger.faa
${dm}Fa ${runDir}/${dm}.flyBase.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done

# sanger table
for D in ${ce}
do
    mkdir -p $D
    echo "targetGenesetPrefix sanger
targetDb $D
queryDbs ${hg} ${mm} ${rn} ${dm} ${sacCer} ${danRer}
${hg}Fa ${runDir}/${hg}.known.faa
${mm}Fa ${runDir}/${mm}.known.faa
${rn}Fa ${runDir}/${rn}.${rnGene}.faa
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${ce}Fa ${runDir}/${ce}.sanger.faa
${dm}Fa ${runDir}/${dm}.flyBase.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done

# Zebrafish danRer7 Ensembl genes
for D in ${danRer}
do
    mkdir -p $D
    echo "targetGenesetPrefix ensembl
targetDb $D
queryDbs ${hg} ${mm} ${rn} ${ce} ${dm} ${sacCer}
${hg}Fa ${runDir}/${hg}.known.faa
${mm}Fa ${runDir}/${mm}.known.faa
${rn}Fa ${runDir}/${rn}.${rnGene}.faa
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${ce}Fa ${runDir}/${ce}.sanger.faa
${dm}Fa ${runDir}/${dm}.flyBase.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done
_EOF_
# << happy emacs
chmod +x prepFasta.sh
./prepFasta.sh

# runAll.sh: run the blast cluster job in each target directory; -noLoad
# is passed, and the tables are loaded separately further below
cat << '_EOF_' > runAll.sh
#!/bin/sh

export sacCer="sacCer3"
export ce="ce6"
export danRer="danRer7"
export dm="dm3"
export rn="rn4"
export hg="hg19"
export mm="mm10"

export TOP="/hive/data/genomes/danRer7/bed/ensGene.68/blastTab"

for G in ${sacCer} ${ce} ${danRer} ${dm} ${rn} ${hg} ${mm}
do
    cd "${TOP}/${G}"
    time nice -n +19 doHgNearBlastp.pl -noLoad -workhorse=hgwdev \
        -clusterHub=swarm ${G}.config.ra > do.log 2>&1
done
_EOF_
# << happy emacs
chmod +x runAll.sh
./runAll.sh

# need to also do mm9 and sacCer2, save mm10, sacCer3 result:
mv danRer7 danRer7.mm10.sacCer3

cat << '_EOF_' > prepMm9.sh
#!/bin/sh

export sacCer="sacCer3"
export ce="ce6"
export danRer="danRer7"
export dm="dm3"
export rn="rn4"
export hg="hg19"
export mm="mm9"
export rnGene="rgdGene2"

export runDir="/hive/data/genomes/danRer7/bed/ensGene.68/blastTab"

cd ${runDir}

for D in ${mm}
do
    pepPredToFa $D knownGenePep $D.known.faa
done

# known gene tables
for D in ${mm}
do
    mkdir -p $D
    echo "targetGenesetPrefix known
targetDb $D" > ${D}/${D}.config.ra
    echo "queryDbs ${hg} ${mm} ${rn} ${ce} ${dm} ${danRer} ${sacCer}" \
        | sed -e "s/ ${D}//" >> ${D}/${D}.config.ra
    echo "${hg}Fa ${runDir}/${hg}.known.faa
${mm}Fa ${runDir}/${mm}.known.faa
${rn}Fa ${runDir}/${rn}.${rnGene}.faa
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${ce}Fa ${runDir}/${ce}.sanger.faa
${dm}Fa ${runDir}/${dm}.flyBase.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" >> ${D}/${D}.config.ra
done

# Zebrafish danRer7 Ensembl genes
for D in ${danRer}
do
    mkdir -p $D
    echo "targetGenesetPrefix ensembl
targetDb $D
queryDbs ${mm}
${hg}Fa ${runDir}/${hg}.known.faa
${mm}Fa ${runDir}/${mm}.known.faa
${rn}Fa ${runDir}/${rn}.${rnGene}.faa
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${ce}Fa ${runDir}/${ce}.sanger.faa
${dm}Fa ${runDir}/${dm}.flyBase.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done
_EOF_
# << happy emacs
chmod +x prepMm9.sh
./prepMm9.sh

export TOP="/hive/data/genomes/danRer7/bed/ensGene.68/blastTab"

for G in danRer7 mm9
do
    cd "${TOP}/${G}"
    time nice -n +19 doHgNearBlastp.pl -noLoad -workhorse=hgwdev \
        -clusterHub=swarm ${G}.config.ra > do.log 2>&1
done

# and the sacCer2, save the mm9 result
# the loop above leaves the shell in ${TOP}/mm9; the mv and the script
# written next are relative to ${TOP}
cd "${TOP}"
mv danRer7 danRer7.mm9

# written to prepSacCer2.sh, the name that is made executable and run below
# NOTE(review): the pepPredToFa for sacCer2 is commented out inside this
#       script, yet ${runDir}/sacCer2.sgd.faa is referenced by both config
#       files and no other step in this section creates it -- confirm the
#       file was already present.
cat << '_EOF_' > prepSacCer2.sh
#!/bin/sh

export sacCer="sacCer2"
export ce="ce6"
export danRer="danRer7"
export dm="dm3"
export rn="rn4"
export hg="hg19"
export mm="mm9"
export rnGene="rgdGene2"

export runDir="/hive/data/genomes/danRer7/bed/ensGene.68/blastTab"

cd ${runDir}

# get all the protein sets to be used here
# pepPredToFa ${sacCer} sgdPep ${sacCer}.sgd.faa

for D in ${sacCer}
do
    mkdir -p $D
    echo "targetGenesetPrefix sgd
targetDb $D
queryDbs ${danRer}
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done

# Zebrafish danRer7 Ensembl genes
for D in ${danRer}
do
    mkdir -p $D
    echo "targetGenesetPrefix ensembl
targetDb $D
queryDbs ${sacCer}
${sacCer}Fa ${runDir}/${sacCer}.sgd.faa
${danRer}Fa ${runDir}/${danRer}.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp" > ${D}/${D}.config.ra
done
_EOF_
# << happy emacs
chmod +x prepSacCer2.sh
./prepSacCer2.sh

export TOP="/hive/data/genomes/danRer7/bed/ensGene.68/blastTab"

for G in danRer7 sacCer2
do
    cd "${TOP}/${G}"
    time nice -n +19 doHgNearBlastp.pl -noLoad -workhorse=hgwdev \
        -clusterHub=swarm ${G}.config.ra > do.log 2>&1
done

##### Now run recip best business, this requires moving
####  the different danRer7 directory results around to get them
####  in place for mm9 and sacCer2 vs the mm10 sacCer3 results

mkdir /hive/data/genomes/danRer7/bed/ensGene.68/blastTab/recip
cd /hive/data/genomes/danRer7/bed/ensGene.68/blastTab/recip

# recipBest.sh only computes the recipBest.tab files and echoes the load
# command; its hgLoadBlastTab line is commented out, loading is done below
cat << '_EOF_' > recipBest.sh
#!/bin/sh

export TOP="/hive/data/genomes/danRer7/bed/ensGene.68/blastTab/recip"

cd $TOP
DB=danRer7
# for tDB in hg19 mm10 rn4 ce6 dm3 sacCer3
# for tDB in sacCer2
# for tDB in mm9
for tDB in mm10
do
    echo $DB
    aToB=run.${DB}.${tDB}
    bToA=run.${tDB}.${DB}
    mkdir -p $aToB $bToA
    cat ../${tDB}/$aToB/out/*.tab > $aToB/all.tab
    cat ../${tDB}/$bToA/out/*.tab > $bToA/all.tab
    blastRecipBest $aToB/all.tab $bToA/all.tab \
        $aToB/recipBest.tab $bToA/recipBest.tab
    dbBlastTab=xxBlastTab
    case $DB in
        ce6) dbBlastTab=ceBlastTab;;
        dm3) dbBlastTab=dmBlastTab;;
        danRer7) dbBlastTab=drBlastTab;;
        sacCer2) dbBlastTab=scBlastTab;;
    esac
    echo "hgLoadBlastTab ${tDB} $dbBlastTab $aToB/recipBest.tab"
#    hgLoadBlastTab ${tDB} $dbBlastTab $aToB/recipBest.tab
done
_EOF_
# << happy emacs
# NOTE(review): no step is recorded here that makes recipBest.sh executable
#       or runs it; presumably it was run once per "for tDB in ..." variant
#       listed inside it (with the danRer7 result directories moved into
#       place each time), since the surveys below read the
#       run.*.danRer7/recipBest.tab files it produces -- confirm.

# survey tables before loading:
for D in hg19 mm9 mm10 rn4 dm3 ce6 sacCer3 sacCer2
do
    echo hgsql -e \"select count'(*)' from drBlastTab\;\" $D
    hgsql -N -e "select count(*) from drBlastTab;" $D | cat
    wc -l run.${D}.danRer7/recipBest.tab
done > survey.result.before

# load all the drBlastTab tables:
# NOTE(review): this loads run.$D.danRer7/recipBest.tab, whereas the 2010
#       danRer6 procedure above loaded run.danRer6.$tDB/recipBest.tab --
#       confirm which direction is the intended one.
for D in ce6 dm3 hg19 mm10 mm9 rn4 sacCer2 sacCer3
do
    ls -og run.$D.danRer7/recipBest.tab
    echo "hgLoadBlastTab $D drBlastTab -maxPer=1 run.$D.danRer7/recipBest.tab"
    hgLoadBlastTab $D drBlastTab -maxPer=1 run.$D.danRer7/recipBest.tab
done

# survey tables after loading:
for D in hg19 mm9 mm10 rn4 dm3 ce6 sacCer3 sacCer2
do
    echo hgsql -e \"select count'(*)' from drBlastTab\;\" $D
    hgsql -N -e "select count(*) from drBlastTab;" $D | cat
    wc -l run.${D}.danRer7/recipBest.tab
done > survey.result.after

# verify table row counts are not too much different:
diff survey.result.before survey.result.after
# 2c2
# < 13021
# ---
# > 13076
# 5c5
# < 12886
# ---
# > 12904
# 8c8
# < 12881
# ---
# > 12918
# 11c11
# < 7827
# ---
# > 8928
# 14c14
# < 5924
# ---
# > 5929
# 17c17
# < 4956
# ---
# > 4958
# 20c20
# < 3893
# ---
# > 2386
# 23c23
# < 2229
# ---
# > 2231

###########################################################################