# MULTIZ with other methanogens # DONE (10/15/05), kpollard cd /cluster/data/methBurt1/bed mkdir conservation cd conservation cp /cluster/data/methBark1/bed/conservation/HoxD55.q . cp /cluster/data/methBark1/bed/conservation/*.nib . cp /cluster/data/methBark1/bed/conservation/*.chr . cp /cluster/data/methBark1/bed/conservation/methBark1.fa . cp /cluster/data/methBark1/bed/conservation/methBurt1.fa . faToTwoBit methBark1.fa methBark1.2bit faToTwoBit methBurt1.fa methBurt1.2bit sed s/chr/methBark1.chr/ methBark1.fa > temp sed s/plas/methBark1.plas/ temp > methBark1.fa rm temp #chrom sizes faSize -detailed *.chr *.fa > chrom.sizes #split methBurt1.fa faSplit byname methBurt1.fa ./ foreach f (methBurt1.Contig*.fa) echo `basename $f .fa` >> contigs.txt end #blastz (run on kolossus) ssh kolossus cd /cluster/data/methBurt1/bed/conservation/ foreach f (`cat contigs.txt`) echo $f blastz $f.fa methBark1.fa Q=HoxD55.q > ${f}-methBark1.lav blastz $f.fa methMaze1.chr Q=HoxD55.q > ${f}-methMaze1.lav blastz $f.fa metAce1.chr Q=HoxD55.q > ${f}-metAce1.lav end foreach f (`cat contigs.txt`) echo $f echo "lavToAxt" lavToAxt ${f}-methBark1.lav methBurt1.2bit methBark1.2bit ${f}-methBark1.axt lavToAxt ${f}-methMaze1.lav methBurt1.2bit . ${f}-methMaze1.axt lavToAxt ${f}-metAce1.lav methBurt1.2bit . ${f}-metAce1.axt echo "axtBest" axtBest ${f}-methBark1.axt all -winSize=500 -minScore=5000 ${f}-methBark1-best.axt axtBest ${f}-methMaze1.axt all -winSize=500 -minScore=5000 ${f}-methMaze1-best.axt axtBest ${f}-metAce1.axt all -winSize=500 -minScore=5000 ${f}-metAce1-best.axt echo "axtToMaf" axtToMaf ${f}-methBark1-best.axt chrom.sizes chrom.sizes ${f}-methBark1.maf axtToMaf ${f}-methMaze1-best.axt chrom.sizes chrom.sizes ${f}-methMaze1.maf axtToMaf ${f}-metAce1-best.axt chrom.sizes chrom.sizes ${f}-metAce1.maf end #multiz foreach f(*.maf) cat $f | gawk 'BEGIN{getline; print $0; getline; getline; getline; getline;}{print $0;}' > methBurt mv methBurt $f end foreach f(`cat contigs.txt`) echo $f multiz ${f}-methBark1.maf ${f}-methMaze1.maf - > ${f}-methBark1-methMaze1.maf multiz ${f}-metAce1.maf ${f}-methBark1-methMaze1.maf - > ${f}-methBark1-methMaze1-metAce1.maf end #get rid of MAF files with no data foreach f(`cat contigs.txt`) wc -l ${f}-methBark1-methMaze1-metAce1.maf | gawk '{if($1<4){print "rm "$2}}' >> rmjobs end chmod +x rmjobs rmjobs #phyloHMM foreach ff(*-methBark1-methMaze1-metAce1.maf) set f=`basename $ff -methBark1-methMaze1-metAce1.maf` echo $f msa_view -i MAF -M $f.fa -o SS $ff > $f.ss cat $f.ss | gawk '{if(/^NAMES/){print "NAMES = methBurt1,methBark1,methMaze1,metAce1";} else{print $0;}}' > methBurt mv methBurt $f.ss phyloFit -i SS $f.ss -t "(metAce1,(methBurt1,(methBark1,methMaze1)))" -o ${f}_MbaMaMmMbu end #Contig57 has the largest NTUPLES (1071) so use it for starting mod # it shows GC=0.212928+0.220304=0.433232 #add GC content to next call foreach f(*.ss) set b=$f:t:r echo $b phastCons $f methBurt1.Contig57_MbaMaMmMbu.mod \ --gc 0.4332 --target-coverage 0.7 --estimate-trees ${b} \ --expected-lengths 25 --no-post-probs --ignore-missing \ --nrates 1,1 end cat contigs.txt | gawk '/Contig29/{getline;}{print substr($1,11,length($1));}' > cont.txt foreach b(`cat cont.txt`) echo $b phastCons methBurt1.$b.ss \ methBurt1.Contig57.cons.mod,methBurt1.Contig57.noncons.mod \ --target-coverage 0.7 --expected-lengths 25 \ --viterbi ${b}_methBurt1-elements.bed --score \ --require-informative 0 --seqname $b > ${b}_cons.dat wigEncode ${b}_cons.dat ${b}_phastCons.wig ${b}_phastCons.wib end #combine phastCons elements into 1 bed file cat *elements.bed > phastCons.bed #move data mkdir wib mv Contig*_phastCons.wib wib/. mv Contig*_phastCons.wig wib/. mkdir /gbdb/methBurt1/wib ln -s /cluster/data/methBurt1/bed/conservation/wib/*.wib /gbdb/methBurt1/wib mkdir /gbdb/methBurt1/pwMaf mkdir -p otherSpp/methBark1 otherSpp/methMaze1 otherSpp/metAce1 foreach f(`cat cont.txt`) echo $f mv methBurt1.${f}-methBark1.maf otherSpp/methBark1/$f.maf mv methBurt1.${f}-methMaze1.maf otherSpp/methMaze1/$f.maf mv methBurt1.${f}-metAce1.maf otherSpp/metAce1/$f.maf end ln -s /cluster/data/methBurt1/bed/conservation/otherSpp/methBark1 /gbdb/methBurt1/pwMaf/methBark1_pwMaf ln -s /cluster/data/methBurt1/bed/conservation/otherSpp/metAce1 /gbdb/methBurt1/pwMaf/metAce1_pwMaf ln -s /cluster/data/methBurt1/bed/conservation/otherSpp/methMaze1 /gbdb/methBurt1/pwMaf/methMaze1_pwMaf mkdir multiz foreach f(`cat cont.txt`) echo $f mv methBurt1.${f}-methBark1-methMaze1-metAce1.maf multiz/$f.maf end ln -s /cluster/data/methBurt1/bed/conservation/multiz /gbdb/methBurt1/multizMbuMbaMmMa #load hgLoadWiggle methBurt1 phastCons /cluster/data/methBurt1/bed/conservation/wib/*_phastCons.wig hgLoadMaf -warn methBurt1 multizMbuMbaMmMa hgLoadMaf -warn methBurt1 methBark1_pwMaf -pathPrefix=/gbdb/methBurt1/pwMaf/methBark1_pwMaf hgLoadMaf -warn methBurt1 metAce1_pwMaf -pathPrefix=/gbdb/methBurt1/pwMaf/metAce1_pwMaf hgLoadMaf -warn methBurt1 methMaze1_pwMaf -pathPrefix=/gbdb/methBurt1/pwMaf/methMaze1_pwMaf hgLoadBed methBurt1 phastConsElements phastCons.bed #trackDb cd ~/kent/src/hg/makeDb/trackDb/archae mkdir methBurt1 cvs add methBurt1 cd methBurt1 #trackDb.ra entry # track multizMbuMbaMmMa # shortLabel Conservation # longLabel Methanogen multiz alignments # group compGeno # priority 10.0 # visibility pack # type wigMaf 0.0 1.0 # maxHeightPixels 100:40:11 # wiggle phastCons # yLineOnOff Off # autoScale Off # pairwise pwMaf # speciesOrder methBark1 methMaze1 metAce1 cvs add trackDb.ra cvs commit -m "New multiz track" trackDb.ra #html page cvs add multizMbuMbaMmMa.html cvs commit -m "Details page for multiz track" multizMbuMbaMmMa.html