#include "readalign.h" double posterior( const string state, int i, int j, AlignDPTable* pFW, AlignDPTable* pBW ) { logspace total = pBW->getProb("start",0,0); logspace fw = pFW->getProb( state, i, j ); logspace bw = pBW->getProb( state, i, j ); return (fw * bw)/total; } double posterior( const string state, int i, int j, AlignBandingDPTable* pFW, AlignBandingDPTable* pBW ) { logspace total = pBW->getProb("start",0,0); logspace fw = pFW->getProb( state, i, j ); logspace bw = pBW->getProb( state, i, j ); return (fw * bw)/total; } void inline get_alignment( Path* path, char* iSeq1, char* iSeq2, int pad1id, int* seq2locus, int* y, int iLen1, char* line1, char* line2, char* line3, AlignDPTable* pFW, AlignDPTable* pBW) { int x = 0, aln=0; int i, j; *y = 0; // Loop over all transitions, except the last one (to the end state), // and the first one (into the delete state) j = (int)path->size()-1; for (i=1; i < j; i++) { if (path->toState(i) != pad1id) break; (*y)++; } *seq2locus = i-1; while (x < iLen1) { if (line3 != NULL) { int stateId = path->toState(i-1); string state = pFW->stateId[stateId]; int p = (int)(0.5 + 25.0*posterior(state, x, *y, pFW, pBW)); line3[aln] = 'a' + (p<0 ? 0 : (p>25 ? 25 : p)); } const vector& v = path->emission(i++); if (v[0] == 1) { line1[aln] = iSeq1[x++]; } else { line1[aln] = '-'; } if (v[1] == 1) { line2[aln++] = iSeq2[(*y)++]; } else { line2[aln++] = '-'; } } line1[aln] = 0; line2[aln] = 0; if (line3 != NULL) line3[aln] = 0; } void inline get_alignment( Path* path, char* iSeq1, char* iSeq2, int pad1id, int* seq2locus, int* y, int iLen1, char* line1, char* line2, char* line3, AlignBandingDPTable* pFW, AlignBandingDPTable* pBW) { int x = 0, aln=0; int i, j; *y = 0; // Loop over all transitions, except the last one (to the end state), // and the first one (into the delete state) j = (int)path->size()-1; for (i=1; i < j; i++) { if (path->toState(i) != pad1id) break; (*y)++; } *seq2locus = i-1; while (x < iLen1) { if (line3 != NULL) { int stateId = path->toState(i-1); string state = pFW->stateId[stateId]; int p = (int)(0.5 + 25.0*posterior(state, x, *y, pFW, pBW)); line3[aln] = 'a' + (p<0 ? 0 : (p>25 ? 25 : p)); } const vector& v = path->emission(i++); if (v[0] == 1) { line1[aln] = iSeq1[x++]; } else { line1[aln] = '-'; } if (v[1] == 1) { line2[aln++] = iSeq2[(*y)++]; } else { line2[aln++] = '-'; } } line1[aln] = 0; line2[aln] = 0; if (line3 != NULL) line3[aln] = 0; } unsigned int sensitiveAlign( char* iSeq1, char* iSeq2, int iLen1, int iLen2, char* pQuals1, int iInsNucPrior, int iGapOpen, int iGapExtend, char* line1, char* line2, char* line1q, int maxlikelihood, char* line3, char* line4, char* line3q, int iStartMean, int iStartSD ) { // Returns an alignment phred score, computed in the obvious way, except that a penalty of 6 is levied for every unaligned // nucleotide within the read footprint, representing the equilibrium distribution. Effectively this increases the gap extension penalty by 6. AlignDPTable* pViterbiTable; AlignDPTable* pFWTable; AlignDPTable* pBWTable; const double db = 10.0/log(0.1); int i; int score = int (0.5 + log(Viterbi_recurse(&pViterbiTable, pQuals1, iSeq1, iSeq2, iGapExtend, iGapOpen,iInsNucPrior, iLen1, iLen2, iStartMean, iStartSD)) * db); if (score > maxlikelihood) { // bail out, with correct score but bogus alignment delete pViterbiTable; for (i=0;igetId("pad1"); int seq2locus, y1; get_alignment( path, iSeq1, iSeq2, pad1id, &seq2locus, &y1, iLen1, line1, line2, line1q, pFWTable, pBWTable ); delete path; if (line3 != NULL) { Path* altpath = &Viterbi_trace(pViterbiTable, pQuals1, iSeq1, iSeq2, iGapExtend, iGapOpen, iInsNucPrior, iLen1, iLen2, iStartMean, iStartSD, 0.999); int y0; get_alignment( altpath, iSeq1, iSeq2, pad1id, &y0, &y1, iLen1, line3, line4, line3q, pFWTable, pBWTable ); delete altpath; } delete pViterbiTable; if (line1q) { delete pFWTable; delete pBWTable; } // return both score and seq2locus; don't clutter parameter list with another pointer // negative scores should be impossible, but be safe return max(0,score) + (seq2locus << 16); } extern "C" unsigned int sensitiveAlign( char* iSeq1, char* iSeq2, int iLen1, int iLen2, char* pQuals1, int iInsNucPrior, int iGapOpen, int iGapExtend, char* line1, char* line2, char* line1q, int maxlikelihood, char* line3, char* line4, char* line3q, int iStartMean, int iStartSD, int band ) { if (band < 0) return sensitiveAlign(iSeq1, iSeq2, iLen1, iLen2, pQuals1, iInsNucPrior, iGapOpen, iGapExtend, line1, line2, line1q, maxlikelihood, line3, line4, line3q, iStartMean, iStartSD); // Returns an alignment phred score, computed in the obvious way, except that a penalty of 6 is levied for every unaligned // nucleotide within the read footprint, representing the equilibrium distribution. Effectively this increases the gap extension penalty by 6. AlignBandingDPTable* pViterbiTable; AlignBandingDPTable* pFWTable; AlignBandingDPTable* pBWTable; const double db = 10.0/log(0.1); int i; int score = int (0.5 + log(ViterbiBanding_recurse(&pViterbiTable, pQuals1, iSeq1, iSeq2, iGapExtend, iGapOpen,iInsNucPrior, iLen1, iLen2, iStartMean, iStartSD, band)) * db); if (score > maxlikelihood) { // bail out, with correct score but bogus alignment delete pViterbiTable; for (i=0;igetId("pad1"); int seq2locus, y1; get_alignment( path, iSeq1, iSeq2, pad1id, &seq2locus, &y1, iLen1, line1, line2, line1q, pFWTable, pBWTable ); delete path; if (line3 != NULL) { Path* altpath = &ViterbiBanding_trace(pViterbiTable, pQuals1, iSeq1, iSeq2, iGapExtend, iGapOpen, iInsNucPrior, iLen1, iLen2, iStartMean, iStartSD, 0.999); int y0; get_alignment( altpath, iSeq1, iSeq2, pad1id, &y0, &y1, iLen1, line3, line4, line3q, pFWTable, pBWTable ); delete altpath; } delete pViterbiTable; if (line1q) { delete pFWTable; delete pBWTable; } // return both score and seq2locus; don't clutter parameter list with another pointer // negative scores should be impossible, but be safe return max(0,score) + (seq2locus << 16); }