/* extend - extend tag matches to full sequence matches, making use of * banded Smith-Waterman. */ /* Copyright 2008 Jim Kent all rights reserved. */ #include "common.h" #include "chain.h" #include "chainConnect.h" #include "axt.h" #include "bandExt.h" #include "splix.h" #include "splat.h" static struct splatAlign *tagToAlign(struct splatTag *tag, struct dnaSeq *qSeqF, struct dnaSeq *qSeqR, struct splix *splix, struct axtScoreScheme *scoreScheme) /* Convert splatTag to splatAlign on the basic level. Don't (yet) * fill in score field or do extension. */ { char strand = tag->strand; struct dnaSeq *qSeq = (strand == '-' ? qSeqR : qSeqF); int chromIx = splixOffsetToChromIx(splix, tag->t1); int chromOffset = splix->chromOffsets[chromIx]; DNA *q = qSeq->dna; DNA *t = splix->allDna + chromOffset; /* Allocate and fill out cBlock structure on first alignment block. */ struct cBlock *block1; AllocVar(block1); block1->qStart = tag->q1; block1->qEnd = block1->qStart + tag->size1; block1->tStart = tag->t1 - chromOffset; block1->tEnd = block1->tStart + tag->size1; /* Allocate and fill in chain structure. */ struct chain *chain; AllocVar(chain); chain->tName = cloneString(splix->chromNames[chromIx]); chain->tSize = splix->chromSizes[chromIx]; chain->tStart = block1->tStart; chain->tEnd = block1->tEnd; chain->qName = cloneString(qSeq->name); chain->qSize = qSeq->size; chain->qStrand = strand; chain->qStart = block1->qStart; chain->qEnd = block1->qEnd; chain->blockList = block1; /* Allocate and fill out splatAlign struct. */ struct splatAlign *align; AllocVar(align); align->chain = chain; align->qDna = q; align->tDna = t; /* If need be add second block to alignment. */ if (tag->size2 > 0) { struct cBlock *block2; AllocVar(block2); block2->qStart = tag->q2; chain->qEnd = block2->qEnd = block2->qStart + tag->size2; block2->tStart = tag->t2 - chromOffset; chain->tEnd = block2->tEnd = block2->tStart + tag->size2; block1->next = block2; } return align; } double chainAddAxtScore(struct chain *chain, DNA *qDna, DNA *tDna, struct axtScoreScheme *scoreScheme) /* Score chain according to scoring scheme. */ { double score = 0; struct cBlock *b, *bNext; for (b = chain->blockList; b != NULL; b = bNext) { int bSize = b->tEnd - b->tStart; b->score = axtScoreUngapped(scoreScheme, qDna + b->qStart, tDna + b->tStart, bSize); score += b->score; bNext = b->next; if (bNext != NULL) { int qGap = bNext->qStart - b->qEnd; int tGap = bNext->tStart - b->tEnd; int gap = qGap + tGap; score -= gap * scoreScheme->gapExtend - scoreScheme->gapOpen; } } chain->score = score; return score; } static struct cBlock *bestBlock(struct chain *chain) /* Return best scoring block in chain */ { struct cBlock *block, *best = NULL; int bestScore = 0; for (block = chain->blockList; block != NULL; block = block->next) { if (block->score > bestScore) { bestScore = block->score; best = block; } } return best; } static struct cBlock *extendInRegion( DNA *qDna, int qStart, int qEnd, DNA *tDna, int tStart, int tEnd, int dir, char *qSym, char *tSym, int symAlloc, struct axtScoreScheme *scoreScheme, int maxGap) /* Do banded extension in region and return as a list of blocks. */ { int symCount; int qSize = qEnd - qStart; int tSize = tEnd - tStart; int qs, ts, qExtStart, tExtStart; if (qSize > 0 && tSize > 0) { if (dir < 0) { qStart = qEnd - qSize; tStart = tEnd - tSize; } bandExt(FALSE, scoreScheme, maxGap, qDna + qStart, qSize, tDna + tStart, tSize, dir, symAlloc, &symCount, qSym, tSym, &qs, &ts); if (dir > 0) { qExtStart = qStart; tExtStart = tStart; } else { qExtStart = qEnd - qs - 1; tExtStart = tEnd - ts - 1; } return cBlocksFromAliSym(symCount, qSym, tSym, qExtStart, tExtStart); } else return NULL; } void extendAroundBestBlock(struct splatAlign *ali, struct axtScoreScheme *scoreScheme) /* Return realignment created by extending in both directions from the mid-point of the * best block in the existing alignment. */ { int maxSingleGap = 9; int maxTotalGaps = 3*maxSingleGap; struct chain *chain = ali->chain; struct cBlock *anchor = bestBlock(chain); int qMid = (anchor->qStart + anchor->qEnd)/2; int tMid = (anchor->tStart + anchor->tEnd)/2; int symAlloc = chain->qSize * 2; char *qSym, *tSym; AllocArray(qSym, symAlloc); AllocArray(tSym, symAlloc); int qBeforeSize = qMid; struct cBlock *blocksBefore = extendInRegion(ali->qDna, 0, qMid, ali->tDna, tMid - qBeforeSize - maxTotalGaps, tMid, -1, qSym, tSym, symAlloc, scoreScheme, maxSingleGap); int qAfterSize = chain->qSize - qMid; struct cBlock *blocksAfter = extendInRegion(ali->qDna, qMid, chain->qSize, ali->tDna, tMid, tMid + qAfterSize + maxTotalGaps, 1, qSym, tSym, symAlloc, scoreScheme, maxSingleGap); struct cBlock *allBlocks = slCat(blocksBefore, blocksAfter); slFreeList(&chain->blockList); chain->blockList = allBlocks; chainMergeAbutting(chain); chainCalcBounds(chain); chainAddAxtScore(chain, ali->qDna, ali->tDna, scoreScheme); freeMem(qSym); freeMem(tSym); } struct splatAlign *splatExtendTag(struct splatTag *tag, struct dnaSeq *qSeqF, struct dnaSeq *qSeqR, struct splix *splix, struct axtScoreScheme *scoreScheme) /* Convert a single tag to an alignment. */ { struct splatAlign *ali = tagToAlign(tag, qSeqF, qSeqR, splix, scoreScheme); struct chain *chain = ali->chain; chainAddAxtScore(chain, ali->qDna, ali->tDna, scoreScheme); int qxSize = chain->qSize - chain->qEnd; /* extra q size. */ int txSize = chain->tSize - chain->tEnd; /* extra t size. */ if (qxSize > 0 && txSize > 0) extendAroundBestBlock(ali, scoreScheme); return ali; } #ifdef OLD struct splatAlign *splatExtendTag(struct splatTag *tag, struct dnaSeq *qSeqF, struct dnaSeq *qSeqR, struct splix *splix, struct axtScoreScheme *scoreScheme) /* Convert a single tag to an alignment. */ { int maxSingleGap = 9; int maxTotalGap = maxSingleGap * 3; struct splatAlign *ali = tagToAlign(tag, qSeqF, qSeqR, splix, scoreScheme); struct chain *chain = ali->chain; chainAddAxtScore(chain, ali->qDna, ali->tDna, scoreScheme); int qxSize = chain->qSize - chain->qEnd; /* extra q size. */ int txSize = chain->tSize - chain->tEnd; /* extra t size. */ if (qxSize > 0 && txSize > 0) { int qxStart = chain->qEnd; int qxEnd = chain->qSize; int txStart = chain->tEnd; int txEnd = chain->tSize; int txMaxSize = qxSize + maxTotalGap; if (txSize > txMaxSize) txSize = txMaxSize; int symAlloc = qxSize + txSize; char *qSym, *tSym; AllocArray(qSym, symAlloc); AllocArray(tSym, symAlloc); int symCount, revStartQ, revStartT; if (bandExt(FALSE, scoreScheme, 9, ali->qDna + qxStart, qxSize, ali->tDna + txStart, txSize, 1, symAlloc, &symCount, qSym, tSym, &revStartQ, &revStartT)) { struct cBlock *newBlocks = cBlocksFromAliSym(symCount, qSym, tSym, qxStart, txStart); struct cBlock *lastOldBlock = slLastEl(chain->blockList); lastOldBlock->next = newBlocks; chainMergeAbutting(chain); chainCalcBounds(chain); chainAddAxtScore(chain, ali->qDna, ali->tDna, scoreScheme); } freeMem(qSym); freeMem(tSym); } return ali; } #endif /* OLD */ struct splatAlign *splatExtendTags(struct splatTag *tagList, struct dnaSeq *qSeqF, struct dnaSeq *qSeqR, struct splix *splix, struct axtScoreScheme *scoreScheme) /* Convert a list of tags to a list of alignments. */ { struct splatTag *tag; struct splatAlign *aliList = NULL; for (tag = tagList; tag != NULL; tag = tag->next) { struct splatAlign *ali = splatExtendTag(tag, qSeqF, qSeqR, splix, scoreScheme); slAddHead(&aliList, ali); } slReverse(&aliList); return aliList; }