/* output - output alignments to files in various formats. */ /* Copyright 2008 Jim Kent all rights reserved. */ #include "common.h" #include "dystring.h" #include "dnautil.h" #include "dnaseq.h" #include "maf.h" #include "chain.h" #include "splix.h" #include "splat.h" static void splatOutputMaf(struct splatAlign *ali, struct dnaSeq *qSeqF, struct dnaSeq *qSeqR, FILE *f) /* Output alignment to maf file. */ { /* TODO: This is sufficiently refactored now that it can become a thin * wrapper around chainToMaf, mafWrite, mafFree. */ struct cBlock *bStart = ali->chain->blockList; struct chain *chain = ali->chain; /* Create symbols - DNA alignment with dashes for inserts - a long process. */ struct dyString *qSym = dyStringNew(0), *tSym = dyStringNew(0); /* Loop through blocks... */ struct cBlock *b, *bNext; for (b = bStart; b != NULL; b = bNext) { /* Output letters in block. */ int bSize = b->tEnd - b->tStart; DNA *qDna = ali->qDna + b->qStart; DNA *tDna = ali->tDna + b->tStart; int i; for (i=0; inext; if (bNext != NULL) { int qGapSize = bNext->qStart - b->qEnd; if (qGapSize > 0) { for (i=0; itStart - b->tEnd; if (tGapSize > 0) { for (i=0; istringSize; /* Build up a maf component for query */ struct mafComp *qComp; AllocVar(qComp); qComp->src = cloneString(chain->qName); qComp->srcSize = chain->qSize; qComp->strand = chain->qStrand; qComp->start = bStart->qStart; qComp->size = chain->qEnd - chain->qStart; qComp->text = dyStringCannibalize(&qSym); /* Build up a maf component for target */ struct mafComp *tComp; AllocVar(tComp); tComp->src = cloneString(chain->tName); tComp->srcSize = chain->tSize; tComp->strand = '+'; tComp->start = chain->tStart; tComp->size = chain->tEnd - chain->tStart; tComp->text = dyStringCannibalize(&tSym); /* Build up a maf alignment structure . */ struct mafAli *maf; AllocVar(maf); maf->score = chain->score; maf->textSize = symSize; maf->components = tComp; tComp->next = qComp; /* Save it to file, and free it up. */ mafWrite(f, maf); mafAliFree(&maf); } static void splatOutputMafs(struct splatAlign *aliList, struct dnaSeq *qSeqF, struct dnaSeq *qSeqR, struct splix *splix, FILE *f) /* Output alignment list to maf file. */ { struct splatAlign *ali; for (ali = aliList; ali != NULL; ali = ali->next) splatOutputMaf(ali, qSeqF, qSeqR, f); } static void splatOutputPsl(struct splatAlign *ali, struct dnaSeq *qSeqF, struct dnaSeq *qSeqR, FILE *f) /* Output alignment to psl file. */ { /* TODO - this is sufficiently refactored now that it can be a very thin * wrapper around chainToPsl/pslWrite/pslFree. */ struct chain *chain = ali->chain; struct cBlock *bStart = ali->chain->blockList; /* Point to DNA */ DNA *qDna = ali->qDna; DNA *tDna = ali->tDna; /* Loop through blocks and calculate summary fields. */ unsigned match = 0; /* Number of bases that match that aren't repeats */ unsigned misMatch = 0; /* Number of bases that don't match */ unsigned repMatch = 0; /* Number of bases that match but are part of repeats */ unsigned nCount = 0; /* Number of 'N' bases */ unsigned qNumInsert = 0; /* Number of inserts in query */ int qBaseInsert = 0; /* Number of bases inserted in query */ unsigned tNumInsert = 0; /* Number of inserts in target */ int tBaseInsert = 0; /* Number of bases inserted in target */ struct cBlock *b, *bNext; for (b = bStart; b != NULL; b = bNext) { /* Figure out block size, mismatch count, and match count. */ int bSize = b->tEnd - b->tStart; int miss = countDnaDiffs(qDna + b->qStart, tDna + b->tStart, bSize); match += bSize - miss; misMatch += miss; /* If not last block, figure out gap info. */ bNext = b->next; if (bNext != NULL) { int qGapSize = bNext->qStart - b->qEnd; if (qGapSize > 0) { qNumInsert += 1; qBaseInsert += qGapSize; } int tGapSize = bNext->tStart - b->tEnd; if (tGapSize > 0) { tNumInsert += 1; tBaseInsert += tGapSize; } } } /* Write out summary fields */ fprintf(f, "%d\t", match); fprintf(f, "%d\t", misMatch); fprintf(f, "%d\t", repMatch); fprintf(f, "%d\t", nCount); fprintf(f, "%d\t", qNumInsert); fprintf(f, "%d\t", qBaseInsert); fprintf(f, "%d\t", tNumInsert); fprintf(f, "%d\t", tBaseInsert); /* Write out basic info on query */ fprintf(f, "%c\t", chain->qStrand); fprintf(f, "%s\t", chain->qName); fprintf(f, "%d\t", chain->qSize); /* Handle qStart/qEnd field, which requires some care on - strand. */ int qStart = chain->qStart; int qEnd = chain->qEnd; if (chain->qStrand == '-') reverseIntRange(&qStart, &qEnd, chain->qSize); fprintf(f, "%d\t%d\t", qStart, qEnd); /* Target is always on plus strand, so easier. */ fprintf(f, "%s\t", chain->tName); // tName fprintf(f, "%d\t", chain->tSize); // tSize fprintf(f, "%d\t", chain->tStart); // tStart fprintf(f, "%d\t", chain->tEnd); // tEnd /* Now deal with block fields. */ fprintf(f, "%d\t", slCount(bStart)); /* blockSizes */ for (b = bStart; b != NULL; b = b->next) fprintf(f, "%d,", b->tEnd - b->tStart); fputc('\t', f); /* qStarts */ for (b = bStart; b != NULL; b = b->next) fprintf(f, "%d,", b->qStart); fputc('\t', f); /* tStarts */ for (b = bStart; b != NULL; b = b->next) fprintf(f, "%d,", b->tStart); fputc('\n', f); } static void splatOutputPsls(struct splatAlign *aliList, struct dnaSeq *qSeqF, struct dnaSeq *qSeqR, struct splix *splix, FILE *f) /* Output alignment list to psl file. */ { struct splatAlign *ali; for (ali = aliList; ali != NULL; ali = ali->next) splatOutputPsl(ali, qSeqF, qSeqR, f); } static void dnaOutUpperMatch(DNA a, DNA b, FILE *f) /* Write out a in upper case if it matches b, otherwise in lower case. */ { if (a == b) a = toupper(a); else a = tolower(a); fputc(a, f); } static void splatOutputSplat(struct splatAlign *ali, int mapCount, struct dnaSeq *qSeqF, struct dnaSeq *qSeqR, FILE *f) /* Output alignment to splat format output file. (Tag-align plus query name) */ { struct chain *chain = ali->chain; struct cBlock *bStart = ali->chain->blockList; /* Write chrom chromStart chromEnd */ fprintf(f, "%s\t", chain->tName); fprintf(f, "%d\t%d\t", chain->tStart, chain->tEnd); /* Write sequence including gaps (- for deletions, ^ for inserts) */ DNA *qDna = ali->qDna; DNA *tDna = ali->tDna; struct cBlock *b, *bNext; for (b = bStart; b != NULL; b = bNext) { /* Output DNA */ int bSize = b->tEnd - b->tStart; int i; for (i=0; iqStart + i], tDna[b->tStart + i], f); /* Output gaps if any. */ bNext = b->next; if (bNext != NULL) { int qGapSize = bNext->qStart - b->qEnd; int tGapSize = bNext->tStart - b->tEnd; for (i=0; iqEnd+i]), f); } for (i=0; iqStrand); fprintf(f, "%s\n", chain->qName); } static void splatOutputSplats(struct splatAlign *aliList, struct dnaSeq *qSeqF, struct dnaSeq *qSeqR, struct splix *splix, FILE *f) /* Output list of alignments to splat format output file. (Tag-align plus query name)*/ { int mapCount = slCount(aliList); struct splatAlign *ali; for (ali = aliList; ali != NULL; ali = ali->next) splatOutputSplat(ali, mapCount, qSeqF, qSeqR, f); } void splatOutList(struct splatAlign *aliList, char *outType, struct dnaSeq *qSeqF, struct dnaSeq *qSeqR, struct splix *splix, FILE *f) /* Output list of alignments to file in format defined by outType. */ { if (sameString(outType, "maf")) splatOutputMafs(aliList, qSeqF, qSeqR, splix, f); else if (sameString(outType, "psl")) splatOutputPsls(aliList, qSeqF, qSeqR, splix, f); else if (sameString(outType, "splat")) splatOutputSplats(aliList, qSeqF, qSeqR, splix, f); } void splatOutHeader(char *target, char *query, char *outType, FILE *f) /* Output file header if any. Also check and abort if outType is not supported. */ { if (sameString(outType, "maf")) { fprintf(f, "##maf version=1 program=splat\n"); fprintf(f, "# splat.v%s %s %s\n", version, target, query); fprintf(f, "\n"); } else if (sameString(outType, "psl")) { } else if (sameString(outType, "splat")) ; else errAbort("Unrecognized output type %s", outType); }