/* filtRep - filter repeats out of all.st file. */ #include "common.h" #include "dnautil.h" #include "dnaseq.h" #include "fa.h" #include "xa.h" #include "fuzzyFind.h" void xaWriteOne(struct xaAli *xa, FILE *f) /* Write one xa to file. */ { fprintf(f, "%s align %3.1f%% of %d %s:%d-%d %c %s:%d-%d %c\n", xa->name, xa->milliScore*0.1, xa->symCount, xa->query, xa->qStart, xa->qEnd, xa->qStrand, xa->target, xa->tStart, xa->tEnd, xa->tStrand); fprintf(f, "%s\n%s\n%s\n", xa->qSym, xa->tSym, xa->hSym); } int matchScores[4]; int mismatchCost = -4; int shortGapCost = -12; void initMatchScores() /* Initialize match scores so G/C is better than A/T */ { matchScores[A_BASE_VAL] = matchScores[T_BASE_VAL] = 4; matchScores[G_BASE_VAL] = matchScores[C_BASE_VAL] = 6; } int wormXaScore(struct xaAli *xa) /* Quickly and crudely score an alignment. */ { int score = 0; int i; int symCount = xa->symCount; boolean lastGap = FALSE; boolean longGap = FALSE; char *ts = xa->tSym; char *qs = xa->qSym; char t,q; for (i=0; isymCount >= wayPlentyBig) return TRUE; ++filtPassWayBig; //if ((xaScore = wormXaScore(xa)) < minXaScore) // return FALSE; ++filtPassXaScore; ungapTarget(xa->tSym, xa->symCount, &target, &targetSize); if (targetSize >= plentyBig) return TRUE; ++filtPassPlentyBig; maxRepScore = round(targetSize*tooReppyRatio); for (rep = repList; rep != NULL; rep = rep->next) { if (ffFindAndScore(rep->dna, rep->size, target, targetSize, ffTight, &ffAli, &isRc, &score)) { ffFreeAli(&ffAli); if (score > maxRepScore) return FALSE; } } ++filtPassRep; return TRUE; } int main(int argc, char *argv[]) { char *inName = "c:/biodata/cbriggsae/test/unfiltered.st"; char *outName = "c:/biodata/celegans/xeno/cbriggsae/all.st"; char *repName = "c:/biodata/celegans/features/repeats.fa"; FILE *in, *out; struct xaAli *xa; int xaCount = 0; int newXaCount = 0; struct dnaSeq *repList; dnaUtilOpen(); initMatchScores(); in = xaOpenVerify(inName); out = mustOpen(outName, "w"); repList = faReadAllDna(repName); while ((xa = xaReadNext(in, FALSE)) != NULL) { if ( ++xaCount % 1000 == 0) printf("xa %d\n", xaCount); if (filterXa(xa, repList)) { ++newXaCount; xaWriteOne(xa, out); } xaAliFree(xa); } printf("filtSub = %d\n", filtSub); printf("filtPassWayBig = %d\n", filtPassWayBig); printf("filtPassXaScore = %d\n", filtPassXaScore); printf("filtPassPlentyBig = %d\n", filtPassPlentyBig); printf("filtPassRep = %d\n", filtPassRep); printf("oldXaCount %d newXaCount %d\n", xaCount, newXaCount); return 0; }