/* used for cleaning up self alignments - deletes all overlapping self alignments */ #include "common.h" #include "linefile.h" #include "axt.h" #include "obscure.h" #include "hash.h" #include "dnautil.h" void usage() /* Explain usage and exit. */ { errAbort( "axtDropOverlap - deletes all overlapping self alignments. \n" "usage:\n" " axtDropOverlap in.axt tSizes qSizes out.axt\n" "Where tSizes and qSizes are tab-delimited files with \n" " \n" ); } struct hash *readSizes(char *fileName) /* Read tab-separated file into hash with * name key size value. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); struct hash *hash = newHash(0); char *row[2]; while (lineFileRow(lf, row)) { char *name = row[0]; int size = lineFileNeedNum(lf, row, 1); if (hashLookup(hash, name) != NULL) warn("Duplicate %s, ignoring all but first\n", name); else hashAdd(hash, name, intToPt(size)); } lineFileClose(&lf); return hash; } int findSize(struct hash *hash, char *name) /* Find size of name in hash or die trying. */ { void *val = hashMustFindVal(hash, name); return ptToInt(val); } void axtDropOverlap(char *inName, char *tSizeFile, char *qSizeFile, char *outName) /* used for cleaning up self alignments - deletes all overlapping self alignments */ { struct hash *qSizeHash = readSizes(qSizeFile); struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = mustOpen(outName, "w"); struct axt *axt; int totMatch = 0; int totSkip = 0; int totLines = 0; while ((axt = axtRead(lf)) != NULL) { totLines++; totMatch += axt->score; if (sameString(axt->qName, axt->tName)) { int qs = axt->qStart; int qe = axt->qEnd; if (axt->qStrand == '-') reverseIntRange(&qs, &qe, findSize(qSizeHash, axt->qName)); if (axt->tStart == qs && axt->tEnd == qe) { /* printf( "skip %c\t%s\t%d\t%d\t%d\t%s\t%d\t%d\t%d\n", axt->qStrand, axt->qName, axt->symCount, axt->qStart, axt->qEnd, axt->tName, axt->symCount, axt->tStart, axt->tEnd ); */ totSkip++; continue; } } axtWrite(axt, f); axtFree(&axt); } fclose(f); lineFileClose(&lf); } int main(int argc, char *argv[]) /* Process command line. */ { if (argc != 5) usage(); axtDropOverlap(argv[1], argv[2], argv[3], argv[4]); return 0; }