/* Noali - this looks through the big blast output file from my non-aligners. */
#include "common.h"

struct bh
/* One alignment block, filled in from an "Identities = P/Q ..." line. */
{
    struct bh *next;    /* Next in list. */
    int idP, idQ;       /* Numbers before and after the '/' on the line. */
};

struct info
/* Everything collected about one query (one "####" section). */
{
    struct info *next;      /* Next in list. */
    char *cdnaName;         /* Second path component of the "####" line. */
    int cdnaSize;           /* Number from the "(N letters)" line. */
    char *cosmidName;       /* Word following "cosmid" on the first hit line. */
    struct bh *hitList;     /* One entry per "Identities" line. */
    int score;              /* Sum of idP over hitList. */
    double scaledScore;     /* score / cdnaSize. */
};

void gatherInfo(char *inName, struct info **retInfo, int *retHitCount,
                int *retMissCount)
/* Scan the blast output in inName line by line and build a list of info
 * records, one per query that was not reported as "No hits".
 *   retInfo      - receives the list, in file order.
 *   retHitCount  - receives the number of "significant" header lines seen.
 *   retMissCount - receives (number of "####" lines) - hit count.
 * Aborts via errAbort on lines that look like a known record but are
 * missing the fields this parser needs. */
{
    FILE *in;
    char line[512];
    char *words[128];
    int wordCount;
    int lineCount = 0;
    struct info *infoList = NULL, *info = NULL;
    struct bh *bh = NULL;
    boolean nextHasCosmid = FALSE;
    int hitCount = 0, queryCount = 0;

    in = mustOpen(inName, "r");
    while (fgets(line, sizeof(line), in)) {
        ++lineCount;
        if (lineCount % 10000 == 0)
            printf("%s %d\n", inName, lineCount);
        wordCount = chopLine(line, words);
        if (wordCount < 1)
            continue;

        if (sameString(words[0], "####")) {
            /* Start of a new query: "#### dir/name/..." */
            char *parts[3];
            if (wordCount < 2 ||
                chopString(words[1], "/\\", parts, ArraySize(parts)) < 2)
                errAbort("Malformed #### line %d of %s\n", lineCount, inName);
            AllocVar(info);
            slAddHead(&infoList, info);
            info->cdnaName = cloneString(parts[1]);
            ++queryCount;
        } else if (wordCount > 1 && sameString(words[1], "letters)")) {
            /* "(1234 letters)": skip the leading '(' before converting. */
            if (info != NULL)
                info->cdnaSize = atoi(words[0] + 1);
        } else if (wordCount > 4 && sameString(words[0], "*****") &&
                   sameString(words[1], "No") && sameWord(words[2], "hits")) {
            /* Erase the current query. It is always the list head because
             * records are only ever pushed on, or popped off, the head.
             * Clearing info keeps a repeated "No hits" line from dropping
             * an earlier query as well. The record is only unlinked, not
             * freed. */
            if (info != NULL && infoList == info) {
                infoList = infoList->next;
                info = NULL;
            }
        } else if (wordCount > 3 && sameString(words[2], "significant")) {
            /* The next line not matched above names the best-hit cosmid. */
            nextHasCosmid = TRUE;
            ++hitCount;
        } else if (nextHasCosmid) {
            int cosmidIx;
            char *cosmidName;
            char *commaPos;

            nextHasCosmid = FALSE;
            cosmidIx = stringArrayIx("cosmid", words, wordCount);
            if (cosmidIx < 1)
                errAbort("Couldn't find cosmid in line %d\n", lineCount);
            if (cosmidIx + 1 >= wordCount)
                errAbort("No name after cosmid in line %d\n", lineCount);
            cosmidName = words[cosmidIx + 1];
            if ((commaPos = strchr(cosmidName, ',')) != NULL)
                *commaPos = 0;
            if (info != NULL)
                info->cosmidName = cloneString(cosmidName);
        } else if (sameString("Identities", words[0]) && wordCount > 2) {
            /* "Identities = P/Q (..%)": words[2] is "P/Q". */
            char *parts[3];
            if (chopString(words[2], "/", parts, ArraySize(parts)) < 2)
                errAbort("Malformed Identities in line %d\n", lineCount);
            if (info != NULL) {
                AllocVar(bh);
                slAddTail(&info->hitList, bh);
                bh->idP = atoi(parts[0]);
                bh->idQ = atoi(parts[1]);
            }
        }
    }
    fclose(in);

    /* Records were pushed on the head, so reverse to restore file order. */
    slReverse(&infoList);
    *retInfo = infoList;
    *retHitCount = hitCount;
    *retMissCount = queryCount - hitCount;
}

int cmpInfo(const void *va, const void *vb)
/* Compare two info records so that the higher scaledScore sorts first.
 * Uses direct comparisons rather than casting a scaled difference to int,
 * which collapsed small differences to "equal" and was undefined for
 * out-of-range values. */
{
    const struct info *a = *(struct info *const *)va;
    const struct info *b = *(struct info *const *)vb;

    if (b->scaledScore > a->scaledScore)
        return 1;
    if (b->scaledScore < a->scaledScore)
        return -1;
    return 0;
}

int sumIdP(struct info *info)
/* Return the sum of idP over every hit attached to info. */
{
    struct bh *bh;
    int sum = 0;

    for (bh = info->hitList; bh != NULL; bh = bh->next)
        sum += bh->idP;
    return sum;
}

struct info *processInfo(struct info *info)
/* Fill in score and scaledScore for every record, then sort the list with
 * cmpInfo. Returns the new list head. */
{
    struct info *inf;

    for (inf = info; inf != NULL; inf = inf->next) {
        inf->score = sumIdP(inf);
        /* A missing "(N letters)" line leaves cdnaSize at 0; score such
         * records as 0 instead of dividing by zero. */
        if (inf->cdnaSize > 0)
            inf->scaledScore = (double)(inf->score) / inf->cdnaSize;
        else
            inf->scaledScore = 0.0;
    }
    slSort(&info, cmpInfo);
    return info;
}

void saveInfo(char *fileName, struct info *info)
/* Write one summary line per record in the list to fileName. */
{
    FILE *f = mustOpen(fileName, "w");

    for (; info != NULL; info = info->next) {
        /* cosmidName stays NULL when no hit line was seen for the query;
         * never hand a NULL pointer to %s. */
        char *cosmidName =
            (info->cosmidName != NULL) ? info->cosmidName : "(null)";
        fprintf(f, "%s hits %s about %d bases out of %d (%d%%)\n",
                info->cdnaName, cosmidName, info->score, info->cdnaSize,
                (int)((info->scaledScore) * 100 + 0.5));
    }
    /* fclose flushes; a failure here means the output is incomplete. */
    if (fclose(f) != 0)
        errAbort("Error writing %s\n", fileName);
}

int main(int argc, char *argv[])
/* Parse the command line, gather and score the hits, write the report. */
{
    struct info *info;
    char *inName, *outName;
    int hitCount, missCount;

    if (argc != 3) {
        errAbort("Noali - analyses blast output of nonaligners\n"
                 "usage:\n"
                 " noali input output");
    }
    inName = argv[1];
    outName = argv[2];
    gatherInfo(inName, &info, &hitCount, &missCount);
    printf("%d hits %d misses in %s\n", hitCount, missCount, inName);
    info = processInfo(info);
    saveInfo(outName, info);
    return 0;
}