/* Debog - The Sanger chromosome .FA files contain some unusual letters
 * for nucleotides.  Earlier I just threw these out.  Alas, I needed to
 * keep them as placeholders.  Now my offsets into the chromosomes are
 * messed up.  My choice was to rerun a week-long computing job to
 * generate the correct offsets, or to write this program, which patches
 * them up. */

/* NOTE(review): this copy of the file looks damaged.  In three places the
 * text between a '<' and a later '>' appears to have been stripped (as if
 * it had been treated as markup).  Each such spot is marked with a
 * NOTE(review) comment below.  The code tokens are left exactly as found;
 * restore the missing spans from a pristine copy before building. */
#include "common.h"
#include "dnautil.h"
#include "wormdna.h"

/* makeBogLists - fill in one list of "bog" positions per chromosome.
 *
 *   bogDir      - directory argument; its use is in the lost part of the
 *                 body (presumably where the per-chromosome files live).
 *   chromNames  - chromosome names, chromCount of them.
 *   bogLists    - out: bogLists[chromIx] is set to NULL in the else branch
 *                 below; the non-NULL assignment is in the lost text.
 *   bogCounts   - out: bogCounts[chromIx] is set to the number of entries
 *                 read, or to 0 in the else branch.
 *
 * Aborts through errAbort() if a single list would exceed maxBogSize
 * entries. */
void makeBogLists(char *bogDir, char *chromNames[], int chromCount,
                  int *bogLists[], int bogCounts[])
{
    FILE *bogFile;
    int chromIx;
    int *bog;
    int maxBogSize = 25000; /* upper bound on entries in one list */
    char bogFileName[256];

    /* NOTE(review): damaged line.  Everything from the loop condition up to
     * a "... > 0" test is missing, including the declarations of bogCount
     * and words, whatever opens bogFile, and whatever allocates bog.
     * Presumably this loops over chromIx and reads the file line by line;
     * confirm against an intact copy. */
    for (chromIx=0; chromIx 0) {
                /* Refuse to write past the fixed capacity of the list. */
                if (bogCount >= maxBogSize)
                    errAbort("More than %d bogs\n", maxBogSize);
                /* Append words[0], converted with atoi, to the list. */
                bog[bogCount++] = atoi(words[0]);
            }
        }
        /* Record how many entries were stored, release the file, and
         * report the count and file name through uglyf. */
        bogCounts[chromIx] = bogCount;
        fclose(bogFile);
        uglyf("Read %d bogs in %s\n", bogCount, bogFileName);
    } else {
        /* The condition this else belongs to is in the lost text
         * (presumably "file could not be opened" - TODO confirm).
         * The chromosome gets an empty list. */
        bogLists[chromIx] = NULL;
        bogCounts[chromIx] = 0;
    }
    }
}

/* findStringIx - look up string in list (listSize entries).
 *
 * Only the signature and the start of the loop survive.  Judging from the
 * call site in debogFilter, it presumably returns true and stores the
 * matching index through retIx when string is found, and returns false
 * otherwise - TODO confirm against an intact copy. */
boolean findStringIx(char *string, char *list[], int listSize, int *retIx)
{
    int i;
    /* NOTE(review): damaged line.  The rest of findStringIx and the head of
     * the next function are missing.  What follows "i" is the tail of a
     * different function - presumably debogOne, which debogFilter calls as
     * debogOne(offset, bogList, bogCount).  The surviving tail is a loop
     * that breaks on a comparison against x, otherwise increments x, and
     * finally returns x. */
    for (i=0; i x)
            break;
        ++x;
    }
    return x;
}

/* debogFilter - copy lines from in to out, adjusting chromosome offsets.
 *
 * Visible behaviour: each input line is split into words on a scratch
 * copy, leaving lineBuf itself unmodified.  For lines with at least five
 * words whose third word is "hits", the fifth word is taken as the
 * chromosome name and the sixth as a "start-end" range.  Both ends of the
 * range are passed through debogOne with that chromosome's bog list.
 * What is then written to out is past the end of this view.
 *
 *   bogLists, bogCounts - per-chromosome lists and their lengths.
 *   chromNames          - chromosome names, chromCount of them.
 *   in, out             - input and output streams. */
void debogFilter(int *bogLists[], int bogCounts[], char *chromNames[],
                 int chromCount, FILE *in, FILE *out)
{
    char lineBuf[512];
    char wordBuf[512];
    char *words[12];
    int wordCount;

    while (fgets(lineBuf, sizeof(lineBuf), in) != NULL) {
        /* Split a copy so the original line text stays available. */
        strcpy(wordBuf, lineBuf);
        wordCount = chopString(wordBuf, whiteSpaceChopper, words,
                               ArraySize(words));
        /* NOTE(review): the guard only ensures five words, but words[5]
         * (a sixth word) is read below.  This probably should be
         * wordCount >= 6; confirm what chopString leaves in unused
         * slots. */
        if (wordCount >= 5 && strcmp(words[2], "hits") == 0) {
            char *chromName;
            int chromIx;
            int start, end;
            char *startEndString;
            char *rangeWords[2];
            int rangeWordCount;
            int i;

            chromName = words[4];
            if (!findStringIx(chromName, chromNames, chromCount, &chromIx)) {
                /* Name not in chromNames: "Genome" is mapped to the fixed
                 * index 6, anything else is fatal.
                 * NOTE(review): index 6 is hard-coded and assumes the
                 * caller's arrays have at least seven entries - confirm. */
                if (!differentWord(chromName, "Genome")) /* Mitochondria */
                    chromIx = 6;
                else
                    errAbort("Couldn't find chromosome named %s\n", chromName);
            }
            /* Split "start-end" in place on '-'.
             * NOTE(review): rangeWordCount is not checked before
             * rangeWords[1] is used. */
            startEndString = words[5];
            rangeWordCount = chopString(startEndString, "-", rangeWords,
                                        ArraySize(rangeWords));
            start = atoi(rangeWords[0]);
            end = atoi(rangeWords[1]);
            /* Correct both offsets using this chromosome's bog list. */
            start = debogOne(start, bogLists[chromIx],
                             bogCounts[chromIx]);
            end = debogOne(end, bogLists[chromIx], bogCounts[chromIx]);
            /* NOTE(review): the source is cut off in the middle of the
             * next loop header; the remainder of this function is not
             * visible here. */
            for (i=0; i