/* Quick lookup of cDNA based on binary search of index file, etc. */
#include "common.h"
#include "snof.h"

/* Allocate 'size' bytes with malloc.  On failure print a message to
 * stderr and exit with status -2, so callers never see NULL. */
void *mustAlloc(int size)
{
    void *ret;

    ret = malloc(size);
    if (ret == NULL) {
        fprintf(stderr, "Out of memory %d bytes\n", size);
        exit(-2);
    }
    return ret;
}

/* Copy one 'untilC'-delimited record from f to stdout.
 * Input is first skipped up to the next occurrence of untilC, which is
 * itself echoed.  Characters are then echoed until the following untilC
 * (consumed but not printed) or end of file. */
void printTo(FILE *f, char untilC)
{
    int c;

    /* Skip ahead to the opening delimiter and echo it. */
    for (;;) {
        c = getc(f);
        if (c == EOF)
            break;
        if (c == untilC) {
            putchar(c);
            break;
        }
    }
    /* Echo the record body up to the next delimiter or EOF. */
    for (;;) {
        c = getc(f);
        if (c == untilC || c == EOF)
            break;
        putchar(c);
    }
}

/* Singly linked list of names. */
struct nameList {
    struct nameList *next;  /* Next element, or NULL at end of list. */
    char *name;             /* Points into the same allocation as the node. */
};

/* Create a list node holding a copy of 'name'.
 * The node and the string copy live in a single mustAlloc'd block, so
 * one free() of the node releases both. */
struct nameList *newNameList(char *name)
{
    int len = strlen(name);
    struct nameList *nl;
    int allocSize = len+1+sizeof(*nl);

    nl = mustAlloc(allocSize);
    nl->next = NULL;
    nl->name = (char *)(nl+1);  /* String storage starts right after the struct. */
    strcpy(nl->name, name);
    return nl;
}

/* Look up geneName through the index 'snof', seek to the reported offset
 * in f, read one line and return the words after the first as a list of
 * names.  The first word of that line is asserted to equal geneName.
 *
 * Returns NULL if snofFindOffset reports no entry, if the seek fails, or
 * if the line carries no further words.  Each returned node is one
 * allocation (see newNameList); the caller frees the nodes.
 *
 * NOTE(review): chopString, whiteSpaceChopper, mustGetLine and slReverse
 * come from the project headers and are not visible here; they are
 * assumed to split on white space, read one line or abort, and reverse
 * the list in place - confirm. */
struct nameList *findCdnaNames(char *geneName, struct snof *snof, FILE *f)
{
    char lineBuf[1024*2];
    char *words[256];
    int wordCount;
    int i;
    struct nameList *list = NULL;
    long offset;

    if (!snofFindOffset(snof, geneName, &offset))
        return NULL;
    if (fseek(f, offset, SEEK_SET) != 0) {
        fprintf(stderr, "Can't seek to entry for %s\n", geneName);
        return NULL;
    }
    mustGetLine(f, lineBuf, sizeof(lineBuf));
    wordCount = chopString(lineBuf, whiteSpaceChopper, words, ArraySize(words));
    /* Check the count first so words[0] is never read when the line is empty. */
    assert(wordCount > 0 && strcmp(words[0], geneName) == 0);

    /* Prepend each remaining word, then reverse so the list follows the
     * order of the words on the line. */
    for (i=1; i<wordCount; ++i) {
        struct nameList *el = newNameList(words[i]);
        el->next = list;
        list = el;
    }
    slReverse(&list);
    return list;
}

/* For each line of the input file, echo the line, take its first word as
 * a gene name, look up the names listed for that gene and print the
 * '>'-delimited record of each one from the sequence file. */
int main(int argc, char *argv[])
{
    FILE *faFile, *g2cFile, *inputFile;
    long faOffset;
    char *faName = "\\biodata\\celegans\\cDNA\\allcdna.fa";
    char *faIndexName = "\\biodata\\celegans\\cDNA\\allcdna";
    char *g2cName = "\\biodata\\celegans\\cDNA\\gene2cdna.txt";
    char *g2cIndexName = "\\biodata\\celegans\\cDNA\\gene2cdna";
    char *inputName;
    struct snof *faSnof;
    struct snof *g2cSnof;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s inputFile\n", argv[0]);
        exit(-1);
    }

    /* Grab input file name and open file. */
    inputName = argv[1];
    inputFile = mustOpen(inputName, "r");

    /* Open fa file (with actual sequence) */
    faFile = mustOpen(faName, "rb");

    /* Open file that has list of cDNA's and genes */
    g2cFile = mustOpen(g2cName, "r");

    /* Open and initialize index file. */
    if ((faSnof = snofOpen(faIndexName)) == NULL) {
        fprintf(stderr, "%s isn't a good index file, sorry\n", faIndexName);
        exit(-1);
    }
    if ((g2cSnof = snofOpen(g2cIndexName)) == NULL) {
        fprintf(stderr, "%s isn't a good index file, sorry\n", g2cIndexName);
        exit(-1);
    }

    /* Loop through each line of input file.  Grab gene name from it.
     * Then scan through g2c file to find names of cDNAs that go with it. */
    for (;;) {
        char lineBuf[256];
        char *words[16];
        int wordCount;

        if (fgets(lineBuf, sizeof(lineBuf), inputFile) == NULL)
            break;
        printf("%s", lineBuf);
        wordCount = chopString(lineBuf, whiteSpaceChopper, words, ArraySize(words));
        if (wordCount > 0) {
            char *geneName = words[0];
            struct nameList *cdnaNames = findCdnaNames(geneName, g2cSnof, g2cFile);
            struct nameList *el;
            struct nameList *next;

            printf(">>Gene %s\n", geneName);
            if (cdnaNames == NULL)
                printf("No cDNA\n");
            for (el = cdnaNames; el != NULL; el = el->next) {
                if (!snofFindOffset(faSnof, el->name, &faOffset)) {
                    fprintf(stderr, "Can't find cDNA %s\n", el->name);
                } else if (fseek(faFile, faOffset, SEEK_SET) != 0) {
                    fprintf(stderr, "Can't seek to cDNA %s\n", el->name);
                } else {
                    printTo(faFile, '>');
                }
            }
            printf("\n");

            /* Release the list; each node owns its name in the same block. */
            for (el = cdnaNames; el != NULL; el = next) {
                next = el->next;
                free(el);
            }
        }
    }

    snofClose(&faSnof);
    snofClose(&g2cSnof);
    fclose(g2cFile);
    fclose(faFile);
    fclose(inputFile);
    return 0;
}