/* leaders.c - collect information on SL1/SL2 leaders embedded in worm cDNA. */ #include "common.h" #include "cda.h" #include "snof.h" #include "wormdna.h" static struct snof *aliSnof; static FILE *aliFile; void wormOpenAliSnof() /* Set up for fast access alignments of named cDNAs. */ { if (aliSnof == NULL) { char fileName[512]; sprintf(fileName, "%sgood", wormCdnaDir()); aliSnof = snofOpen(fileName); } if (aliFile == NULL) aliFile = wormOpenGoodAli(); } struct cdaAli *wormCdaAli(char *name) /* Return named cDNA alignment or NULL if it doesn't exist. */ { long offset; wormOpenAliSnof(); if (!snofFindOffset(aliSnof, name, &offset)) return NULL; fseek(aliFile, offset, SEEK_SET); return cdaLoadOne(aliFile); } struct leader { char *name; char *seq; int size; }; struct leader leaders[] = { {"SL1", "ggtttaattacccaagtttgag", 22}, {"SL2", "ggttttaacccagttactcaag", 22}, {"SL2.A", "ggttttaacccagttaaccaag", 22}, {"SL2.B", "ggttttaacccagtttaacca", 21}, }; boolean findLeader(struct dnaSeq *seq, struct leader **retLeader, int *retPos, int *retSize) /* Find first leader subsequence. */ { DNA *dna = seq->dna; int i; int ix; int bestIx = 0x7fffffff; int bestSize = 0; struct leader *bestLeader = NULL; char *match; int off; for (off = 0; off < 10; ++off) { for (i=0; idna, seq->size); if (findLeader(seq, &leader, &leaderPos, &leaderSize)) { uglyf("Got %d of %d bases of %s leader at %d of %s\n", leaderSize, leader->size, leader->name, leaderPos, cdnaName); AllocVar(hit); hit->cdnaName = cloneString(cdnaName); hit->cdnaSize = seq->size; hit->hitPos = leaderPos; hit->hitSize = leaderSize; hit->leader = leader; slAddHead(&hitList, hit); } #ifdef SOON #endif /* SOON */ cdaFreeAli(ali); } wormFreeCdnaInfo(&info); freeDnaSeq(&seq); } freeWormCdnaIterator(&it); slReverse(&hitList); for (hit = hitList; hit != NULL; hit = hit->next) { char *name = hit->cdnaName; int nameLen = strlen(name); if (name[nameLen-2] == '.' && isdigit(name[nameLen-1])) { printf("%s hits %s at base %d of %d for %d bases\n", hit->leader->name, hit->cdnaName, hit->hitPos, hit->cdnaSize, hit->hitSize); ++estHitCount; } } printf("%d hits in all, %d in ESTs\n", slCount(hitList), estHitCount); return 0; }