/* segInfo - Get info about a segment file. */ #include "common.h" #include "hash.h" #include "linefile.h" #include "options.h" #include "seg.h" static struct optionSpec options[] = { {"merge", OPTION_BOOLEAN}, {"noChrom", OPTION_BOOLEAN}, {"ref", OPTION_STRING}, {"species", OPTION_BOOLEAN}, {NULL, 0}, }; static boolean merge = FALSE; static boolean noChrom = FALSE; static char *ref = NULL; static boolean species = FALSE; static struct hash *exists = NULL; static struct slName *keyList = NULL; struct nameListList { /* A list of struct slName. */ struct nameListList *next; struct slName *nameList; }; void usage() /* Explain usage and exit. */ { errAbort( "segInfo - Get info from segment file(s)\n" "usage:\n" " segInfo in.seg ...\n" "options:\n" " -ref= Use as the reference species. For each ref.chrom,\n" " output a unique list of species seen with that ref.chrom.\n" " (default = first species seen in the segment files(s))\n" " -species Output unique list of species seen in the segment file(s)\n" " -noChrom Output a unique list of species seen with ref.\n" " -merge Merge species lists into largest superset.\n" ); } static void addSpeciesData(struct segBlock *sb) /* Keep a list of uniqe species we've seen. */ { struct segComp *sc; struct hashEl *hel; char *p; for (sc = sb->components; sc != NULL; sc = sc->next) { if ((p = strchr(sc->src, '.')) != NULL) *p = '\0'; /* If we haven't seen this species yet, add it to the list. */ if ((hel = hashLookup(exists, sc->src)) == NULL) { hashAdd(exists, sc->src, (void *) NULL); slNameAddHead(&keyList, sc->src); } if (p != NULL) *p = '.'; } } static boolean listsEquivalent(struct slName *list1, struct slName *list2) /* Check to see if two name lists are equivalent (same size and names). */ { struct slName *nl; if (slCount(list1) != slCount(list2)) return(FALSE); for (nl = list1; nl != NULL; nl = nl->next) if (! slNameInList(list2, nl->name)) return(FALSE); return(TRUE); } static boolean inLists(struct slName *list, struct nameListList *lists) /* Check to see if the given list is already in lists. */ { struct nameListList *nll; for (nll = lists; nll != NULL; nll = nll->next) if (listsEquivalent(nll->nameList, list)) return(TRUE); return(FALSE); } static void addData(struct segBlock *sb, struct segComp *refComp, char *key) /* Keep a list of unique keys. For each key, maintain a lists of uniq species seen with that key. */ { struct slName *speciesList; struct hashEl *hel; struct nameListList *data, *new; /* Generate a list of species in this block other than the reference species. */ speciesList = segSecSpeciesList(sb, refComp, '.'); /* Get the list of species lists associated with this key. */ if ((hel = hashLookup(exists, key)) == NULL) { hel = hashAdd(exists, key, (void *) NULL); slNameAddHead(&keyList, key); } data = (struct nameListList *) hel->val; /* Add this list of species if it's not aready there. */ if (inLists(speciesList, data)) slNameFreeList(&speciesList); else { AllocVar(new); new->nameList = speciesList; slAddHead(&data, new); } hel->val = (void *) data; } void segInfo(char *inSeg) /* segInfo - Get info about a segment file. */ { struct segFile *sf = segOpen(inSeg); struct segBlock *sb; struct segComp *refComp; while ((sb = segNext(sf)) != NULL) { if (species) addSpeciesData(sb); else { /* Set ref if it wasn't set on the command line. */ if (ref == NULL && ((ref = segFirstCompSpecies(sb, '.')) == NULL)) errAbort("ref is not set and the first segment block has no " "components."); refComp = segFindCompSpecies(sb, ref, '.'); if (noChrom) addData(sb, refComp, ref); else addData(sb, refComp, refComp->src); } segBlockFree(&sb); } segFileFree(&sf); } static int nameSubset(struct slName *a, struct slName *b) /* Return 1 if the set of names in 'b' is a subset of the set of names in 'a'. Return 2 if the set of names in 'a' is a subset of the set of names in 'b'. Return 0 otherwise. */ { int aSize = slCount(a), bSize = slCount(b); struct slName *small = a, *big = b, *nl; if (bSize < aSize) { small = b; big = a; } /* Every name in the small list MUST be in the big list. */ for (nl = small; nl != NULL; nl = nl->next) if (! slNameInList(big, nl->name)) return(0); if (big == a) return(1); else return(2); } static void mergeLists(void) /* Merge lists for each key which have a subset/superset relationship. */ { struct slName *key, *temp; struct hashEl *hel; struct nameListList *data, *master, *prev, *curr; int sub; for (key = keyList; key != NULL; key = key->next) { if ((hel = hashLookup(exists, key->name)) == NULL) errAbort("Can't find %s in exists hash!", key->name); master = data = (struct nameListList *) hel->val; while (master->next != NULL) { prev = master; curr = master->next; while (curr != NULL) { /* Check for a subset relationship between master and curr. */ if ((sub = nameSubset(master->nameList, curr->nameList)) != 0) { /* Swap nameLists so that curr contains the subset. */ if (sub == 2) { temp = master->nameList; master->nameList = curr->nameList; curr->nameList = temp; } /* Delete curr (the subset). */ prev->next = curr->next; slNameFreeList(&curr->nameList); freeMem(curr); /* Start over. */ prev = master; curr = master->next; } else { prev = curr; curr = curr->next; } } if (master->next != NULL) master = master->next; } } } static void outputData(void) /* Print out the data we collected. */ { struct hashEl *hel; struct nameListList *data, *nll; struct slName *key, *slist; for (key = keyList; key != NULL; key = key->next) { printf("%s", key->name); if (species) printf("\n"); else { printf(" "); if ((hel = hashLookup(exists, key->name)) == NULL) errAbort("Can't find %s in exists hash!", key->name); data = (struct nameListList *) hel->val; slReverse(&data); for (nll = data; nll != NULL; nll = nll->next) { slReverse(&nll->nameList); for (slist = nll->nameList; slist != NULL; slist = slist->next) { printf("%s", slist->name); if (slist->next != NULL) printf("."); } if (nll->next != NULL) printf(","); } printf("\n"); } } } int main(int argc, char *argv[]) /* Process command line. */ { int i; optionInit(&argc, argv, options); if (argc < 2) usage(); merge = optionExists("merge"); noChrom = optionExists("noChrom"); ref = optionVal("ref", ref); species = optionExists("species"); exists = newHash(5); for (i = 1; i < argc; i++) segInfo(argv[i]); slReverse(&keyList); if (merge & !species) mergeLists(); outputData(); hashFreeWithVals(&exists, slNameFreeList); return(EXIT_SUCCESS); }