/* sufaRepeatFind - Use a sufa index to find short sequence that repeat more than a given number * of times.. */ /* This file is copyright 2008 Jim Kent, but license is hereby * granted for all use - public, private or commercial. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "dnautil.h" #include "options.h" #include "sufa.h" int readSize = 25; int minCount = 5; void usage() /* Explain usage and exit. */ { errAbort( "sufaRepeatFind - Use a sufa index to find short sequence that repeat more than a given number of times.\n" "usage:\n" " sufaRepeatFind in.sufa out.srf\n" "options:\n" " -readSize=N Size of basic read size(default %d)\n" " -minCount=N Minimum count of repeats to report (default %d)\n" , readSize, minCount); } static struct optionSpec options[] = { {"readSize", OPTION_INT}, {"minCount", OPTION_INT}, {NULL, 0}, }; int countDnaSame(DNA *dna, int tileSize, bits32 *array, int arraySize) /* Count up number of places in a row in array, where the referenced DNA is the * same up to tileSize bases. */ { int i; DNA *first = dna + array[0]; for (i=1; iallDna; bits32 *array = sufa->array; int arraySize = sufa->header->arraySize; int i, sameSize; for (i=0; i= minCount) { mustWrite(f, dna + array[i], readSize); fprintf(f, " %d\n", sameSize); } } carefulClose(&f); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 3) usage(); readSize = optionInt("readSize", readSize); minCount = optionInt("minCount", minCount); sufaRepeatFind(argv[1], argv[2]); return 0; }