/* randomLines - Pick out random lines from file. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "obscure.h" int seed; void usage() /* Explain usage and exit. */ { errAbort( "randomLines - Pick out random lines from file\n" "usage:\n" " randomLines inFile count outFile\n" "options:\n" " -seed=N - Set seed used for randomizing, useful for debugging.\n" " -decomment - remove blank lines and those starting with \n" ); } static struct optionSpec options[] = { {"seed", OPTION_INT}, {"decomment", OPTION_BOOLEAN}, {NULL, 0}, }; boolean decomment = FALSE; void randomLines(char *inName, int count, char *outName) /* randomLines - Pick out random lines from file. */ { srand(seed); /* Read all lines of input and put into an array. */ struct slName *slPt, *slList= readAllLines(inName); int lineCount = slCount(slList); char **lineArray; AllocArray(lineArray, lineCount); int i; for (i=0, slPt=slList; inext) lineArray[i] = slPt->name; /* Avoid an infinite, or very long loop by not letting them ask for all * the lines except in the small case. */ int maxCount = lineCount/2; if (maxCount < 1000) maxCount = lineCount; if (count > maxCount) errAbort("%s has %d lines. Random lines will only output %d or less lines\n" "on a file this size. You asked for %d. Sorry.", inName, lineCount, maxCount, count); FILE *f = mustOpen(outName, "w"); int outCount = 0; while (outCount < count) { int randomIx = rand()%lineCount; char *line = lineArray[randomIx]; if (line) { fprintf(f, "%s\n", line); ++outCount; lineArray[randomIx] = NULL; } } } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 4 || !isdigit(argv[2][0])) usage(); seed = optionInt("seed", (int)time(NULL)); decomment = optionExists("decomment"); randomLines(argv[1], atoi(argv[2]), argv[3]); return 0; }