/* sizeSplitFa - split .fa file into two based on size of sequences. */
#include "common.h"
#include "dnautil.h"
#include "dnaseq.h"
#include "fa.h"

void writeLines50(FILE *f, DNA *dna)
/* Write the NUL-terminated string dna to f, at most 50 characters per
 * line, each line followed by a newline.  An empty string writes nothing. */
{
int sizeLeft = strlen(dna);

while (sizeLeft > 0)
    {
    /* The final line may be shorter than 50. */
    int lineSize = (sizeLeft < 50 ? sizeLeft : 50);
    mustWrite(f, dna, lineSize);
    fputc('\n', f);
    dna += lineSize;
    sizeLeft -= lineSize;
    }
}

int main(int argc, char *argv[])
/* Read every record of orig.fa.  Records whose full (untrimmed) size is
 * <= threshold go to small.fa, the others to large.fa.  Before writing,
 * startSkip bases are dropped from the front and endSkip bases from the
 * back; records with nothing left after trimming are not written at all.
 * Returns 0 on success; bad arguments or a failed close abort via errAbort. */
{
char *origName, *smallName, *largeName;
int threshold, startSkip, endSkip;
FILE *orig, *small, *large;
char *comment;
struct dnaSeq *seq;
int seqCount = 0;

/* Cast before isdigit: passing a negative plain char is undefined. */
if (argc != 7 || !isdigit((unsigned char)argv[2][0]))
    {
    errAbort("sizeSplitFa - split .fa file into two based on size and skip some on either end\n"
             "usage:\n"
             " sizeSplitFa orig.fa threshold small.fa large.fa startSkip endSkip");
    }
origName = argv[1];
threshold = atoi(argv[2]);
smallName = argv[3];
largeName = argv[4];
startSkip = atoi(argv[5]);
endSkip = atoi(argv[6]);

/* A negative skip would index outside the sequence buffer below. */
if (startSkip < 0 || endSkip < 0)
    errAbort("startSkip and endSkip must be non-negative (got %d and %d)",
             startSkip, endSkip);

orig = mustOpen(origName, "r");
small = mustOpen(smallName, "w");
large = mustOpen(largeName, "w");

while (faReadNext(orig, NULL, TRUE, &comment, &seq))
    {
    int size = seq->size;
    /* Destination is chosen from the size before trimming. */
    FILE *f = (size <= threshold ? small : large);

    if ((++seqCount % 1000) == 0)
        printf("Processing sequence %d\n", seqCount);

    /* Same test as size > startSkip + endSkip, but cannot overflow now
     * that both skips are known to be non-negative. */
    if (size - startSkip > endSkip)
        {
        DNA *dna = seq->dna;
        dna[size-endSkip] = 0;      /* Truncate the tail in place. */
        /* The comment is written with no added newline; presumably it
         * already ends with one - confirm against faReadNext. */
        fputs(comment, f);
        writeLines50(f, dna + startSkip);
        }

    /* Release the record whether or not it was written, so that skipped
     * sequences are not leaked. */
    freez(&comment);
    freeDnaSeq(&seq);
    }

fclose(orig);
/* Closing flushes buffered output; report a failure instead of exiting
 * successfully with a truncated file. */
if (fclose(small) != 0)
    errAbort("Error closing %s", smallName);
if (fclose(large) != 0)
    errAbort("Error closing %s", largeName);
return 0;
}