#include "common.h" #include "dnautil.h" #include "dnaseq.h" #include "fa.h" boolean faReadNext(FILE *f, char *defaultName, boolean mustStartWithComment, char **retCommentLine, struct dnaSeq **retSeq) /* Read next sequence from .fa file. Return sequence in retSeq. If retCommentLine is non-null * return the '>' line in retCommentLine. The whole thing returns FALSE at end of file. */ { char lineBuf[512]; int lineSize; char *words[1]; int c; long offset = ftell(f); size_t dnaSize = 0; DNA *dna, *sequence, b; int bogusChars = 0; char *name = defaultName; if (name == NULL) name = ""; dnaUtilOpen(); if (retCommentLine != NULL) *retCommentLine = NULL; *retSeq = NULL; /* Skip first lines until it starts with '>' */ for (;;) { if(fgets(lineBuf, sizeof(lineBuf), f) == NULL) { return FALSE; } lineSize = strlen(lineBuf); if (lineBuf[0] == '>') { if (retCommentLine != NULL) *retCommentLine = cloneString(lineBuf); offset = ftell(f); chopByWhite(lineBuf, words, ArraySize(words)); name = words[0]+1; break; } else if (!mustStartWithComment) { fseek(f, offset, SEEK_SET); break; } else offset += lineSize; } /* Count up DNA. */ for (;;) { c = fgetc(f); if (c == EOF || c == '>') break; if (!isspace(c) && !isdigit(c)) { ++dnaSize; } } /* Allocate DNA and fill it up from file. */ dna = sequence = needLargeMem(dnaSize+1); fseek(f, offset, SEEK_SET); for (;;) { c = fgetc(f); if (c == EOF || c == '>') break; if (!isspace(c) && !isdigit(c)) { if ((b = ntChars[c]) == 0) b = 'n'; *dna++ = b; } } if (c == '>') ungetc(c, f); *dna = 0; *retSeq = newDnaSeq(sequence, dnaSize, name); return TRUE; } struct dnaSeq *faReadOneDnaSeq(FILE *f, char *defaultName, boolean mustStartWithComment) /* Read sequence from FA file. Assumes positioned at or before * the '>' at start of sequence. */ { struct dnaSeq *seq; if (!faReadNext(f, defaultName, mustStartWithComment, NULL, &seq)) return NULL; else return seq; } struct dnaSeq *faReadDna(char *fileName) /* Open fa file and read a single sequence from it. */ { FILE *f; struct dnaSeq *seq; f = mustOpen(fileName, "rb"); seq = faReadOneDnaSeq(f, fileName, FALSE); fclose(f); return seq; } struct dnaSeq *faReadAllDna(char *fileName) /* Return list of all sequences in FA file. */ { FILE *f; struct dnaSeq *seqList = NULL, *seq; f = mustOpen(fileName, "rb"); while ((seq = faReadOneDnaSeq(f, NULL, TRUE)) != NULL) slAddHead(&seqList, seq); fclose(f); slReverse(&seqList); return seqList; } struct dnaSeq *faFromMemText(char *text) /* Return a sequence from a .fa file that's been read into * a string in memory. This cannabalizes text, which should * be allocated with needMem. This buffer becomes part of * the returned dnaSeq, which may be freed normally with * freeDnaSeq. */ { char *name = ""; char *s, *d; struct dnaSeq *seq; int size = 0; char c; if (text[0] == '>') { char *end; s = strchr(text, '\n') + 1; name = text+1; end = skipToSpaces(name); if (end != NULL) *end = 0; } else s = text; AllocVar(seq); seq->name = cloneString(name); d = text; while ((c = *s++) != 0) { if ((c = ntChars[c]) != 0) { d[size++] = c; } } d[size] = 0; /* Put sequence into our little DNA structure. */ seq->dna = text; seq->size = size; return seq; } void faWriteNext(FILE *f, char *startLine, DNA *dna, int dnaSize) /* Write next sequence to fa file. */ { int dnaLeft = dnaSize; int lineSize; fprintf(f, ">%s\n", startLine); while (dnaLeft > 0) { lineSize = dnaLeft; if (lineSize > 50) lineSize = 50; mustWrite(f, dna, lineSize); fputc('\n', f); dna += lineSize; dnaLeft -= lineSize; } } void faWrite(char *fileName, char *startLine, DNA *dna, int dnaSize) /* Write out FA file or die trying. */ { FILE *f = mustOpen(fileName, "w"); faWriteNext(f, startLine, dna, dnaSize); fclose(f); }