/* dnaSeq.c - stuff to manage DNA sequences. * * This file is copyright 2002 Jim Kent, but license is hereby * granted for all use - public, private or commercial. */ #include "common.h" #include "dnaseq.h" #include "bits.h" #include "hash.h" #include "obscure.h" struct dnaSeq *newDnaSeq(DNA *dna, int size, char *name) /* Create a new DNA seq. */ { struct dnaSeq *seq; seq = needMem(sizeof(*seq)); if (name != NULL) seq->name = cloneString(name); seq->dna = dna; seq->size = size; seq->mask = NULL; return seq; } struct dnaSeq *cloneDnaSeq(struct dnaSeq *orig) /* Duplicate dna sequence in RAM. */ { struct dnaSeq *seq = CloneVar(orig); seq->name = cloneString(seq->name); seq->dna = needHugeMem(seq->size+1); memcpy(seq->dna, orig->dna, seq->size+1); seq->mask = NULL; if (orig->mask != NULL) { seq->mask = bitClone(orig->mask, seq->size); } return seq; } void freeDnaSeq(struct dnaSeq **pSeq) /* Free up DNA seq. (And unlink underlying resource node.) */ { struct dnaSeq *seq = *pSeq; if (seq == NULL) return; freeMem(seq->name); freeMem(seq->dna); bitFree(&seq->mask); freez(pSeq); } void freeDnaSeqList(struct dnaSeq **pSeqList) /* Free up list of DNA sequences. */ { struct dnaSeq *seq, *next; for (seq = *pSeqList; seq != NULL; seq = next) { next = seq->next; freeDnaSeq(&seq); } *pSeqList = NULL; } boolean seqIsLower(bioSeq *seq) /* Return TRUE if sequence is all lower case. */ { int size = seq->size, i; char *poly = seq->dna; for (i=0; idna, seq->size); } aaSeq *translateSeqN(struct dnaSeq *inSeq, unsigned offset, unsigned inSize, boolean stop) /* Return a translated sequence. Offset is position of first base to * translate. If size is 0 then use length of inSeq. */ { aaSeq *seq; DNA *dna = inSeq->dna; AA *pep, aa; int i, lastCodon; int actualSize = 0; assert(offset <= inSeq->size); if ((inSize == 0) || (inSize > (inSeq->size - offset))) inSize = inSeq->size - offset; lastCodon = offset + inSize - 3; AllocVar(seq); seq->dna = pep = needLargeMem(inSize/3+1); for (i=offset; i <= lastCodon; i += 3) { aa = lookupCodon(dna+i); if (aa == 0) { if (stop) break; else aa = 'Z'; } *pep++ = aa; ++actualSize; } *pep = 0; assert(actualSize <= inSize/3+1); seq->size = actualSize; seq->name = cloneString(inSeq->name); return seq; } aaSeq *translateSeq(struct dnaSeq *inSeq, unsigned offset, boolean stop) /* Return a translated sequence. Offset is position of first base to * translate. If stop is TRUE then stop at first stop codon. (Otherwise * represent stop codons as 'Z'). */ { return translateSeqN(inSeq, offset, 0, stop); } bioSeq *whichSeqIn(bioSeq **seqs, int seqCount, char *letters) /* Figure out which if any sequence letters is in. */ { aaSeq *seq; int i; for (i=0; idna <= letters && letters < seq->dna + seq->size) return seq; } internalErr(); return NULL; } Bits *maskFromUpperCaseSeq(bioSeq *seq) /* Allocate a mask for sequence and fill it in based on * sequence case. */ { int size = seq->size, i; char *poly = seq->dna; Bits *b = bitAlloc(size); for (i=0; inext) hashAddUnique(hash, seq->name, seq); return hash; } int dnaSeqCmpName(const void *va, const void *vb) /* Compare to sort based on sequence name. */ { const struct dnaSeq *a = *((struct dnaSeq **)va); const struct dnaSeq *b = *((struct dnaSeq **)vb); return strcmp(a->name, b->name); }