/* oligoTm - calculate melting temperature of relatively short DNA sequences. * This is based on the nearest-neighbor thermodynamics of bases from Breslauer, * Frank, Bloecker, and Markey, Proc. Natl. Acad. Sci. USA, vol 83, page 3748, * and uses work from see Rychlik, Spencer, Roads, Nucleic Acids Research, vol 18, * no 21. This code was imported from the oligotm module of Whitehead Institute's * primer3 program, and adapted into UCSC conventions by Jim Kent. Any redistribution * of this code should contain the following copyright notice from Whitehead: * * Copyright (c) 1996,1997,1998,1999,2000,2001,2004 * Whitehead Institute for Biomedical Research. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions must reproduce the above copyright notice, this * list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. Redistributions of * source code must also reproduce this information in the source code itself. * * 2. If the program is modified, redistributions must include a notice * (in the same places as above) indicating that the redistributed program is * not identical to the version distributed by Whitehead Institute. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgment: * This product includes software developed by the * Whitehead Institute for Biomedical Research. * * 4. The name of the Whitehead Institute may not be used to endorse or * promote products derived from this software without specific prior written * permission. * * We also request that use of this software be cited in publications as * * Rozen, S., Skaletsky, H. \"Primer3 on the WWW for general users * and for biologist programmers.\" In S. Krawetz and S. Misener, eds. * Bioinformatics Methods and Protocols in the series Methods in * Molecular Biology. Humana Press, Totowa, NJ, 2000, pages 365-386. * Code available at * http://fokker.wi.mit.edu/primer3/. * * THIS SOFTWARE IS PROVIDED BY THE WHITEHEAD INSTITUTE ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE WHITEHEAD INSTITUTE BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "common.h" #include "oligoTm.h" /* * Tables of nearest-neighbor thermodynamics for DNA bases. See Breslauer, * Frank, Bloecker, and Markey, Proc. Natl. Acad. Sci. USA, vol 83, page 3748, * table 2. */ #define S_A_A 240 #define S_A_C 173 #define S_A_G 208 #define S_A_T 239 #define S_A_N 215 #define S_C_A 129 #define S_C_C 266 #define S_C_G 278 #define S_C_T 208 #define S_C_N 220 #define S_G_A 135 #define S_G_C 267 #define S_G_G 266 #define S_G_T 173 #define S_G_N 210 #define S_T_A 169 #define S_T_C 135 #define S_T_G 129 #define S_T_T 240 #define S_T_N 168 #define S_N_A 168 #define S_N_C 210 #define S_N_G 220 #define S_N_T 215 #define S_N_N 203 #define H_A_A 91 #define H_A_C 65 #define H_A_G 78 #define H_A_T 86 #define H_A_N 80 #define H_C_A 58 #define H_C_C 110 #define H_C_G 119 #define H_C_T 78 #define H_C_N 91 #define H_G_A 56 #define H_G_C 111 #define H_G_G 110 #define H_G_T 65 #define H_G_N 85 #define H_T_A 60 #define H_T_C 56 #define H_T_G 58 #define H_T_T 91 #define H_T_N 66 #define H_N_A 66 #define H_N_C 85 #define H_N_G 91 #define H_N_T 80 #define H_N_N 80 /* Delta G's of disruption * 1000. */ #define G_A_A 1900 #define G_A_C 1300 #define G_A_G 1600 #define G_A_T 1500 #define G_A_N 1575 #define G_C_A 1900 #define G_C_C 3100 #define G_C_G 3600 #define G_C_T 1600 #define G_C_N 2550 #define G_G_A 1600 #define G_G_C 3100 #define G_G_G 3100 #define G_G_T 1300 #define G_G_N 2275 #define G_T_A 900 #define G_T_C 1600 #define G_T_G 1900 #define G_T_T 1900 #define G_T_N 1575 #define G_N_A 1575 #define G_N_C 2275 #define G_N_G 2550 #define G_N_T 1575 #define G_N_N 1994 #define A_CHAR 'A' #define G_CHAR 'G' #define T_CHAR 'T' #define C_CHAR 'C' #define N_CHAR 'N' #define CATID5(A,B,C,D,E) A##B##C##D##E #define CATID2(A,B) A##B #define DO_PAIR(LAST,THIS) \ if (CATID2(THIS,_CHAR) == c) { \ dh += CATID5(H,_,LAST,_,THIS); \ ds += CATID5(S,_,LAST,_,THIS); \ goto CATID2(THIS,_STATE); \ } #define STATE(LAST) \ CATID2(LAST,_STATE): \ c = *s; s++; \ DO_PAIR(LAST,A) \ else DO_PAIR(LAST,T) \ else DO_PAIR(LAST,G) \ else DO_PAIR(LAST,C) \ else DO_PAIR(LAST,N) \ else if ('\0' == c) \ goto DONE; \ else goto ERROR \ double oligoTm(char *dna, double DNA_nM, double K_mM) /* Calculate melting point of short DNA sequence given DNA concentration in * nanomoles, and salt concentration in millimoles. This is calculated using eqn * (ii) in Rychlik, Spencer, Roads, Nucleic Acids Research, vol 18, no 21, page * 6410, with tables of nearest-neighbor thermodynamics for DNA bases as * provided in Breslauer, Frank, Bloecker, and Markey, * Proc. Natl. Acad. Sci. USA, vol 83, page 3748. */ { register int dh = 0, ds = 108; register char c; char *dupe = cloneString(dna); char *s = dupe; double delta_H, delta_S; touppers(s); /* Use a finite-state machine (DFA) to calucluate dh and ds for s. */ c = *s; s++; if (c == 'A') goto A_STATE; else if (c == 'G') goto G_STATE; else if (c == 'T') goto T_STATE; else if (c == 'C') goto C_STATE; else if (c == 'N') goto N_STATE; else goto ERROR; STATE(A); STATE(T); STATE(G); STATE(C); STATE(N); DONE: /* dh and ds are now computed for the given sequence. */ delta_H = dh * -100.0; /* * Nearest-neighbor thermodynamic values for dh * are given in 100 cal/mol of interaction. */ delta_S = ds * -0.1; /* * Nearest-neighbor thermodynamic values for ds * are in in .1 cal/K per mol of interaction. */ /* * See Rychlik, Spencer, Roads, Nucleic Acids Research, vol 18, no 21, * page 6410, eqn (ii). */ freeMem(dupe); return delta_H / (delta_S + 1.987 * log(DNA_nM/4000000000.0)) - 273.15 + 16.6 * log10(K_mM/1000.0); ERROR: /* * length of s was less than 2 or there was an illegal character in * s. */ freeMem(dupe); errAbort("Not a valid oligo in oligoTm."); return 0; } #undef DO_PAIR #define DO_PAIR(LAST,THIS) \ if (CATID2(THIS,_CHAR) == c) { \ dg += CATID5(G,_,LAST,_,THIS); \ goto CATID2(THIS,_STATE); \ } double oligoDg(char *dna) /* Calculate dg (change in Gibb's free energy) from melting oligo * the nearest neighbor model. Seq should be relatively short, given * the characteristics of the nearest neighbor model (36 bases or less * is best). */ { register int dg = 0; register char c; char *dupe = cloneString(dna); char *s = dupe; /* Use a finite-state machine (DFA) to calculate dg s. */ c = *s; s++; if (c == 'A') goto A_STATE; else if (c == 'G') goto G_STATE; else if (c == 'T') goto T_STATE; else if (c == 'C') goto C_STATE; else if (c == 'N') goto N_STATE; else goto ERROR; STATE(A); STATE(T); STATE(G); STATE(C); STATE(N); DONE: /* dg is now computed for the given sequence. */ freeMem(dupe); return dg / 1000.0; ERROR: freeMem(dupe); errAbort("Not a valid oligo in oligoDg."); return 0; } double longSeqTm(char *s, int start, int len, double salt_conc) /* Calculate the melting temperature of substr(seq, start, length) using the * formula from Bolton and McCarthy, PNAS 84:1390 (1962) as presented in * Sambrook, Fritsch and Maniatis, Molecular Cloning, p 11.46 (1989, CSHL * Press). * * Tm = 81.5 + 16.6(log10([Na+])) + .41*(%GC) - 600/length * * Where [Na+] is the molar sodium concentration, (%GC) is the percent of Gs * and Cs in the sequence, and length is the length of the sequence. * * A similar formula is used by the prime primer selection program in GCG * (http://www.gcg.com), which instead uses 675.0 / length in the last term * (after F. Baldino, Jr, M.-F. Chesselet, and M.E. Lewis, Methods in * Enzymology 168:766 (1989) eqn (1) on page 766 without the mismatch and * formamide terms). The formulas here and in Baldino et al. assume Na+ rather * than K+. According to J.G. Wetmur, Critical Reviews in BioChem. and * Mol. Bio. 26:227 (1991) 50 mM K+ should be equivalent in these formulae to .2 * M Na+. * * This function takes salt_conc to be the millimolar (mM) concentration, * since mM is the usual units in PCR applications. */ { int GC_count = 0; char *p, *end; if(start + len > strlen(s) || start < 0 || len <= 0) errAbort("bad input to longSeqTm"); end = &s[start + len]; /* Length <= 0 is nonsensical. */ for (p = &s[start]; p < end; p++) { if ('G' == *p || 'g' == *p || 'C' == *p || 'c' == *p) GC_count++; } return 81.5 + (16.6 * log10(salt_conc / 1000.0)) + (41.0 * (((double) GC_count) / len)) - (600.0 / len); } double seqTm(char *seq, double dna_conc, double salt_conc) /* Figure out melting temperature of sequence of any length given * dna and salt concentration. */ { int len = strlen(seq); return (len > 36) ? longSeqTm(seq, 0, len, salt_conc) : oligoTm(seq, dna_conc, salt_conc); }