/* * Mathlib : A C Library of Special Functions * Copyright (C) 1998 Ross Ihaka * Copyright (C) 2000-2014 The R Core Team * Copyright (C) 2005 The R Foundation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * http://www.r-project.org/Licenses/ * * SYNOPSIS * * #include * double rhyper(double NR, double NB, double n); * * DESCRIPTION * * Random variates from the hypergeometric distribution. * Returns the number of white balls drawn when kk balls * are drawn at random from an urn containing nn1 white * and nn2 black balls. * * REFERENCE * * V. Kachitvichyanukul and B. Schmeiser (1985). * ``Computer generation of hypergeometric random variates,'' * Journal of Statistical Computation and Simulation 22, 127-145. * * The original algorithm had a bug -- R bug report PR#7314 -- * giving numbers slightly too small in case III h2pe * where (m < 100 || ix <= 50) , see below. */ #include "nmath.h" #include "dpq.h" /* afc(i) := ln( i! ) [logarithm of the factorial i. * If (i > 7), use Stirling's approximation, otherwise use table lookup. */ static double afc(int i) { const static double al[9] = { 0.0, 0.0,/*ln(0!)=ln(1)*/ 0.0,/*ln(1!)=ln(1)*/ 0.69314718055994530941723212145817,/*ln(2) */ 1.79175946922805500081247735838070,/*ln(6) */ 3.17805383034794561964694160129705,/*ln(24)*/ 4.78749174278204599424770093452324, 6.57925121201010099506017829290394, 8.52516136106541430016553103634712 /*, 10.60460290274525022841722740072165*/ }; double di, value; if (i < 0) { MATHLIB_WARNING(("rhyper.c: afc(i), i=%d < 0 -- SHOULD NOT HAPPEN!\n"), i); return -1;/* unreached (Wall) */ } else if (i <= 7) { value = al[i + 1]; } else { di = i; value = (di + 0.5) * log(di) - di + 0.08333333333333 / di - 0.00277777777777 / di / di / di + 0.9189385332; } return value; } double rhyper(double nn1in, double nn2in, double kkin) { const static double con = 57.56462733; const static double deltal = 0.0078; const static double deltau = 0.0034; const static double scale = 1e25; /* extern double afc(int); */ int nn1, nn2, kk; int i, ix; Rboolean reject, setup1, setup2; double e, f, g, p, r, t, u, v, y; double de, dg, dr, ds, dt, gl, gu, nk, nm, ub; double xk, xm, xn, y1, ym, yn, yk, alv; /* These should become `thread_local globals' : */ static int ks = -1; static int n1s = -1, n2s = -1; static int k, m; static int minjx, maxjx, n1, n2; static double a, d, s, w; static double tn, xl, xr, kl, kr, lamdl, lamdr, p1, p2, p3; /* check parameter validity */ if(!R_FINITE(nn1in) || !R_FINITE(nn2in) || !R_FINITE(kkin)) ML_ERR_return_NAN; nn1 = (int) R_forceint(nn1in); nn2 = (int) R_forceint(nn2in); kk = (int) R_forceint(kkin); if (nn1 < 0 || nn2 < 0 || kk < 0 || kk > nn1 + nn2) ML_ERR_return_NAN; /* if new parameter values, initialize */ reject = TRUE; if (nn1 != n1s || nn2 != n2s) { setup1 = TRUE; setup2 = TRUE; } else if (kk != ks) { setup1 = FALSE; setup2 = TRUE; } else { setup1 = FALSE; setup2 = FALSE; } if (setup1) { n1s = nn1; n2s = nn2; tn = nn1 + nn2; if (nn1 <= nn2) { n1 = nn1; n2 = nn2; } else { n1 = nn2; n2 = nn1; } } if (setup2) { ks = kk; if (kk + kk >= tn) { k = (int)(tn - kk); } else { k = kk; } } if (setup1 || setup2) { m = (int) ((k + 1.0) * (n1 + 1.0) / (tn + 2.0)); minjx = imax2(0, k - n2); maxjx = imin2(n1, k); } /* generate random variate --- Three basic cases */ if (minjx == maxjx) { /* I: degenerate distribution ---------------- */ ix = maxjx; /* return ix; No, need to unmangle */ /* return appropriate variate */ if (kk + kk >= tn) { if (nn1 > nn2) { ix = kk - nn2 + ix; } else { ix = nn1 - ix; } } else { if (nn1 > nn2) ix = kk - ix; } return ix; } else if (m - minjx < 10) { /* II: inverse transformation ---------- */ if (setup1 || setup2) { if (k < n2) { w = exp(con + afc(n2) + afc(n1 + n2 - k) - afc(n2 - k) - afc(n1 + n2)); } else { w = exp(con + afc(n1) + afc(k) - afc(k - n2) - afc(n1 + n2)); } } L10: p = w; ix = minjx; u = unif_rand() * scale; L20: if (u > p) { u -= p; p *= (n1 - ix) * (k - ix); ix++; p = p / ix / (n2 - k + ix); if (ix > maxjx) goto L10; goto L20; } } else { /* III : h2pe --------------------------------------------- */ if (setup1 || setup2) { s = sqrt((tn - k) * k * n1 * n2 / (tn - 1) / tn / tn); /* remark: d is defined in reference without int. */ /* the truncation centers the cell boundaries at 0.5 */ d = (int) (1.5 * s) + .5; xl = m - d + .5; xr = m + d + .5; a = afc(m) + afc(n1 - m) + afc(k - m) + afc(n2 - k + m); kl = exp(a - afc((int) (xl)) - afc((int) (n1 - xl)) - afc((int) (k - xl)) - afc((int) (n2 - k + xl))); kr = exp(a - afc((int) (xr - 1)) - afc((int) (n1 - xr + 1)) - afc((int) (k - xr + 1)) - afc((int) (n2 - k + xr - 1))); lamdl = -log(xl * (n2 - k + xl) / (n1 - xl + 1) / (k - xl + 1)); lamdr = -log((n1 - xr + 1) * (k - xr + 1) / xr / (n2 - k + xr)); p1 = d + d; p2 = p1 + kl / lamdl; p3 = p2 + kr / lamdr; } L30: u = unif_rand() * p3; v = unif_rand(); if (u < p1) { /* rectangular region */ ix = (int) (xl + u); } else if (u <= p2) { /* left tail */ ix = (int) (xl + log(v) / lamdl); if (ix < minjx) goto L30; v = v * (u - p1) * lamdl; } else { /* right tail */ ix = (int) (xr - log(v) / lamdr); if (ix > maxjx) goto L30; v = v * (u - p2) * lamdr; } /* acceptance/rejection test */ if (m < 100 || ix <= 50) { /* explicit evaluation */ /* The original algorithm (and TOMS 668) have f = f * i * (n2 - k + i) / (n1 - i) / (k - i); in the (m > ix) case, but the definition of the recurrence relation on p134 shows that the +1 is needed. */ f = 1.0; if (m < ix) { for (i = m + 1; i <= ix; i++) f = f * (n1 - i + 1) * (k - i + 1) / (n2 - k + i) / i; } else if (m > ix) { for (i = ix + 1; i <= m; i++) f = f * i * (n2 - k + i) / (n1 - i + 1) / (k - i + 1); } if (v <= f) { reject = FALSE; } } else { /* squeeze using upper and lower bounds */ y = ix; y1 = y + 1.0; ym = y - m; yn = n1 - y + 1.0; yk = k - y + 1.0; nk = n2 - k + y1; r = -ym / y1; s = ym / yn; t = ym / yk; e = -ym / nk; g = yn * yk / (y1 * nk) - 1.0; dg = 1.0; if (g < 0.0) dg = 1.0 + g; gu = g * (1.0 + g * (-0.5 + g / 3.0)); gl = gu - .25 * (g * g * g * g) / dg; xm = m + 0.5; xn = n1 - m + 0.5; xk = k - m + 0.5; nm = n2 - k + xm; ub = y * gu - m * gl + deltau + xm * r * (1. + r * (-0.5 + r / 3.0)) + xn * s * (1. + s * (-0.5 + s / 3.0)) + xk * t * (1. + t * (-0.5 + t / 3.0)) + nm * e * (1. + e * (-0.5 + e / 3.0)); /* test against upper bound */ alv = log(v); if (alv > ub) { reject = TRUE; } else { /* test against lower bound */ dr = xm * (r * r * r * r); if (r < 0.0) dr /= (1.0 + r); ds = xn * (s * s * s * s); if (s < 0.0) ds /= (1.0 + s); dt = xk * (t * t * t * t); if (t < 0.0) dt /= (1.0 + t); de = nm * (e * e * e * e); if (e < 0.0) de /= (1.0 + e); if (alv < ub - 0.25 * (dr + ds + dt + de) + (y + m) * (gl - gu) - deltal) { reject = FALSE; } else { /* * Stirling's formula to machine accuracy */ if (alv <= (a - afc(ix) - afc(n1 - ix) - afc(k - ix) - afc(n2 - k + ix))) { reject = FALSE; } else { reject = TRUE; } } } } if (reject) goto L30; } /* return appropriate variate */ if (kk + kk >= tn) { if (nn1 > nn2) { ix = kk - nn2 + ix; } else { ix = nn1 - ix; } } else { if (nn1 > nn2) ix = kk - ix; } return ix; }