#include #include #include #include #include double EM( int** freq, double tol, int NMAX, double *D, double *Dprime, double *R2 ); double chisquare( int ** freq, double *CS, double *P, int *df ); int main( int argc, char **argv ) { int **freq; int i; double tol = 1.0e-6; int nmax = 100; double D, Dprime, R2; double CS, P; int df; freq = (int**)calloc(3,sizeof(int*)); for(i=0;i<3;i++) freq[i] = (int*)calloc( 3, sizeof(int)); for(i=0;i<3;i++) { scanf( "%d %d %d", &freq[i][0], &freq[i][1], &freq[i][2] ); } EM( freq, tol, nmax, &D, &Dprime, &R2 ); chisquare( freq, &CS, &P, &df ); printf( "CS %e P %e df %d\n", CS, P, df); for(i=0;i<20;i++) { CS = i; P = gsl_cdf_chisq_Q( CS, df ); printf( "CS %e P %e df %d\n", CS, P, df); } } double chisquare( int ** freq, double *CS, double *P, int *df ) { double margin1[3], margin2[3]; double expected[3][3]; double total=0.0, X = 0.0; int i, j, n=0, n1=0, n2=0; margin1[0] = margin1[1] = margin1[2] = 0.0; margin2[0] = margin2[1] = margin2[2] = 0.0; for(i=0;i<3;i++) expected[i][0] = expected[i][1] = expected[i][2] = 0.0; for(i=0;i<3;i++) for(j=0;j<3;j++) { margin1[i] += freq[i][j]; margin2[j] += freq[i][j]; total += freq[i][j]; } for(i=0;i<3;i++) { n1 += (margin1[i]>0); n2 += (margin2[i]>0); } *df = (n1-1)*(n2-1); if ( total > 0 && *df > 0 ) { for(i=0;i<3;i++) for(j=0;j<3;j++) { expected[i][j] = margin1[i]*margin2[j]/total; if ( expected[i][j] > 0 ) { X = freq[i][j] - margin1[i]*margin2[j]/total; *CS += X*X/expected[i][j]; n++; } } } if ( *df > 0 ) { *P = gsl_cdf_chisq_Q( *CS, *df ); return 1; } else { *P = 0.0; *CS = 0.0; *df = 0; return 0; } } double EM( int** freq, double tol, int NMAX, double *D, double *Dprime, double *R2 ) { int i, j; double x = -1; double total = freq[0][0] + freq[0][1] + freq[0][2] + freq[1][0] + freq[1][1] + freq[1][2] + freq[2][0] + freq[2][1] + freq[2][2]; if ( total > 0 ) { double p0 = (freq[0][0] + freq[0][1] + freq[0][2] + ( freq[1][0] + freq[1][1] + freq[1][2])/2.0)/total; double p1 = 1.0-p0; double q0 = (freq[0][0] + freq[1][0] + freq[2][0] + ( freq[0][1] + freq[1][1] + freq[2][1] )/2.0)/total; double q1 = 1-q0; printf( "p0 %g q0 %g total %g\n", p0, q0, total ); if ( p0 > 0 && p1 > 0 && q0 > 0 && q1 > 0 ) { double total2 = 2.0*total; double p00 = p0*q0; double p01 = p0*q1; double p10 = p1*q0; double p11 = p1*q1; x = p00*p11/(p00*p11 + p01*p10); double y = -1.0; double r00 = 2*freq[0][0] + freq[0][1] + freq[1][0]; double r01 = freq[0][1] + 2*freq[0][2] + freq[1][2]; double r10 = 2*freq[2][0] + freq[1][0] + freq[2][1]; double r11 = freq[2][1] + freq[1][2] + 2*freq[2][2]; int N = 0; printf( " x-y %g tol %g N %d NMAX %d\n", fabs(x-y), tol, N, NMAX ); while( fabs( x-y ) > tol && N < NMAX ) { double x1 = freq[1][1]*x; double x2 = (1-x)*freq[1][1]; p00 = ( r00 + x1)/total2; p01 = ( r01 + x2)/total2; p10 = ( r10 + x1)/total2; p11 = ( r11 + x2)/total2; y = x; x = p00*p11/(p00*p11 + p01*p10); printf( "N %d x %g total %g\n", N, x, p00+p01+p10+p11); N++; } if ( 1) { *D = fabs(p00*p11 - x*(1-x)); double Dmax = p0*q1 > p1*q0 ? p0*q1 : p1*q0; *Dprime = *D/Dmax; *R2 = (*D)*(*D)/( p0*p1*q0*q1 ); printf( "D %g Dprime %g R2 %g\n", *D, *Dprime, *R2 ); } } } return(x); }