/* File: glfSoloPrior.c * Author: Richard Durbin (rd@sanger.ac.uk) * Copyright (C) Genome Research Limited, 2008 * Description: Apply a prior to a single .glf file based on a given mutation rate * Exported functions: glfSoloPrior * HISTORY: * Last edited: May 4 15:42 2009 (lj4@sanger.ac.uk) * * April 30 15:24 2009 (lj4@sanger.ac.uk): updated code to handle GLF version 3 files * * Jan 26 14:38 2009 (rd): added HETSUPPRESS * Created: Mon Aug 4 21:58:02 2008 (rd) *------------------------------------------------------------------- */ #include #include #include #include #include #include //#include "readseq.h" #include "glf.h" static double THETA = 0.001 ; /* population scaled mutation rate */ static double HETSUPPRESS = 0 ; /* score to reduce all het calls by */ /**********************************************************/ static int prior[16][10] ; /* index over reference base, genotype */ static void makeSoloPrior (void) { int i, b, ref ; for (ref = 0 ; ref < 16 ; ++ref) for (i = 0 ; i < 10 ; ++i) { b = glfBase[i] ; if (!(b & ~ref)) /* ie b is compatible with ref */ prior[ref][i] = 0 ; else if (b & ref) /* ie one allele of b is compatible with ref */ prior[ref][i] = logPhred(THETA) ; else if (isHom[b]) /* single mutation homozygote */ prior[ref][i] = logPhred(0.5*THETA) ; else /* two mutations */ prior[ref][i] = logPhred(THETA*THETA) ; if (isHet[b]) prior[ref][i] += HETSUPPRESS ; } } /*********************************************************/ int qAddTable[1024] ; #define qAdd(x,y) (x - qAddTable[512+y-x]) static void qAddTableInit (void) { int i ; for (i = 0 ; i < 1000 ; ++i) { double e = 1 + expPhred(i-512) ; qAddTable[i] = -logPhred(e) ; } } /*********************************************************/ static int soloProcess (glfFile fpIn, glfFile fpOut) { qAddTableInit(); glf3_header_t *h; char *name; glf3_t *g3; int len; int lkOut[10] ; h = glf3_header_read(fpIn); glf3_header_write(fpOut,h); glf3_header_destroy(h); g3 = glf3_init1(); while ((name = glf3_ref_read(fpIn, &len)) != 0) { fprintf(stderr,"chr: %s\n",name); glf3_ref_write(fpOut,name,len); int pos = 0; while (glf3_read1(fpIn, g3) && g3->rtype != GLF3_RTYPE_END) { pos += g3->offset; if (g3->ref_base != 15) /* do nothing if ref == N (centromeres etc.) */ { int qSum = 255 ; int qMin = 1000 ; int j ; for (j = 0 ; j < 10 ; ++j) { int x = g3->lk[j] + prior[g3->ref_base][j] ; qSum = qAdd (qSum, x) ; if (x < qMin) qMin = x ; lkOut[j] = x ; } if (qMin) for (j = 0 ; j < 10 ; ++j) lkOut[j] -= qMin ; for (j = 0 ; j < 10 ; ++j) if (lkOut[j] > 255) g3->lk[j] = 255 ; else g3->lk[j] = lkOut[j] ; g3->min_lk += qSum + qMin ; } glf3_write1(fpOut,g3); } g3->rtype = GLF3_RTYPE_END; glf3_write1(fpOut,g3); free(name); } /* clean everything up */ glf3_destroy1(g3); } /**********************************************************/ int glfSoloPrior (int argc, char *argv[]) { glfFile fpIn, fpOut ; int result = 0 ; --argc ; ++argv ; /* remove program command */ while (argc) if (argc > 1 && !strcmp (*argv, "-theta")) { --argc ; ++argv ; THETA = atof (*argv) ; --argc ; ++ argv ; } else if (argc > 1 && !strcmp (*argv, "-het")) { --argc ; ++argv ; HETSUPPRESS = atoi (*argv) ; --argc ; ++ argv ; } else if (!strcmp (*argv, "-help") || !strcmp (*argv, "-h")) { fprintf (stderr, "usage: glf soloPrior [-theta THETA] [-het HETSUPPRESS] < in.glz > out.glz\n") ; fprintf (stderr, "soloPrior reweights the likelihoods by the probability of difference from the reference,\n") ; fprintf (stderr, "assuming the reference is a correct haplotype from the same population.\n") ; fprintf (stderr, "All het likelihoods are reduced by HETSUPPRESS - e.g. set to 50 to model haploid chromosomes.") ; fprintf (stderr, "default value of THETA is 0.001, and of HETSUPPRESS is 0.\n") ; return 1 ; } else { fprintf (stderr, "unknown argument %s : type \"glf soloPrior -help\" for help\n", *argv) ; return -1 ; } if (!(fpIn =bgzf_fdopen(fileno(stdin), "r"))) { fprintf (stderr, "failed to open stdin as .glz file\n") ; return 1 ; } if (!(fpOut = bgzf_fdopen(fileno(stdout), "w"))) { fprintf (stderr, "failed to open stdout as .glz file\n") ; return 1 ; } makeSoloPrior () ; soloProcess (fpIn, fpOut) ; bgzf_close(fpIn); bgzf_close(fpOut); return 0 ; } /***************** end of file ****************/