/* File: glfSnpCall.c * Author: Richard Durbin (rd@sanger.ac.uk) * Copyright (C) Genome Research Limited, 2008 *------------------------------------------------------------------- * Description: Calls variant sites from a glf file * Exported functions: glfSnpCall * HISTORY: * Last edited: May 5 20:39 2009 (lj4@sanger.ac.uk) * * April 30 15:24 2009 (lj4@sanger.ac.uk): updated code to handle GLF version 3 files * Created: Tue Sep 2 00:36:34 2008 (rd) *------------------------------------------------------------------- */ #include #include #include #include #include "glf.h" #include "bgzf.h" /****************************************************/ /* output format spec from http://maq.sourceforge.net/maq-manpage.shtml 080901 Extract SNP sites. Each line consists of chr chromosome, x position, r reference base, b consensus base, q Phred-like consensus quality, d read depth, n the average number of hits of reads covering this position, max_mapQ the highest mapping quality of the reads covering the position, nqs the minimum consensus quality in the 3bp flanking regions at each side of the site (6bp in total), b2 the second best call, q2 log likelihood ratio of the second best and the third best call, b3 and the third best call. Heng uses q2 and b23 to give the SNP detection quality as follows: if b2 is the reference, then SNP quality is q, else if b3 is the reference then SNP quality is q + q2 (or if none of b, b2, b3 are the reference) I think the SNP detection quality should be logPhred(p_ref). So I will set q2 to be this - q, so that whenever snpfilter uses it the right value will be obtained. */ /****************************************************/ static void report (char *name, int pos, glf3_t *g, glf3_t *buf) { int i, k ; int k1, k2, k3, v1 = 256, v2 = 256, v3 = 256 ; int q, qsnp, nqs = 255 ; /* the quality of the best call at this site, and of a SNP call */ q = quality(g) ; qsnp = g->lk[baseGlf[g->ref_base]] + qsumbit ; /* find best three calls */ for (k = 0 ; k < 10 ; ++k) if (g->lk[k] < v3) if (g->lk[k] < v2) if (g->lk[k] < v1) { k3 = k2 ; k2 = k1 ; k1 = k ; v3 = v2 ; v2 = v1 ; v1 = g->lk[k] ; } else { k3 = k2 ; k2 = k ; v3 = v2 ; v2 = g->lk[k] ; } else { k3 = k ; v3 = g->lk[k] ; } /* calculate nqs */ for (i = 0 ; i < 7 ; ++i, ++buf) if (buf != g) { int q = quality(buf) ; if (q < nqs) nqs = q ; } printf ("%s\t%d\t%c\t", name, pos+1, iupac[g->ref_base]) ; printf ("%c\t%d\t", iupac[glfBase[k1]], q) ; printf ("%d\t%.2f\t%d\t%d\t", g->depth, 0.00, g->rms_mapQ, nqs) ; /* can't give av_hits, so set to 0 */ printf ("%c\t%d\t%c\n", iupac[glfBase[k2]], qsnp - q, iupac[glfBase[k3]]) ; } int glfSnpCall (int argc, char *argv[]) { int nameLen, chrLen ; glfFile fpIn ; --argc ; ++argv ; /* remove program command */ while (argc) if (!strcmp (*argv, "-help") || !strcmp (*argv, "-h")) { fprintf (stderr, "usage: glf snpCall < in.glf > out.snp\n") ; fprintf (stderr, "snpCall calls non-homozygous reference bases from .glf file in maq SNP format.\n") ; fprintf (stderr, "Any priors information needs to be applied beforehand; this just normalises the likelihoods,\n") ; fprintf (stderr, "treating them as posteriors.\n") ; return 1 ; } else { fprintf (stderr, "unknown argument %s : type \"glf snpCall -help\" for help\n", *argv) ; return -1 ; } if (!(fpIn = bgzf_fdopen(fileno(stdin), "r"))) { fprintf (stderr, "failed to open stdin as .glz file\n") ; return 1 ; } glf3_header_t *h; char *name; glf3_t *g3, *g3Buff[7]; int len, pos[7]; h = glf3_header_read(fpIn); g3 = glf3_init1(); int i; for (i = 0; i < 7; i++) g3Buff[i] = glf3_init1(); while ((name = glf3_ref_read(fpIn, &len)) != 0) { for (i = 0; i < 7; i++) pos[i] = -1; i = 0; while (glf3_read1(fpIn, g3Buff[i%7]) && g3Buff[i%7]->rtype != GLF3_RTYPE_END) { //printf("%d",abs((i-3)%7)); g3 = g3Buff[abs((i-3)%7)]; pos[abs((i-3)%7)] = pos[abs((i-4)%7)] + g3->offset; //printf("%d-",g3->ref_base); if (i >= 6 && isHom[g3->ref_base] && g3->lk[baseGlf[g3->ref_base]]) { report (name, pos[(i-3)%7], g3, *g3Buff) ; //printf("%d",i); //glf3_view1(name,g3,pos[(i-3)%7]); } i++; } free(name); } /* clean everything up */ glf3_header_destroy(h); glf3_destroy1(g3); bgzf_close(fpIn); return 0 ; } /************** end of file ************/