/* File: glfFastq.c * Author: Richard Durbin (rd@sanger.ac.uk) * Copyright (C) Genome Research Limited, 2008 *------------------------------------------------------------------- * Description: write out genotype calls as fastq from glf already processed for priors * derived from glfSubCall.c * Exported functions: * HISTORY: * Last edited: Oct 22 16:11 2008 (rd) * Created: Fri Sep 19 14:55:48 2008 (rd) *------------------------------------------------------------------- */ #include #include #include #include #include #include "glf.h" #include "readseq.h" static int NQS_MIN = 20 ; static int Q_MIN = 10 ; static int DEPTH_MIN = 3 ; static int DEPTH_MAX = 256 ; /******************************************/ static void report (char *name, int pos, glf3_t *g, glf3_t *buf) { int i, k ; int k1, v1 = 256 ; int q, nqs = 255 ; /* the quality of the best call at this site, and of a SNP call */ q = quality(g) ; /* find best call */ for (k = 0 ; k < 10 ; ++k) if (g->lk[k] < v1) { k1 = k ; v1 = g->lk[k] ; } /* calculate nqs */ for (i = 0 ; i < 7 ; ++i, ++buf) if (buf != g) { int q = quality(buf) ; if (q < nqs) nqs = q ; } printf ("%s\t%d\t%c\t", name, pos+1, iupac[g->ref_base]) ; printf ("%c\t%d\t", iupac[glfBase[k1]], q) ; printf ("%d\t%.2f\t%d\t%d\t", g->depth, 0.00, g->rms_mapQ, nqs) ; /* can't give av_hits, so set to 0 */ } /**********************************************/ int main (int argc, char *argv[]) { int nameLen ; glfFile fpIn; // gzFile fpIn ; --argc ; ++argv ; /* remove program command */ while (argc) if (!strcmp (*argv, "-help") || !strcmp (*argv, "-h")) { fprintf (stderr, "usage: glf fastq < in.glz > out.geno\n") ; fprintf (stderr, "Writes out fastq from a glf file.\n") ; fprintf (stderr, "Any priors information needs to be applied beforehand; this just normalises the likelihoods,\n") ; fprintf (stderr, "treating them as posteriors odds.\n") ; exit (0) ; } else { fprintf (stderr, "unknown argument %s : type \"glf fastq -help\" for help\n", *argv) ; exit (-1) ; } if (!(fpIn = bgzf_fdopen(fileno(stdin), "r"))) { fprintf (stderr, "failed to open stdin as .glz file\n") ; return 1 ; } glf3_header_t *h; char *name; glf3_t *g3, *g3Buff[7]; int len, pos[7]; h = glf3_header_read(fpIn); g3 = glf3_init1(); int i; unsigned char *qual, *seq; for (i = 0; i < 7; i++) g3Buff[i] = glf3_init1(); while ((name = glf3_ref_read(fpIn, &len)) != 0) { qual = (unsigned char*) malloc (len + 1) ; seq = (unsigned char*) malloc (len + 1) ; for (i = 0; i < 7; i++) pos[i] = -1; i = 0; while (glf3_read1(fpIn, g3Buff[i%7]) && g3Buff[i%7]->rtype != GLF3_RTYPE_END) { g3 = g3Buff[abs((i-3)%7)]; pos[abs((i-3)%7)] = pos[abs((i-4)%7)] + g3->offset; if (i >= 6) report (name, pos[(i-3)%7], g3, *g3Buff) ; } free(name); } /* clean everything up */ glf3_header_destroy(h); glf3_destroy1(g3); bgzf_close(fpIn); /* while (gzread (fpIn, &nameLen, sizeof(int))) /\* new chromosome *\/ */ /* { */ /* int chrLen, i ; */ /* unsigned char *qual, *seq ; */ /* assert(gzread (fpIn, name, nameLen)) ; */ /* assert(gzread (fpIn, &chrLen, sizeof (int))) ; */ /* qual = (unsigned char*) malloc (chrLen + 1) ; */ /* seq = (unsigned char*) malloc (chrLen + 1) ; */ /* for (i = 0 ; i < chrLen ; ++i) */ /* { assert (gzread (fpIn, &glfBuf[i%7], GLF_SIZE) == GLF_SIZE) ; */ /* if (i >= 6 && i-3 == site->pos) */ /* { report (name, i-3, &glfBuf[(i-3)%7], glfBuf, site) ; */ /* if (!(site = readSite (fpSites))) */ /* exit (0) ; */ /* if (strcmp (name, site->name)) */ /* while (++i < chrLen) */ /* assert (gzread (fpIn, &glfBuf[i%7], GLF_SIZE) == GLF_SIZE) ; */ /* } */ /* } */ writeFastq (stdout, noConv, seq, qual, name, len) ; free (name) ; free (seq) ; free (qual) ; } } /************** end of file ************/