#include #include #include #include #include #include #include #include #include typedef struct { char *chr; int len, maxlen; int* cov[5]; double *maf; } COVERAGE; COVERAGE *ReadCoverage( char *dir, char *chrom, int min_cov, int max_cov ) { int lookup[256]; char rlook[4]; int i; for(i=0;i<256;i++) lookup[i] = 4; lookup['a'] = lookup['A'] = 0; lookup['c'] = lookup['C'] = 1; lookup['g'] = lookup['G'] = 2; lookup['t'] = lookup['T'] = 3; lookup['u'] = lookup['U'] = 3; lookup['.'] = lookup[','] = -1; rlook[0] = 'A'; rlook[1] = 'C'; rlook[2] = 'G'; rlook[3] = 'T'; COVERAGE *cov = (COVERAGE*)calloc(1, sizeof(COVERAGE)); cov->chr = strdup(chrom); cov->len = 0; cov->maxlen = 250000000; for(i=0;i<5;i++) cov->cov[i] = (int*)calloc(cov->maxlen,sizeof(int)); cov->maf = (double*)calloc(cov->maxlen,sizeof(double)); glob_t globbuf; char pattern[256], q1[10000], q2[10000], p[10000]; struct dirent **d; sprintf(pattern, "%s/*regenotypes.gz", dir); printf("pattern %s\n", pattern); glob( pattern, 0, NULL, &globbuf ); int n = globbuf.gl_pathc; int bp, c; char chr[256], ref, a[256]; char *buffer = (char*)calloc(100000,sizeof(char)); // if ( n > 21 ) n =21; if (n < 0) fprintf(stderr, "no input files in directory %s", dir); else { for(i=0;imaxlen ) { // only one read and a SNP if ( allele == -1 ) // reference call cov->cov[lookup[ref]][bp]++; else cov->cov[allele][bp]++; if ( cov->len <= bp ) cov->len = bp+1; } } } gzclose(fp); } } for(i=0;ilen;i++) { int t = cov->cov[0][i] + cov->cov[1][i] + cov->cov[2][i] + cov->cov[3][i]; if ( t > min_cov && t < max_cov ) { int major=0, minor=0; int t_major=0, t_minor=t; int nalleles = 0, k; for( k=0;k<4;k++ ) nalleles += cov->cov[k][i] > 0; if ( nalleles > 1 ) { for( k=0;k<4;k++ ) if ( cov->cov[k][i] > t_major ) { t_major = cov->cov[k][i]; major = k; } for( k=0;k<4;k++ ) if ( major != k && cov->cov[k][i] > t_minor ) { t_minor = cov->cov[k][i]; minor = k; } printf( "%s\t%d\t%d\t%c\t%c\t%d\n", chr, i, rlook[major], t_major, rlook[minor], t_minor); } } } return(cov); } int main ( int argc, char **argv ) { char dir[256], chr[256]; int min_cov=50; int max_cov=1000; int c; strcpy(dir,"./"); strcpy(chr, "unknown"); while ((c = getopt (argc, argv, "c:d:m:x:")) != -1) switch (c) { case 'c': strcpy(chr, optarg); break; case 'd': strcpy(dir, optarg); break; case 'm': sscanf(optarg, "%d", &min_cov); break; case 'x': sscanf(optarg, "%d", &max_cov); break; } COVERAGE *cov = ReadCoverage( dir, chr, min_cov, max_cov ); return(0); }