#include #include #include #include #include #include #include #include #include #include "sam.h" #include #include "faidx.h" #include "fisher2.h" //#include #include "getopt.h" //#include #define MIN(x,y) ((x)>(y)?(y):(x)) #define MAX(x,y) ((x)>(y)?(x):(y)) using namespace std; typedef struct { int beg, end; samfile_t *in; int prev; vector maqindel; bam_maqcns_t *c; bam_maqindel_opt_t *ido; faidx_t *fai; char *ref; int len, tid,last_pos; int baseq_cutoff; int index2; //consensus/SNP quality int dist[8]; int* cov; int* del; char *ori; char *cons; } tmpstruct_t; map atgchash; typedef struct { uint32_t mapq_cutoff; bam_header_t* header; uint8_t otype; bam_plbuf_t *buf; } fetstruct_t; // callback for bam_fetch() static int fetch_func(const bam1_t *b, void *data) { fetstruct_t* psu =(fetstruct_t*)data; bam_header_t *h = (bam_header_t*)psu->header; uint32_t flag=b->core.flag; // if ( b->core.qual==0 ) return 0; //This reads is not uniqully mapped. if ( flag & 0x0004 ) return 0; //This reads is unmapped // if ( ! (flag & 0x0001) ) return 0; //This read is originally single-end if ( flag & 0x0400 ) return 0; // This read is a(n) PCR/optical duplicate if ( psu ->mapq_cutoff && (int)b->core.qual < (int) psu->mapq_cutoff ) return 0; bam_plbuf_t *buf = psu->buf; bam_plbuf_push(b, buf); return 0; } // callback for bam_plbuf_init() static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu, void *data) { tmpstruct_t *d = (tmpstruct_t*)data; uint32_t cns = 0; int i, j, rb, rb2, rms_mapq = -1, *proposed_indels = 0; uint32_t k; int dist[4]; if (d->fai && (int)tid != d->tid) { if (d ->ref != NULL) free(d->ref); d->ref = fai_fetch(d->fai, d->in->header->target_name[tid], &d->len); d->tid = tid; } rb = (d->ref && (int)pos < d->len)? d->ref[pos] : 'N'; cns = bam_maqcns_call(n, pu, d->c); if ( (int)(pos- d->prev) >1) { //process the zero-coverage section for (k = MAX(d->prev+1, 0); k< MIN(pos, d->end); k++) { rb2 = (d->ref && (int) k < d->len)? d->ref[k] : 'N'; printf("%s\t%d\t0\t%c\t%c\n", d->in->header->target_name[tid], k + 1, rb2, rb2); d->cov[k-d->beg]=0; d->cons[k-d->beg]=rb2; d->ori[k-d->beg]=rb2; } } if ((int)pos >= d->beg && (int)pos < d->end) { int ref_q, rb4 = bam_nt16_table[rb]; *d->cov=n; ref_q = 0; if (rb4 != 15 && cns>>28 != 15 && cns>>28 != rb4) { // a SNP ref_q = ((cns>>24&0xf) == rb4)? cns>>8&0xff : (cns>>8&0xff) + (cns&0xff); if (ref_q > 255) ref_q = 255; } d->index2=cns>>8&0xff; // rms_mapq = cns>>16&0xff; //printf("%s\t%d\t%d\t", d->in->header->target_name[tid], pos + 1, n); //printf("%c\t%c\t%d\t%d\t%d\n", rb,bam_nt16_rev_table[cns>>28], cns>>8&0xff, ref_q, rms_mapq); // d->cov[pos-d->beg]=n; // d->cons[pos-d->beg]=bam_nt16_rev_table[cns>>28]; // d->ori[pos-d->beg]=rb; d->cons[pos-d->beg]=bam_nt16_rev_table[cns>>28]; for (i = 0; i < n; ++i) { const bam_pileup1_t *p = pu + i; if (p->is_del) { (*d->del)++; } if (!p->is_del) { int c = bam1_qual(p->b)[p->qpos] ; // putchar(c+33); if (c < d->baseq_cutoff ) continue; c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; // if (c == '=' || toupper(c) == toupper(rb4)) c = bam1_strand(p->b)? ',' : '.'; // else c = bam1_strand(p->b)? tolower(c) : toupper(c); // putchar(c); c = bam1_strand(p->b)? tolower(c) : toupper(c); if ( toupper(c) != 'A' && toupper(c) != 'T' && toupper(c) != 'G' && toupper(c) != 'C') continue; d->dist[atgchash[c]]++; } } } // d->prev =pos; return 0; } int usage() { fprintf(stderr, "Usage: dcgh [-c] <...>\n"); return 1; } int main(int argc, char *argv[]) { char c; extern int optind; int cov_thr=-1; int mymapq_cutoff=0; int cov_low=-1; int colhead=0; int sortScore=0; while ((c = getopt(argc, argv,"c:q:l:hs")) >=0 ) { switch (c) { case 'c': cov_thr = atoi(optarg); break; case 'q': mymapq_cutoff = atoi(optarg); break; case 'l': cov_low = atoi(optarg); break; case 'h': colhead = 1; break; case 's': sortScore= 1; break; default: return usage() ; } } tmpstruct_t tmp; fetstruct_t fet; atgchash['A']=0; atgchash['T']=2; atgchash['G']=4; atgchash['C']=6; atgchash['a']=1; atgchash['t']=3; atgchash['g']=5; atgchash['c']=7; vector vsam; vector vind; if (argc - optind <= 3) { return usage(); } tmp.beg = 0; tmp.end = 0x7fffffff; tmp.fai = fai_load(argv[optind]); if (tmp.fai == 0 ) { fprintf(stderr, "Fail to open fai file %s\n", argv[3]); return 1; } for (int i= optind +2; i< argc; i++ ) { tmp.in = samopen(argv[i], "rb", 0); tmp.ref=NULL; if (tmp.in == 0) { fprintf(stderr, "Fail to open BAM file %s\n", argv[3]); return 1; } bam_index_t *idx; idx = bam_index_load(argv[i]); // load BAM index if (idx == 0) { fprintf(stderr, "BAM indexing file is not available.\n"); return 1; } vsam.push_back(tmp.in); vind.push_back(idx); } tmp.cov = (int*) malloc(sizeof(int)*1); tmp.del = (int*) malloc(sizeof(int)*1); tmp.cons = (char*) malloc(sizeof(char)*1); tmp.tid=-1; tmp.c = bam_maqcns_init(); tmp.ido = bam_maqindel_opt_init(); bam_maqcns_prepare(tmp.c); tmp.baseq_cutoff=0; fet.header=tmp.in->header; fet.mapq_cutoff=mymapq_cutoff; //the reads mapping quality cutoff string row; int pos, nEntry=0; char key; int rub; string oldI; string newI; int z=0; string chname; ifstream fin(argv[optind+1]); string pre_chname=""; multimap flist; int Nrow=vsam.size(); int Ncol=2; double *ptable=new double[Nrow*Ncol]; while(getline(fin, row)) { ostringstream stro; istringstream is(row); if (colhead) { is >>key; } is >> chname; is >> pos; is >> rub; is>>oldI; is>>newI; if (rub != 0 ) { if (colhead) cout << key <<"\t"; cout << chname <<"\t" <header, chname.c_str(), &tmp.tid, &tmp.beg, &tmp.end); tmp.ref = fai_fetch(tmp.fai, chname.c_str(), &tmp.len); pre_chname=chname; } tmp.beg=pos-1; tmp.end=pos; tmp.prev=tmp.beg; if (colhead) stro<< key <<"\t"; stro << chname <<"\t" << pos << "\t" << tmp.ref[pos-1]; int profl[4]={0,0,0,0}; vector allin; vector allcov; vector alldel; int ft_snpq; char ft_cons; for (int j=0; j<1; j++) { fill(tmp.dist, tmp.dist+8, 0); *(tmp.del)=0; bam_plbuf_t* buf = bam_plbuf_init(pileup_func, &tmp); // initialize pileup fet.buf=buf; bam_fetch(vsam[j]->x.bam, vind[j], tmp.tid, tmp.beg, tmp.end, &fet, fetch_func); bam_plbuf_push(0, buf); // finalize pileup bam_plbuf_destroy(buf); /* stro<<"\t" << tmp.dist[0] ; for (int k=1; k<8; k++) stro<< ";" << tmp.dist[k]; stro<<";" <<*tmp.del; */ if (j==0) { ft_snpq=tmp.index2; ft_cons=tmp.cons[0];} if (ft_cons == newI[0]) stro<<"\t" << ft_cons; else stro<<"\t" << ft_cons; for (int k=0; k<4; k++) { allin.push_back(tmp.dist[2*k]); allin.push_back(tmp.dist[2*k+1]); profl[k]+= tmp.dist[2*k] + tmp.dist[2*k+1]; stro<<"\t" << profl[k]; } stro<<"\t" <<*tmp.del; allcov.push_back(*tmp.cov); alldel.push_back(*tmp.del); } //stro<<"\t" << ft_snpq; stro<<"\t" << allcov[0]; char uppref=toupper(tmp.ref[pos-1]); int good4fe=1; if ( cov_thr >0 ) { for (int n=0; n cov_thr) { good4fe=0; break;} } if ( cov_low >0 ) { for (int n=0; n 0.1) stro <<"\t" << cgh_q; else stro <<"\t" << 0; } else { cgh_q=0; stro <<"\t" << "0"; } } */ if (sortScore) flist.insert( pair(cgh_q, stro.str()) ); else cout << stro.str() <::reverse_iterator it=flist.rbegin(); it!=flist.rend(); it++) { cout << it->second <