#include #include #include #include #include #include #include #include #include"reconstruction.h" typedef struct { int from, to, n, A, B, AB; } WINDOW_COUNTS; void WriteTwoStrainConcordance( SEQ_CALL *sq, double window ); void ProcessSeqCalls( char *dir, char *suffix, ALLELE_DATA **add, int nchr, char **chroms, int *nseq, int penalty, int backcross, int diploid, FILE *outfp, double window ) { glob_t globbuf; char pattern[256]; struct dirent **d; sprintf(pattern, "%s/*%s", dir, suffix ); printf("pattern %s\n", pattern); glob( pattern, 0, NULL, &globbuf ); int n = globbuf.gl_pathc; int chr; if (n < 0) fprintf(stderr, "ERROR no files to process in %s\n", dir ); else { int k=0,i; for(i=0;i 0 ) { if ( backcross ) WriteBackCrossConcordance( seq_call, window ); else WriteTwoStrainConcordance( seq_call, window ); } else for(chr=0;chrNfounders; printf("nf %d\n",nf); if ( nf == 3 ) { char filename[256]; sprintf( filename, "%s.bx.concordance.txt", sq->name); FILE *fp = fopen( filename, "w"); if ( fp ) { printf( "writing %s\n", filename); fprintf( fp, "chr start end snps A B AB r\n"); int chr; int debug = !strcmp( "WTCHG_39840_202", sq->name); for( chr=0;chrnchr;chr++) { int i; int lastbin = 0; int nbin; int ns = sq->chr_end[chr]-sq->chr_start[chr]; //+1; int mxbin = (int)250e6/window; WINDOW_COUNTS *buf = (WINDOW_COUNTS*)calloc(mxbin, sizeof(WINDOW_COUNTS)); if ( ns > 10 ) { int offset = sq->chr_start[chr]; int A=0, B=0, AB=0, n=0; lastbin=0; for(i=0;ibp[k]/window); if (debug && chr==8 && sq->bp[k] > 150e6 ) printf( "%d %d\n",sq->bp[k], nbin ); if ( nbin>lastbin) lastbin=nbin; if ( nbin >= mxbin ) { mxbin *= 2; WINDOW_COUNTS *buf2 = (WINDOW_COUNTS*)realloc( buf, sizeof(WINDOW_COUNTS)*mxbin ); int j; for( j=nbin; jallele[k] == sq->founder[k][0]) && !(sq->allele[k] == sq->founder[k][1] || sq->allele[k] == sq->founder[k][2] ); buf[nbin].B += (sq->allele[k] == sq->founder[k][1]) && !(sq->allele[k] == sq->founder[k][0] || sq->allele[k] == sq->founder[k][2]); buf[nbin].AB += (sq->allele[k] == sq->founder[k][1]) || (sq->allele[k] == sq->founder[k][0]); buf[nbin].n ++; } } for(nbin=0; nbin<=lastbin;nbin++) fprintf(fp, "%s %d %d %d %d %d %d %g\n", sq->chroms[chr], (int)(nbin*window), (int)((nbin+1)*window), buf[nbin].n, buf[nbin].A, buf[nbin].B, buf[nbin].AB, (double)(buf[nbin].A-buf[nbin].B)/(double)(buf[nbin].A+buf[nbin].B+1.0e-10)); free(buf); } fclose(fp); } } } void WriteTwoStrainConcordance( SEQ_CALL *sq, double window ) { int nf = sq->Nfounders; if ( nf == 2 ) { char filename[256]; sprintf( filename, "%s.concordance.txt", sq->name); FILE *fp = fopen( filename, "w"); if ( fp ) { printf( "writing %s\n", filename); fprintf( fp, "chr start end snps A B AB r\n"); int chr; int debug = !strcmp( "WTCHG_39840_202", sq->name); for( chr=0;chrnchr;chr++) { int i; int lastbin = 0; int nbin; int ns = sq->chr_end[chr]-sq->chr_start[chr]; //+1; int mxbin = (int)250e6/window; WINDOW_COUNTS *buf = (WINDOW_COUNTS*)calloc(mxbin, sizeof(WINDOW_COUNTS)); if ( ns > 10 ) { int offset = sq->chr_start[chr]; int A=0, B=0, AB=0, n=0; lastbin=0; for(i=0;ibp[k]/window); if (debug && chr==8 && sq->bp[k] > 150e6 ) printf( "%d %d\n",sq->bp[k], nbin ); if ( nbin>lastbin) lastbin=nbin; if ( nbin >= mxbin ) { mxbin *= 2; WINDOW_COUNTS *buf2 = (WINDOW_COUNTS*)realloc( buf, sizeof(WINDOW_COUNTS)*mxbin ); int j; for( j=nbin; jallele[k] == sq->founder[k][0]) && !(sq->allele[k] == sq->founder[k][1]); buf[nbin].B += (sq->allele[k] == sq->founder[k][1]) && !(sq->allele[k] == sq->founder[k][0]); buf[nbin].AB += (sq->allele[k] == sq->founder[k][1]) || (sq->allele[k] == sq->founder[k][0]); buf[nbin].n ++; } } for(nbin=0; nbin<=lastbin;nbin++) fprintf(fp, "%s %d %d %d %d %d %d %g\n", sq->chroms[chr], (int)(nbin*window), (int)((nbin+1)*window), buf[nbin].n, buf[nbin].A, buf[nbin].B, buf[nbin].AB, (double)(buf[nbin].A-buf[nbin].B)/(double)(buf[nbin].A+buf[nbin].B+1.0e-10)); free(buf); } fclose(fp); } } } int main( int argc, char **argv ) { ALLELE_DATA **add; int index; int c; int penalty=100; int backcross=-1; int diploid = 0; int format=1, nchr = 0; char *alleledir = NULL; char *balleledir = NULL; char *outdir = NULL; char *calldir = NULL; char *maskfile = NULL; char *zfile = NULL; char *Penalty = NULL; char *opterr = 0; char *Species = strdup("mouse"); double window=-1; while ((c = getopt (argc, argv, "a:b:m:c:dk:o:p:s:uw:z:")) != -1) switch (c) { case 'a': alleledir = optarg; break; case 'b': sscanf( optarg, "%d", &backcross); // this should be the index in the alleles data of the strain to use for backcrossing break; case 'c': calldir = optarg; break; case 'd': diploid = 1; break; case 'k': balleledir = optarg; break; case 'm': maskfile = optarg; break; case 'o': outdir = optarg; break; case 'p': Penalty = optarg; break; case 's': Species = optarg; break; case 'u': format = 0; break; case 'w': sscanf(optarg, "%lf", &window ); break; case 'z': zfile = optarg; break; } if ( Penalty ) sscanf( Penalty, "%d" , &penalty); if ( outdir == NULL ) outdir = strdup("./"); if ( zfile == NULL ) zfile = strdup("mosaic.txt"); // outputfile of mosaic printf("calldir %s\n", calldir ); printf("penalty = %d\n", penalty); printf("species = %s\n", Species ); printf("window = %g\n", window); char **chroms = TheChromosomes( Species, &nchr ); if ( (alleledir ||balleledir) && calldir ) { int s, chr, nseq=0; ALLELE_DATA ** add = NULL; if ( alleledir ) add = ReadGenomeAlleleData( alleledir, maskfile, format, nchr, chroms ) ; else if ( balleledir ) add = ReadAllBinaryAlleleData(balleledir, 0, nchr, chroms ); FILE *fp = fopen(zfile,"w"); ProcessSeqCalls( calldir , "", add, nchr, chroms, &nseq, penalty, backcross, diploid, fp, window ); } }