#ifndef _HAPPY_H_
#define _HAPPY_H_

/*
 * Shared type definitions and function prototypes for the QTL-fitting code.
 *
 * NOTE(review): the guard name _HAPPY_H_ is formally a reserved identifier
 * (leading underscore followed by an upper-case letter).  It is kept as-is
 * because other files may test for it.
 */

#include <stdio.h> /* FILE, used by the I/O prototypes below */

typedef enum {
  BOTH,
  LEFT,
  UNLINKED,
  RIGHT,
  LINKAGE_STATES /* last enumerator; equals the number of states listed above it */
} LINKAGESTATES;

typedef enum {
  GIBBS_SAMPLING,
  EXPECTATION_MAXIMISATION
} ALLOCATION_METHODS;

typedef struct {
  int markers;
  int *chrom1;
  int *chrom2;
} CHROM_PAIR;

typedef struct {
  int alleles;
  char *marker_name;
  char **allele_name;
  double *allele_freq;  /* observed frequency of alleles */
  double **pr_AtoS;     /* prob of strain s | allele a */
  double **pr_StoA;     /* prob of allele a | strain s */
  double **logpr_AtoS;
  double entropy;
  double position;      /* estimate of the distance of the QTL from the left-hand end */
  double ProbSame;      /* prob of an observable recomb between this and the next marker */
  double **prior;       /* array of probabilities that the pair of QTL states are in */
  int *which_allele;
} ALLELE_FREQ;

typedef struct {
  int strains;
  char **strain_name;
  int markers;
  int generations;
  double *Pr_ss;        /* array of transition probabilities for staying in same state */
  double *Pr_st;        /* array of transition probabilities for changing state */
  ALLELE_FREQ *af;
} ALLELES;

/* DP_MATRICES contains the forward and backward dynamic-programming matrices */
typedef struct {
  double ***Left;       /* forward DP matrix */
  double ***Right;      /* backward DP matrix */
  double ProbRecomb;    /* prob of a recombinant (between each pair of adjacent markers) */
  double log_theta;     /* log-probability of a recombinant */
  double log_theta1;    /* log-probability of no recombinant */
} DP_MATRICES;

/* QTL_FIT contains all the data associated with fitting the QTL */
typedef struct {
  int locus;               /* left-hand marker locus of current interest */
  double rss;              /* the residual sum of squares */
  double fss;              /* the fitting sum of squares */
  double F;                /* the F-ratio */
  double pval;             /* the p-value of the F */
  double mean;             /* the estimated mean trait response */
  double log_posterior;    /* the log posterior likelihood */
  double *trait;           /* array of estimated trait effects for each strain */
  double *trait_error;     /* array of estimated trait standard errors for each strain */
  double sigma;            /* estimated residual standard error */
  int iterations;          /* the number of iterations performed */
  int *trait1;             /* array of N predicted trait states for chrom1 */
  int *trait2;             /* array of N predicted trait states for chrom2 */
  int left, right;         /* counts of the number of chromosomes allocated left and right */
  int indeterminate;       /* count of the number of indeterminate chromosomes */
  double position;         /* estimate of the distance of the QTL from the left-hand end */
  double **design_matrix;  /* alternative expectations of traits for regression */
  double *residual;        /* residuals from fit */
} QTL_FIT;

/* QTL_DATA is a portmanteau struct that contains pretty much all the data */
typedef struct {
  char *filename;           /* Name of the data-set */
  int N;                    /* Number of individuals */
  int M;                    /* Number of markers */
  int S;                    /* Number of strains */
  ALLELES *alleles;         /* pointer to ALLELES struct containing the ancestral strain info */
  char **name;              /* array of names of individuals */
  double *observed;         /* array of N observed trait values */
  CHROM_PAIR *genos;        /* array of N CHROM_PAIR structs containing raw marker genotypes */
  CHROM_PAIR *haplos;       /* array of N CHROM_PAIR structs containing deduced strain haplotypes */
  DP_MATRICES *dp_matrices; /* array of N dynamic-programming matrices for computing the priors */
  QTL_FIT *fit;             /* array of QTL_FIT structures, for each marker locus */
  /*
   * WARNING: this member is named `true`, which is a macro in <stdbool.h>
   * and a keyword in C23.  A translation unit that includes <stdbool.h>
   * before this header (or is compiled as C23) will fail to compile here.
   * The name is retained so that existing `->true` accesses keep working.
   */
  QTL_FIT *true;            /* QTL_FIT struct containing the true answer (for simulations) */
  QTL_FIT *null_model;      /* QTL_FIT struct containing the null model fit */
  double drop;              /* factor for reducing search space of prior configurations */
  int from_marker;
  int to_marker;
  int phase_known;          /* switch indicating whether the phase of the genotypes is known - ie they are haplotypes */
} QTL_DATA;

/* SINGLE_QTL_PROB contains the data for one possible configuration of
   the haplotypes in the neighbourhood of the trait locus, and the trait
   states, together with the prior and posterior log probabilities of
   the configuration */
typedef struct {
  int left1, left2, right1, right2, trait1, trait2;
  double log_prior, log_posterior, cum_prob;
  double cum;
} SINGLE_QTL_PROB;

/* QTL_PROB contains all the important configurations for a given individual */
typedef struct {
  int len;                /* presumably the number of entries in prob - TODO confirm */
  SINGLE_QTL_PROB *prob;
} QTL_PROB;

typedef struct {
  double prior, posterior, cum;
} QTL_PRIOR;

/* Pair of doubles: a key and its associated value */
typedef struct {
  double key, value;
} KV;

/* function prototypes */

CHROM_PAIR *new_chrom_pair( int markers );
ALLELES *input_allele_frequencies( FILE *fp, int generations, int verbose, int errors );
double **inverse_transition_matrix( double **pr_a_s, int strains, int alleles, char **allele_name, char **strain_name, int verbose );
CHROM_PAIR *read_genotypes( FILE *fp, ALLELES *A, int *genos );
QTL_PRIOR ***compute_qtl_priors( QTL_DATA *qtl, QTL_PRIOR ***qp, int locus, double **prior );
int qpcmp( const void *A, const void *B ); /* signature matches a qsort()/bsearch() comparator */
double fit_null_qtl_model( QTL_DATA *qtl_data );
void allocate_traits( QTL_DATA *q, QTL_PRIOR ***p, QTL_FIT *fit, int mode );
void fit_qtl( QTL_DATA *q, int locus, int verbose, int shuffles, char *prefix );
double fit_linear_additive_model( QTL_DATA *qtl, QTL_FIT *fit, int shuffles, int verbose );
QTL_FIT *allocate_qtl_fit( QTL_FIT *fit, int N, int strains );
void print_qtl_data ( QTL_DATA *q, QTL_FIT *fit, FILE *fp );
void qtl_fit_cp( QTL_FIT *fit1, QTL_FIT *fit2, int N, int S );
QTL_DATA *read_qtl_data( FILE *fp, char *name, ALLELES *a, int normalize, int verbose, int errors );
void write_qtl_data( FILE *fp, QTL_DATA *q );
double ***summed_dp_matrix( QTL_DATA *qtl, int individual, double *p1, double *p2, int direction );
void create_summed_dp_matrices( QTL_DATA *q );
QTL_PRIOR ***allocate_qtl_priors( QTL_DATA *q );
int remove_partial_fit( QTL_DATA *q, char *marker, int verbose, int fail );
void permute_data( double *data , int N );
void permute_genotypes( QTL_DATA *q );
void pointwise_mapping( QTL_DATA *q, double step, int verbose );
void pointwise_interval_mapping_probabilities( QTL_DATA *q, int locus, double c, double **prior );
QTL_DATA *resample_qtl_data( QTL_DATA *q, QTL_DATA *r );
void bootstrap_analysis( QTL_DATA *q, int bootstrap, char *bootstart, char *bootstop, int verbose );
void sequential_fit( QTL_DATA *q );
int marker_index( char *name, QTL_DATA *q, int fail );
int genotype_difference( QTL_DATA *q, int i, int j );
int pdump_prob_data( FILE *fp, int locus, QTL_DATA *q );

#endif