#ifndef PSSM_H
#define PSSM_H

// Forward declaration of the PSSM type.  mhmm-state.h (included below)
// refers to PSSM_T, so the typedef has to appear before that include.
typedef struct pssm PSSM_T;

#include "mhmm-state.h"
#include "prior-dist.h"

//
// Range of integral score values for a PSSM column.
// NOTE(review): presumably the value passed as the "range" argument of
// the functions declared below -- confirm against callers.
//
#define PSSM_RANGE 100

//
// Macros to convert between scaled score and raw score.
//
//   x      - the score to convert
//   w      - multiplier applied to the offset (the PSSM width when used
//            with the get_pssm_* accessors)
//   scale  - scale factor
//   offset - per-unit-of-w offset
//
// scaled_to_raw yields  x/scale + w*offset.
// raw_to_scaled yields  nint((x - w*offset) * scale); it relies on a
// nint() rounding helper being in scope at the point of use (it is not
// defined in this header).
//
// Every argument is parenthesized in the expansion so that compound
// expressions (e.g. "a + b" as the offset) are evaluated as a unit.
//
#define scaled_to_raw(x,w,scale,offset) (((x)/(scale)) + ((w)*(offset)))
#define raw_to_scaled(x,w,scale,offset) (nint(((x) - ((w)*(offset))) * (scale)))

//
// Accessors for a PSSM_T pointer.  Every macro argument is parenthesized
// in the expansion so that arbitrary expressions can be passed.
//
#define get_pssm_w(pssm) ((pssm)->w)
#define get_pssm_alph(pssm) ((pssm)->alph)
#define get_pssm_alphsize(pssm) ((pssm)->alphsize)
#define get_pssm_scale(pssm) ((pssm)->scale)
#define get_pssm_offset(pssm) ((pssm)->offset)
// Length of the p-value table (pssm)->pv, as reported by get_array_length().
#define get_pssm_pv_length(pssm) (get_array_length((pssm)->pv))
// Entry of the p-value table (pssm)->pv at index "score".
#define get_pssm_pv(score, pssm) (get_array_item((score), (pssm)->pv))
// Cell (row, col) of the score matrix (pssm)->matrix.
#define get_pssm_score(row, col, pssm) (get_matrix_cell((row), (col), (pssm)->matrix))

//
// PSSM
//
// A PSSM score matrix bundled with the parameters and lookup tables
// that belong to it.
//
// This object was created for AMA because the "scale" and "offset"
// parameters need to be stored with each PSSM, but were previously
// globals in pssm.c.  This object should be used in all the programs
// that use PSSMs, since scale and offset can differ between PSSMs, and
// each PSSM's cdfs and pdfs should be kept with it.
//
// NOTE(review): "range" is described below as [0..range], while the
// build_motif_pssm/build_matrix_pssm prototypes describe the range of
// scaled scores as [0..w*range] -- confirm which one is intended.
//
struct pssm {
  MATRIX_T *matrix;	// The PSSM score matrix.
  ALPH_T alph;          // The alphabet of the pssm
  int alphsize;         // The size of the alphabet after hashing
  int w;		// Width of PSSM.
  BOOLEAN_T matrix_is_log;	// True if matrix is log likelihood ratio.
  BOOLEAN_T matrix_is_scaled;	// True if matrix is scaled.
  double scale;		// Scale factor for scores.
  double offset;	// Offset for scores.
  int range;		// Scaled scores in range [0..range].
  ARRAY_T *pv;		// P-value table for scores.
  int num_gc_bins;	// Number of entries in gc_pv list.  If > 1, then ->pv is NULL.
  ARRAY_T **gc_pv;	// P-value tables for different GC contents: [gc_bin, score].
  int min_score;	// Smallest index with non-zero pdf.
  int max_score;	// Largest index with non-zero pdf.
  MOTIF_T * motif;      // may be NULL but can be useful e.g. for id
};

//
// PSSM_PAIR
//
// The PSSMs for the positive and negative strands of a DNA motif,
// plus lookup tables used by AMA.
//
typedef struct pssm_pair {
  PSSM_T* pos_pssm;		// positive strand PSSM
  PSSM_T* neg_pssm;		// negative strand PSSM 
  // Stuff below here is for AMA:
  // The pv lookup table for the average of n scores will be
  // in row log_2(n), for n=1, 2, 4, ...
  int num_gc_bins;		// this is the number of n_pv_lookup tables
  MATRIX_T** gc_n_pv_lookup;	// pv[gcbin, log_2(n), score] lookup table
  ARRAY_T* scaled_to_ama;	// for speed
  MOTIF_T* motif;               // use with care in case motif deallocated
} PSSM_PAIR_T;

//
// Set up PSSMs (and, depending on p_threshold, p-value tables) for an HMM.
// NOTE(review): summary inferred from the name and parameter comments;
// see the definition for the exact behavior.
//
void set_up_pssms_and_pvalues (
  BOOLEAN_T motif_scoring,		// Motif scoring?
  double p_threshold,			// Scale/offset PSSM and create table if > 0
  BOOLEAN_T use_both_strands,           // Compute PSSM for negative strand, too?
  BOOLEAN_T allow_weak_motifs,		// Allow motifs with min p-value < p_threshold?
  MHMM_T*  the_hmm                      // The HMM.
);

//
// Compute a matrix of motif scores for a sequence, given an HMM.
// NOTE(review): summary inferred from the name and parameters; see the
// definition for the exact behavior.
//
void compute_motif_score_matrix
  (BOOLEAN_T  use_pvalues,	// Returns scores as p-values, not log-odds.
   double     p_threshold,	// Divide p-values by this.
   int*       int_sequence,	// presumably the sequence in integer-encoded form -- TODO confirm
   int        seq_length,	// presumably the number of entries in int_sequence -- TODO confirm
   MHMM_T*    the_hmm,
   MATRIX_T** motif_score_matrix);	// presumably receives the result (OUT) -- TODO confirm

//
// Scale a PSSM to the desired range.  The PSSM is modified (IN/OUT).
// NOTE(review): how prior_dist and alpha enter the scaling is not
// visible from this header; see the definition.
//
void scale_pssm(
  PSSM_T *pssm,		          // The PSSM. (IN/OUT)
  PRIOR_DIST_T *prior_dist, // Distribution of priors (IN)
  double alpha,             // Fraction of all TFBS that are the TFBS of interest
  int range 			          // The desired range. (IN) 
);

//
// Scale a distribution of priors using the given range, scale and offset.
// NOTE(review): "priors" is marked IN/OUT and an ARRAY_T* is also
// returned; whether the result is a new array and who owns it is not
// visible from this header -- confirm in the definition.
//
ARRAY_T *scale_prior_dist(
  ARRAY_T *priors, // Distribution of priors (IN/OUT)
  int range,			 // The desired range. (IN) 
  double scale,    // The desired scale. (IN)
  double offset    // The desired offset. (IN)
);

//
// Variant of get_pv_lookup that takes a background model matrix rather
// than a single background array.
// NOTE(review): presumably fills in the PSSM's p-value lookup table
// using a position-dependent background -- confirm in the definition.
//
void get_pv_lookup_pos_dep(
  PSSM_T* pssm,			           // The PSSM.
  MATRIX_T* background_matrix, // The background model PSSM matrix.
  ARRAY_T* scaled_prior_dist    // Scaled distribution of priors.
);

//
// NOTE(review): presumably fills in the PSSM's p-value lookup table
// from the background model -- confirm in the definition.  Returns
// nothing, so any result must be stored through the pssm argument.
//
void get_pv_lookup(
  PSSM_T* pssm,			         // The PSSM.
  ARRAY_T* background,       // The background model.
  ARRAY_T* scaled_prior_dist // Scaled distribution of priors.
);

//
// Convert a score to its unscaled form for the given PSSM.
// NOTE(review): presumably the function counterpart of scaled_to_raw
// using the PSSM's w, scale and offset -- confirm in the definition.
//
double get_unscaled_pssm_score(
  double score,
  PSSM_T* pssm
);

//
// Convert a score to its scaled form for the given PSSM.
// NOTE(review): presumably the function counterpart of raw_to_scaled
// using the PSSM's w, scale and offset -- confirm in the definition.
//
double get_scaled_pssm_score(
  double score,
  PSSM_T* pssm
);

//
// Build a PSSM from a motif and background frequencies.
// Returns a pointer to the PSSM.
// NOTE(review): presumably the caller releases the result with
// free_pssm -- confirm in the definition.
//
PSSM_T* build_motif_pssm(
  MOTIF_T* motif,	           // motif frequencies p_ia (IN)
  ARRAY_T* bg_freqs,	       // background frequencies b_a for pssm (IN)
  ARRAY_T* pv_bg_freqs,	     // background frequencies b_a for p-values (IN)
  PRIOR_DIST_T* prior_dist,  // Distribution of priors. May be NULL (IN)
  double alpha,              // Scale factor for non-specific priors. 
                             // Unused if prior_dist is NULL.
  int range,		             // range of scaled scores is [0..w*range]
  int num_gc_bins,	         // create pv tables for this number of GC bins
			                       // instead of using the pv_bg_freqs
  BOOLEAN_T no_log  	       // make likelihood ratio pssm
);

//
// Build a PSSM from an existing score matrix and background frequencies.
// Returns a pointer to the PSSM.
// NOTE(review): whether "matrix" is copied or retained by the result,
// and who frees the result, is not visible from this header -- confirm
// in the definition.
//
PSSM_T* build_matrix_pssm(
  ALPH_T alph,          // alphabet (IN)
  MATRIX_T* matrix,     // pssm matrix (IN)
  ARRAY_T* bg_freqs,    // background frequencies b_a (IN)
  int range             // range of scaled scores is [0..w*range] (IN)
);

//
// Get the p-value of an AMA (average likelihood ratio) score for a
// sequence of the given length and GC content.
// NOTE(review): presumably looked up in pssm_pair->gc_n_pv_lookup --
// confirm in the definition.
//
double get_ama_pv(
  double ama_score,                     // average likelihood ratio score
  int seqlen,                           // length of sequence scanned
  double seq_gc,                        // total GC content of sequence
  PSSM_PAIR_T* pssm_pair                // pssms for pos and neg motifs
);

//
// Create a PSSM_PAIR_T from a positive-strand and a negative-strand PSSM.
// NOTE(review): whether the pair takes ownership of the two PSSMs is
// not visible from this header -- confirm in the definition.
//
PSSM_PAIR_T* create_pssm_pair(
  PSSM_T* pos_pssm,		// positive strand pssm
  PSSM_T* neg_pssm 		// negative strand pssm
);

//
// Free a PSSM_PAIR_T.
// NOTE(review): whether the contained pos/neg PSSMs are freed as well
// is not visible from this header -- confirm in the definition.
//
void free_pssm_pair(
  PSSM_PAIR_T *pssm_pair
);

//
// Allocate a PSSM for the given alphabet, width, alphabet size and
// number of GC bins.  Returns a pointer to the PSSM.
// NOTE(review): presumably released with free_pssm; initial field
// values are not visible from this header -- confirm in the definition.
//
PSSM_T* allocate_pssm(
  ALPH_T alph,
  int w, 
  int alphsize, // may be different from alph_size(alph, ALPH_SIZE) if PSSM is hashed
  int num_gc_bins
);

//
// Free a PSSM.
// NOTE(review): whether the referenced motif is left untouched is not
// visible from this header -- confirm in the definition.
//
void free_pssm(
  PSSM_T* pssm
);

/**************************************************************************
 * Sum the largest value at each position in the PSSM to obtain the
 * best possible score for a match.  The returned score is unscaled.
 **************************************************************************/
double pssm_best_match_score(
  PSSM_T* pssm
);

#endif