/* Site sampling functions for GLAM2 */
#include <assert.h>
#include <errno.h>
#include <float.h>
#include "glam2_util.h"
#include "glam2_site_sample.h"

/* print vector of doubles: for debugging */
void print_vec(double *vec, int size) {
  int i;
  for (i = 0; i < size; ++i)
    printf("%.2g%c", vec[i], i < size-1 ? '\t' : '\n');
}

/* print matrix of doubles: for debugging */
void print_mat(double **mat, int rows, int cols) {
  int i, j;
  for (i = 0; i < rows; ++i)
    for (j = 0; j < cols; ++j)
      printf("%.2g%c", mat[i][j], j < cols-1 ? '\t' : '\n');
}

/* Remove one sequence from an alignment's count matrices */
void unalign(glam2_aln *aln, const int seq_pick, const fasta *f) {
  const int *seq = aln->strands[seq_pick] == '+' ? f->seq : f->rcseq;
  int i;
  if (!ALIGNED(aln, seq_pick))  /* the sequence is unaligned */
    return;
  --aln->aligned_seq;

  for (i = 0; i < aln->width; ++i) {
    glam2_col *col = &aln->cols[i];
    if (col->matches[seq_pick]) {
      const int pos = col->positions[seq_pick];
      assert(seq[pos] < col->alph_size);
      --col->match_count;
      --col->emission_counts[seq[pos]];
    } else
      --col->delete_count;
    aln->insert_counts[i] -= RIGHT_INSERT(aln, i, seq_pick);
  }
}

/* Add one sequence to an alignment's count matrices */
void realign(glam2_aln *aln, const int seq_pick, const fasta *f) {
  const int *seq = aln->strands[seq_pick] == '+' ? f->seq : f->rcseq;
  int i;
  if (!ALIGNED(aln, seq_pick))  /* the sequence is unaligned */
    return;
  ++aln->aligned_seq;

  for (i = 0; i < aln->width; ++i) {
    glam2_col *col = &aln->cols[i];
    if (col->matches[seq_pick]) {
      const int pos = col->positions[seq_pick];
      assert(seq[pos] < col->alph_size);
      ++col->match_count;
      ++col->emission_counts[seq[pos]];
    } else
      ++col->delete_count;
    aln->insert_counts[i] += RIGHT_INSERT(aln, i, seq_pick);
  }
}

static void get_score_matrices(score_matrices *sm, const glam2_aln *aln,
			       glam2_scorer *s) {
  double **match_scores = sm->match_scores;
  double *delete_scores = sm->delete_scores;
  double *insert_scores = sm->insert_scores;
  int i, j;

  for (i = 0; i < aln->width; ++i) {  /* loop over aligned columns */
    const int alph_size = aln->cols[i].alph_size;  /* should be in sm */
    const int *residue_counts = aln->cols[i].emission_counts;
    const int match_count = aln->cols[i].match_count;
    const int delete_count = aln->cols[i].delete_count;
    const int insert_count = aln->insert_counts[i];
    const int aligned_seq = aln->aligned_seq;

    const double ds = beta_ratio_a(&s->d_prior, delete_count, match_count);
    const double ms = beta_ratio_b(&s->d_prior, delete_count, match_count);
    const double is = beta_ratio_a(&s->i_prior, insert_count, aligned_seq);
    const double js = beta_ratio_b(&s->i_prior, insert_count, aligned_seq);

    assert(residue_counts[alph_size] == 0);  /* no mask characters */

    dmix_ratios(&s->e_prior, match_scores[i], residue_counts);

    if (i+1 < aln->width) {
      for (j = 0; j < alph_size; ++j)
	match_scores[i][j] *= js * ms / s->bg.probs[j];
      delete_scores[i] = ds * js;
      insert_scores[i] = is;
    } else {  /* no insertions after last column: special case */
      for (j = 0; j < alph_size; ++j)
	match_scores[i][j] *= ms / s->bg.probs[j];
      delete_scores[i] = ds;
      insert_scores[i] = 0;
    }
  }
}

static void heat_scores(score_matrices *sm, int width, int alph_size,
			double temperature) {
  int i;
  for (i = 0; i < width; ++i)
    pow_dbl(sm->match_scores[i], alph_size, 1 / temperature);
  pow_dbl(sm->delete_scores, width, 1 / temperature);
  pow_dbl(sm->insert_scores, width, 1 / temperature);  /* back is 0 */
}

static void copy_scores(score_matrices *sm, int width, int alph_size) {
  int i;
  for (i = 0; i < width; ++i)
    COPY(sm->rc_match_scores[i], sm->match_scores[i], alph_size);
  COPY(sm->rc_delete_scores, sm->delete_scores, width);
}

/* Stochastically choose a strand (or "unaligned"), using the DP matrix */
int pick_strand(const glam2_aln *aln, const int seq_pick, const data *d) {
  const int width = aln->width;
  const int seq_len = d->seqs.f[seq_pick].seqlen;
  double **dp_mat = d->sm.dp_mat;
  double **rc_mat = d->sm.rc_mat;
  const double dp_rescale = d->sm.dp_rescale;
  const double rc_rescale = d->sm.rc_rescale;
  double tot[3];
  double log_scale[3];
  double max;
  int i, r;

  tot[0] = sum_dbl(dp_mat[width], seq_len+1);
  assert(tot[0] > 0);
  log_scale[0] = dp_rescale;

  if (d->a.two_strands) {
    tot[1] = sum_dbl(rc_mat[width], seq_len+1);
    assert(tot[1] > 0);
    log_scale[1] = rc_rescale;
  } else {
    tot[1] = 0;
    log_scale[1] = dp_rescale;
  }

  if (aln->aligned_seq >= d->a.min_seqs) {
    tot[2] = 1;
    log_scale[2] = 0;
  } else {
    tot[2] = 0;
    log_scale[2] = dp_rescale;
  }

  /*
  puts("tot:");
  print_vec(tot, 3);
  puts("log_scale:");
  print_vec(log_scale, 3);
  */

  max = *max_dbl(log_scale, 3);
  for (i = 0; i < 3; ++i)
    tot[i] *= xexp(log_scale[i] - max);
  r = pick_dbl(tot, 3) - tot;

  if (r == 0)
    return '+';
  else if (r == 1)
    return '-';
  else
    return 0;
}

/* Stochastically choose an alignment endpoint, using the DP matrix */
static int pick_endpoint(const score_matrices *sm, int width,
			 int seq_len, int strand) {
  double **dp_mat = strand == '+' ? sm->dp_mat : sm->rc_mat;

  if (strand == 0)  /* the sequence is unaligned */
    return -1;  /* dummy value, hopefully never used */

  return pick_dbl(dp_mat[width], seq_len+1) - dp_mat[width];
}

/* Stochastic traceback: */
void traceback(glam2_aln *aln, int seq_pick, int strand, int end, data *d) {
  const fasta *f = &d->seqs.f[seq_pick];
  const int *seq = strand == '+' ? f->seq : f->rcseq;
  glam2_col *cols = aln->cols;
  double **ms = strand == '+' ? d->sm.match_scores : d->sm.rc_match_scores;
  double *ds = strand == '+' ? d->sm.delete_scores : d->sm.rc_delete_scores;
  double **dp_mat = strand == '+' ? d->sm.dp_mat : d->sm.rc_mat;
  int i = aln->width - 1;
  int j = end - 1;

  aln->strands[seq_pick] = strand;
  if (strand == 0)  /* the sequence is unaligned */
    return;

  while (i != -1 && j != -1) {
    const double r = rand_dbl(dp_mat[i+1][j+1]);
    const double match = ms[i][seq[j]] * dp_mat[i][j];
    const double delete = ds[i] * dp_mat[i][j+1];

    if (dp_mat[i+1][j+1] < d->sm.underflow_flag)
      d->sm.underflow_flag = dp_mat[i+1][j+1];

    if (r < match) {  /* match */
      assert(seq[j] < d->alph.size);  /* ambiguous residue forbidden */
      cols[i].matches[seq_pick] = 1;
      cols[i].positions[seq_pick] = j;
      --i;
      --j;
    } else if (r < match + delete) {  /* deletion */
      cols[i].matches[seq_pick] = 0;
      cols[i].positions[seq_pick] = j+1;
      --i;
    } else {  /* insertion */
      assert(i+1 < aln->width);
      --j;
    }
  }

  for (; i != -1; --i) {
    if (dp_mat[i+1][j+1] < d->sm.underflow_flag)
      d->sm.underflow_flag = dp_mat[i+1][j+1];
    cols[i].matches[seq_pick] = 0;
    cols[i].positions[seq_pick] = 0;
  }
}

/* Stochastic traceback with nonlinear insertion scores */
void traceback_slow(glam2_aln *aln, int seq_pick, int strand, int end, data *d,
		    double temperature) {
  const fasta *f = &d->seqs.f[seq_pick];
  const int *seq = strand == '+' ? f->seq : f->rcseq;
  glam2_col *cols = aln->cols;
  double **ms = strand == '+' ? d->sm.match_scores : d->sm.rc_match_scores;
  double *ds = strand == '+' ? d->sm.delete_scores : d->sm.rc_delete_scores;
  double **dp_mat = strand == '+' ? d->sm.dp_mat : d->sm.rc_mat;
  int i = aln->width - 1;
  int j = end - 1;

  aln->strands[seq_pick] = strand;
  if (strand == 0)  /* the sequence is unaligned */
    return;

  while (i != -1 && j != -1) {
    double r = rand_dbl(dp_mat[i+1][j+1]);
    int a_count = aln->insert_counts[i];
    int b_count = aln->aligned_seq+1;  /* +1 */
    double insert_score = 1;

    if (dp_mat[i+1][j+1] < d->sm.underflow_flag)
      d->sm.underflow_flag = dp_mat[i+1][j+1];

    for (; j != -1; --j) {
      const double is = xpow(insert_score, 1 / temperature);
      const double match = ms[i][seq[j]] * dp_mat[i][j] * is;
      const double delete = ds[i] * dp_mat[i][j+1] * is;

      if (r < match) {  /* match */
	assert(seq[j] < d->alph.size);  /* ambiguous residue forbidden */
	cols[i].matches[seq_pick] = 1;
	cols[i].positions[seq_pick] = j;
	--i;
	--j;
	break;
      } else if (r < match + delete) {  /* deletion */
	cols[i].matches[seq_pick] = 0;
	cols[i].positions[seq_pick] = j+1;
	--i;
	break;
      }
      assert(i+1 < aln->width);
      r -= match + delete;
      insert_score *= beta_ratio_a(&d->scorer.i_prior, a_count, b_count);
      ++a_count;
    }
  }

  for (; i != -1; --i) {
    if (dp_mat[i+1][j+1] < d->sm.underflow_flag)
      d->sm.underflow_flag = dp_mat[i+1][j+1];
    cols[i].matches[seq_pick] = 0;
    cols[i].positions[seq_pick] = 0;
  }
}

/* Fill in the DP matrix, using the "forward algorithm" (sum of paths): */
void forward(const glam2_aln *aln, int seq_pick, int strand, data *d) {
  const fasta *f = &d->seqs.f[seq_pick];
  const int *seq = strand == '+' ? f->seq : f->rcseq;
  const int *end = seq + f->seqlen;
  double **ms = strand == '+' ? d->sm.match_scores : d->sm.rc_match_scores;
  double *ds = strand == '+' ? d->sm.delete_scores : d->sm.rc_delete_scores;
  double *is = d->sm.insert_scores;
  double **dp_mat = strand == '+' ? d->sm.dp_mat : d->sm.rc_mat;
  double *rescale = strand == '+' ? &d->sm.dp_rescale : &d->sm.rc_rescale;
  double r = 1;  /* rescaling factor for one row */
  int i;
  assert(is[aln->width-1] == 0);
  *rescale = 0;

  for (i = 0; i < aln->width; ++i) {
    if (r <= 0)
      die("%s: underflow in alignment i=%d\n", prog_name, i);
    div_dbl(ms[i], d->alph.size, r);
    ds[i] /= r;
    *rescale += xlog(r);

    /* tried to optimize this inner loop as much as possible */
    const int *s = seq;
    const double *msi = ms[i];
    const double dsi = ds[i];
    const double isi = is[i];
    const double *mat1 = dp_mat[i];
    double *mat2 = dp_mat[i+1];
    double m1 = *mat1;
    double m2 = dsi * m1;
    *mat2 = m2;
    r = m2;

    while (s != end) {
      ++mat1;
      ++mat2;
      m2 *= isi;
      m2 += msi[*s] * m1;
      m1 = *mat1;
      m2 += dsi * m1;
      *mat2 = m2;

      if (m2 > r)
	r = m2;
      if (m2 >= DBL_MAX)  /* this check doesn't seem to slow it down */
	die("%s: overflow %g in alignment i=%d isi=%.3g dsi=%.3g msi=%.3g m1=%.3g\n",
	    prog_name, m2, i, isi, dsi, msi[*s], m1);

      ++s;
    }
  }
}

/* Make vector of probabilities for different insertion sizes */
void beta_convolver_a(const beta *b, double *convolver, int size,
		      int a_count, int b_count) {
  double x = 1;
  int i;
  for (i = 0; i < size; ++i) {
    convolver[i] = x;
    x *= beta_ratio_a(b, a_count, b_count);
    ++a_count;
  }
}

/* z = convolution of x and y */
void convolve(double *z, const double *x, const double *y, const int size) {
  int i, j;
  for (i = 0; i < size; ++i) {
    z[i] = 0;
    for (j = 0; j <= i; ++j)
      z[i] += x[j] * y[i-j];
  }
}

/* Move this to util? */
void truncate_min_dbl(double *ptr, const size_t size, const double min) {
  const double *end = ptr + size;
  for (; ptr != end; ++ptr)
    if (*ptr < min)
      *ptr = min;
}

/* Fill in the DP matrix, using nonlinear insertion scores */
void forward_slow(const glam2_aln *aln, int seq_pick, int strand, data *d,
		  double temperature) {
  const fasta *f = &d->seqs.f[seq_pick];
  const int *seq = strand == '+' ? f->seq : f->rcseq;
  double **ms = strand == '+' ? d->sm.match_scores : d->sm.rc_match_scores;
  double *ds = strand == '+' ? d->sm.delete_scores : d->sm.rc_delete_scores;
  double **dp_mat = strand == '+' ? d->sm.dp_mat : d->sm.rc_mat;
  double *convolver = d->sm.convolver;
  double *convolved = d->sm.convolved;
  double *rescale = strand == '+' ? &d->sm.dp_rescale : &d->sm.rc_rescale;
  int i, j;
  *rescale = 0;

  for (i = 0; i < aln->width; ++i) {
    const double r = *max_dbl(dp_mat[i], f->seqlen+1);
    if (r <= 0)
      die("%s: underflow in alignment i=%d\n", prog_name, i);
    div_dbl(ms[i], d->alph.size, r);
    ds[i] /= r;
    *rescale += xlog(r);

    dp_mat[i+1][0] = ds[i] * dp_mat[i][0];

    for (j = 0; j < f->seqlen; ++j) {
      dp_mat[i+1][j+1] =
        ms[i][seq[j]] * dp_mat[i][j]
	+ ds[i] * dp_mat[i][j+1];

      if (dp_mat[i+1][j+1] >= DBL_MAX)
        die("%s: overflow %g in alignment i=%d j=%d M=%.3g*%.3g D=%.3g*%.3g\n",
            prog_name, dp_mat[i+1][j+1], i, j, ms[i][seq[j]],
            dp_mat[i][j], ds[i], dp_mat[i][j+1]);
    }

    if (i+1 < aln->width) {  /* no insertions after last column */
      beta_convolver_a(&d->scorer.i_prior, convolver, f->seqlen+1,
		       aln->insert_counts[i], aln->aligned_seq+1);  /* +1 */
      pow_dbl(convolver, f->seqlen+1, 1 / temperature);
      if (d->a.algorithm == 2) {
#ifdef FFT
	fft_convolve(dp_mat[i+1], dp_mat[i+1], convolver, f->seqlen+1, &d->sm.fft);
#endif
	/* set negative numbers produced by FFT to zero: big fat kludge */
	truncate_min_dbl(dp_mat[i+1], f->seqlen+1, 0);
      } else {
	convolve(convolved, dp_mat[i+1], convolver, f->seqlen+1);
	COPY(dp_mat[i+1], convolved, f->seqlen+1);
      }
    }
  }
}

#define FORWARD(aln, seq_pick, strand, d, temperature)\
        ((d)->a.algorithm == 0 ?\
         forward(aln, seq_pick, strand, d) :\
	 forward_slow(aln, seq_pick, strand, d, temperature))

#define TRACEBACK(aln, seq_pick, strand, end, d, temperature)\
        ((d)->a.algorithm == 0 ?\
         traceback(aln, seq_pick, strand, end, d) :\
         traceback_slow(aln, seq_pick, strand, end, d, temperature))

/* Stochastically re-align one sequence */
void site_sample(glam2_aln *aln, int seq_pick, data *d, double temperature) {
  int strand, end;
  const fasta *f = &d->seqs.f[seq_pick];
  unalign(aln, seq_pick, f);
  get_score_matrices(&d->sm, aln, &d->scorer);
  heat_scores(&d->sm, aln->width, d->alph.size, temperature);
  if (d->a.two_strands)
    copy_scores(&d->sm, aln->width, d->alph.size);
  FORWARD(aln, seq_pick, '+', d, temperature);
  if (d->a.two_strands)
    FORWARD(aln, seq_pick, '-', d, temperature);
  strand = pick_strand(aln, seq_pick, d);
  end = pick_endpoint(&d->sm, aln->width, f->seqlen, strand);
  /*
  puts("Delete scores:");
  print_vec(d->sm.delete_scores, aln->width);
  puts("Insert scores:");
  print_vec(d->sm.insert_scores, aln->width);
  puts("Match scores:");
  print_mat(d->sm.match_scores, aln->width, d->alph.size);
  puts("DP matrix:");
  print_mat(d->sm.dp_mat, aln->width+1, f->seqlen+1);
  printf("strand=%c end=%d\n", strand ? strand : '0', end);
  */
  TRACEBACK(aln, seq_pick, strand, end, d, temperature);
  realign(aln, seq_pick, f);
}