/***********************************************************************
 * FILE: motif.c
 * AUTHOR: William Stafford Noble
 * CREATE DATE: 7-13-97
 * PROJECT: MHMM
 * COPYRIGHT: 2001-2008, WSN
 * DESCRIPTION: Data structure for representing one motif.
 ***********************************************************************/
#include <assert.h>
#include <math.h>
#include <stdlib.h> /* For definition of NULL .*/
#include <string.h>

#include "motif.h"
#include "motif-spec.h"

// same default site count as Tomtom's website
#define DEFAULT_SITE_COUNT 20

/***********************************************************************
 * Calculates the information content of a position of the motif.
 ***********************************************************************/
static inline double position_information_content(
  MOTIF_T *a_motif,
  int position
) {
  int i, asize;
  double H, item;
  ARRAY_T *freqs;

  asize = alph_size(a_motif->alph, ALPH_SIZE);
  H = 0;
  freqs = get_matrix_row(position, a_motif->freqs);
  for (i = 0; i < asize; ++i) {
    item = get_array_item(i, freqs);
    H -= item*my_log2(item);
  }
  return my_log2(asize) - H;
}

/***********************************************************************
 * Get or set the identifier of a motif.
 ***********************************************************************/
char* get_motif_id
  (MOTIF_T* motif)
{
  return motif->id + 1; // without strand indicator
}

char* get_motif_st_id
  (MOTIF_T* motif) 
{
  // skip strand indicator for protein
  if (motif->alph == PROTEIN_ALPH) return motif->id+1;
  return motif->id; // with strand indicator
}

void set_motif_id
  (const char* id,
   int len,
   MOTIF_T* motif)
{
  // the first character is the strand indicator which defaults to '?'
  if (motif->id[0] != '+' && motif->id[0] != '-') motif->id[0] = '?';
  len = (len < MAX_MOTIF_ID_LENGTH ? len : MAX_MOTIF_ID_LENGTH);
  strncpy(motif->id+1, id, len);
  motif->id[len+1] = '\0'; // ensure null termination
}

char* get_motif_id2
  (MOTIF_T* motif)
{
  return motif->id2;
}

void set_motif_id2
  (const char* id2,
   int len,
   MOTIF_T* motif)
{
  len = (len < MAX_MOTIF_ID_LENGTH ? len : MAX_MOTIF_ID_LENGTH);
  strncpy(motif->id2, id2, len);
  motif->id2[len] = '\0'; // ensure null termination
}

/***********************************************************************
 * Get or set the strand of a motif.
 ***********************************************************************/
void set_motif_strand
  (char strand,
   MOTIF_T *motif)
{
  assert(strand == '?' || strand == '-' || strand == '+');
  motif->id[0] = strand;
}

char get_motif_strand
  (MOTIF_T *motif)
{
  return motif->id[0];
}

/***********************************************************************
 * Get the frequencies
 ***********************************************************************/
MATRIX_T* get_motif_freqs(MOTIF_T* motif){
  return motif->freqs;
}

/***********************************************************************
 * Get the E-value of a motif.
 ***********************************************************************/
double get_motif_evalue
  (MOTIF_T* motif)
{
  return motif->evalue;
}

/***********************************************************************
 * Get the complexity of a motif.
 ***********************************************************************/
double get_motif_complexity
  (MOTIF_T *motif)
{
  return motif->complexity;
}

/***********************************************************************
 * Get the E-value of a motif.
 ***********************************************************************/
double get_motif_nsites
  (MOTIF_T* motif)
{
  return motif->num_sites;
}

/***********************************************************************
 * Get the motif length
 ***********************************************************************/
int get_motif_length
  (const MOTIF_T* motif) 
{
  return motif->length;
}

/***********************************************************************
 * Get the motif alphabet
 ***********************************************************************/
int get_motif_alph
  (MOTIF_T* motif) 
{
  return motif->alph;
}

/***********************************************************************
 * Get the motif strands
 * TRUE if the motif was created looking at both strands
 ***********************************************************************/
BOOLEAN_T get_motif_strands
  (MOTIF_T *motif)
{
  return ((motif->flags & MOTIF_BOTH_STRANDS) != 0);
}

/***********************************************************************
 * Get the motif length after trimming
 ***********************************************************************/
int get_motif_trimmed_length
  (MOTIF_T* motif)
{
  return motif->length - motif->trim_left - motif->trim_right;
}

/***********************************************************************
 * Get the motif alphabet size (non-ambiguous letters)
 ***********************************************************************/
int get_motif_alph_size
  (MOTIF_T* motif) 
{
  return alph_size(motif->alph, ALPH_SIZE);
}


/***********************************************************************
 * Get the motif ambiguous alphabet size (only ambiguous letters)
 ***********************************************************************/
int get_motif_ambiguous_size
  (MOTIF_T* motif)
{
  return (motif->flags & MOTIF_HAS_AMBIGS ? alph_size(motif->alph, AMBIG_SIZE) : 0);
}

/***********************************************************************
 * Get the position specific score for a letter
 ***********************************************************************/
double get_motif_score
  (MOTIF_T* motif, int position, int i_alph)
{
  return get_matrix_cell(position, i_alph, motif->scores);
}

/***********************************************************************
 * Return one column of a motif, as a newly allocated array of counts.
 * This assumes that num_sites is a reasonable value and not zero...
 ***********************************************************************/
ARRAY_T* get_motif_counts
  (int      position,
   MOTIF_T* motif)
{
  int i_alph, asize;
  ARRAY_T* return_value;
  
  asize = alph_size(motif->alph, ALPH_SIZE);
  return_value = allocate_array(asize);

  for (i_alph = 0; i_alph < asize; i_alph++) {
    set_array_item(i_alph, motif->num_sites * 
        get_matrix_cell(position, i_alph, motif->freqs), return_value);
  }
  return(return_value);
}

/***********************************************************************
 * Get the url of a motif
 ***********************************************************************/
char* get_motif_url
  (MOTIF_T *motif)
{
  return motif->url;
}

/***********************************************************************
 * Set the url of a motif
 ***********************************************************************/
void set_motif_url
  (char    *url,
   MOTIF_T *motif)
{
  if (motif->url) {
    free(motif->url);
    motif->url = NULL;
  }
  copy_string(&(motif->url), url);
}

/***********************************************************************
 * Get the number of positions to trim from the left of the motif
 ***********************************************************************/
int get_motif_trim_left
  (MOTIF_T *motif)
{
  return motif->trim_left;
}

/***********************************************************************
 * Get the number of positions to trim from the right of the motif
 ***********************************************************************/
int get_motif_trim_right
  (MOTIF_T *motif)
{
  return motif->trim_right;
}

/***********************************************************************
 * Clear the motif trim
 ***********************************************************************/
void clear_motif_trim
  (MOTIF_T *motif)
{
  motif->trim_left = 0;
  motif->trim_right = 0;
}

/***********************************************************************
 * Determine whether a given motif is in a given list of motifs.
 ***********************************************************************/
BOOLEAN_T have_motif
  (char*    motif_id,
   int      num_motifs,
   MOTIF_T* motifs)
{
  int i_motif;

  for (i_motif = 0; i_motif < num_motifs; i_motif++) {
    if (strcmp(motifs[i_motif].id, motif_id) == 0) {
      return(TRUE);
    }
  }
  
  return(FALSE);

}

/***********************************************************************
 * Copy a motif from one place to another.
 ***********************************************************************/
void copy_motif
  (MOTIF_T* source,
   MOTIF_T* dest)
{
  ALPH_SIZE_T size;
  memset(dest, 0, sizeof(MOTIF_T));
  strcpy(dest->id, source->id);
  strcpy(dest->id2, source->id2);
  dest->length = source->length;
  dest->alph = source->alph;
  dest->flags = source->flags;
  dest->evalue = source->evalue;
  dest->num_sites = source->num_sites;
  dest->complexity = source->complexity;
  dest->trim_left = source->trim_left;
  dest->trim_right = source->trim_right;
  if (source->freqs) {
    size = (dest->flags & MOTIF_HAS_AMBIGS ? ALL_SIZE : ALPH_SIZE);
    // Allocate memory for the matrix.
    dest->freqs = allocate_matrix(dest->length, alph_size(dest->alph, size));
    // Copy the matrix.
    copy_matrix(source->freqs, dest->freqs);
  } else {
    dest->freqs = NULL;
  }
  if (source->scores) {
    // Allocate memory for the matrix. Note that scores don't contain ambigs.
    dest->scores = allocate_matrix(dest->length, alph_size(dest->alph, ALPH_SIZE));
    // Copy the matrix.
    copy_matrix(source->scores, dest->scores);
  } else {
    dest->scores = NULL;
  }
  copy_string(&(dest->url), source->url);
}

/***********************************************************************
 * Compute the complement of one DNA frequency distribution.
 * 
 * Assumes DNA alphabet in order ACGT.
 ***********************************************************************/
void complement_dna_freqs
  (ARRAY_T* source,
   ARRAY_T* dest)
{
  set_array_item(0, get_array_item(3, source), dest); // A -> T
  set_array_item(1, get_array_item(2, source), dest); // C -> G
  set_array_item(2, get_array_item(1, source), dest); // G -> C
  set_array_item(3, get_array_item(0, source), dest); // T -> A

  //check if the frequencies have ambiguous characters
  //for example meme does not use ambiguous characters
  if (get_array_length(source) > 4) {
    calc_ambigs(DNA_ALPH, FALSE, dest);
  }
}

/***********************************************************************
 * Convert array by compute the average of complementary dna frequencies.
 *
 * Apparently no-one uses this.
 *
 * Assumes DNA alphabet in order ACGT.
 ***********************************************************************/
void balance_complementary_dna_freqs
  (ARRAY_T* source)
{
  double at = (get_array_item(0, source)+get_array_item(3, source))/2.0;
  double cg = (get_array_item(1, source)+get_array_item(2, source))/2.0;
  set_array_item(0, at, source); // A -> T
  set_array_item(1, cg, source); // C -> G
  set_array_item(2, cg, source); // G -> C
  set_array_item(3, at, source); // T -> A

  calc_ambigs(DNA_ALPH, FALSE, source);
}

/***********************************************************************
 * Takes a matrix of letter probabilities and converts them into meme
 * score.
 *
 * Assuming the probability is nonzero the score is just: 
 * s = log2(p / bg) * 100
 *
 ***********************************************************************/
MATRIX_T* convert_freqs_into_scores
  (ALPH_T alph,
   MATRIX_T *freqs,
   ARRAY_T *bg,
   int site_count,
   double pseudo_count) 
{
  int asize, length;
  double freq, score, total_count, counts, bg_freq;
  MATRIX_T *scores;
  int row, col;

  assert(alph != INVALID_ALPH);
  assert(freqs != NULL);
  assert(bg != NULL);

  length = get_num_rows(freqs);
  asize = alph_size(alph, ALPH_SIZE);

  scores = allocate_matrix(length, asize);
  total_count = site_count + pseudo_count;

  for (col = 0; col < asize; ++col) {
    bg_freq = get_array_item(col, bg);
    for (row = 0; row < length; ++row) {
      freq = get_matrix_cell(row, col, freqs);
      // apply a pseudo count
      freq = ((pseudo_count * bg_freq) + (freq * site_count)) / total_count;
      // if the background is correct this shouldn't happen
      if (freq <= 0) freq = 0.0000005;
      // convert to a score
      score = (log(freq / bg_freq) / log(2)) * 100;
      set_matrix_cell(row, col, score, scores);
    }
  }
  return scores;
}

/***********************************************************************
 * Takes a matrix of meme scores and converts them into letter 
 * probabilities.
 *
 * The probablility can be got by:
 * p = (2 ^ (s / 100)) * bg
 *
 ***********************************************************************/
MATRIX_T* convert_scores_into_freqs
  (ALPH_T alph,
   MATRIX_T *scores,
   ARRAY_T *bg,
   int site_count,
   double pseudo_count)
{
  int asize, length;
  double freq, score, total_count, counts, bg_freq;
  MATRIX_T *freqs;
  int row, col;

  assert(alph != INVALID_ALPH);
  assert(scores != NULL);
  assert(bg != NULL);

  length = get_num_rows(scores);
  asize = alph_size(alph, ALPH_SIZE);

  freqs = allocate_matrix(length, asize);
  total_count = site_count + pseudo_count;

  for (col = 0; col < asize; ++col) {
    bg_freq = get_array_item(col, bg);
    for (row = 0; row < length; ++row) {
      score = get_matrix_cell(row, col, scores);
      // convert to a probability
      freq = pow(2.0, score / 100.0) * bg_freq;
      // remove the pseudo count
      freq = ((freq * total_count) - (bg_freq * pseudo_count)) / site_count;
      if (freq < 0) freq = 0;
      else if (freq > 1) freq = 1;
      set_matrix_cell(row, col, freq, freqs);
    }
  }
  for (row = 0; row < length; ++row) {
    normalize_subarray(0, asize, 0.0, get_matrix_row(row, freqs));
  }

  return freqs;
}

/***********************************************************************
 * Turn a given motif into its own reverse complement.
 ***********************************************************************/
void reverse_complement_motif
  (MOTIF_T* a_motif)
{
  ALPH_SIZE_T size;
  int i, temp_trim;
  ARRAY_T* left_freqs;
  ARRAY_T* right_freqs;
  ARRAY_T* temp_freqs;   // Temporary space during swap.

  assert(a_motif->alph == DNA_ALPH);

  // Allocate space.
  size = (a_motif->flags & MOTIF_HAS_AMBIGS ? ALL_SIZE : ALPH_SIZE);
  temp_freqs = allocate_array(alph_size(a_motif->alph, size));

  // Consider each row (position) in the motif.
  for (i = 0; i < (int)((a_motif->length + 1) / 2); i++) {
    left_freqs = get_matrix_row(i, a_motif->freqs);
    right_freqs = get_matrix_row(a_motif->length - (i + 1), a_motif->freqs);

    // Make a temporary copy of one row.
    copy_array(left_freqs, temp_freqs);

    // Compute reverse complements in both directions.
    complement_dna_freqs(right_freqs, left_freqs);
    complement_dna_freqs(temp_freqs, right_freqs);
  }
  free_array(temp_freqs);
  if (a_motif->scores) {
    // Allocate space.
    temp_freqs = allocate_array(alph_size(a_motif->alph, ALPH_SIZE));

    // Consider each row (position) in the motif.
    for (i = 0; i < (int)((a_motif->length + 1) / 2); i++) {
      left_freqs = get_matrix_row(i, a_motif->scores);
      right_freqs = get_matrix_row(a_motif->length - (i + 1), a_motif->scores);

      // Make a temporary copy of one row.
      copy_array(left_freqs, temp_freqs);

      // Compute reverse complements in both directions.
      complement_dna_freqs(right_freqs, left_freqs);
      complement_dna_freqs(temp_freqs, right_freqs);
    }
    free_array(temp_freqs);
  }
  //swap the trimming variables
  temp_trim = a_motif->trim_left;
  a_motif->trim_left = a_motif->trim_right;
  a_motif->trim_right = temp_trim;
  //swap the strand indicator
  //this assumes a ? is equalivant to +
  if (get_motif_strand(a_motif) == '-') {
    set_motif_strand('+', a_motif);
  } else {
    set_motif_strand('-', a_motif);
  }
}

/***********************************************************************
 * Apply a pseudocount to the motif pspm.
 ***********************************************************************/
void apply_pseudocount_to_motif
  (MOTIF_T* motif, ARRAY_T *background, double pseudocount)
{
  int pos, letter, len, asize, sites;
  double prob, count, total;
  ARRAY_T *temp;

  // no point in doing work when it makes no difference
  if (pseudocount == 0) return;
  assert(pseudocount > 0);
  // motif dimensions
  asize = alph_size(motif->alph, ALPH_SIZE);
  len = motif->length;
  // create a uniform background if none is given
  temp = NULL;
  if (background == NULL) {
    temp = allocate_array(asize);
    get_uniform_frequencies(motif->alph, temp);
    background = temp;
  }
  // calculate the counts
  sites = (motif->num_sites > 0 ? motif->num_sites : DEFAULT_SITE_COUNT);
  total = sites + pseudocount;
  for (pos = 0; pos < len; ++pos) {
    for (letter = 0; letter < asize; ++letter) {
      prob = get_matrix_cell(pos, letter, motif->freqs);
      count = (prob * sites) + (pseudocount * get_array_item(letter, background));
      prob = count / total;
      set_matrix_cell(pos, letter, prob, motif->freqs);
    }
  }
  if (temp) free_array(temp);
}

/***********************************************************************
 * Calculate the ambiguous letters from the concrete ones.
 ***********************************************************************/
void calc_motif_ambigs
  (MOTIF_T *motif)
{
  int i_row;
  resize_matrix(motif->length, alph_size(motif->alph, ALL_SIZE), 0, motif->freqs);
  motif->flags |= MOTIF_HAS_AMBIGS;
  for (i_row = 0; i_row < motif->length; ++i_row) {
    calc_ambigs(motif->alph, FALSE, get_matrix_row(i_row, motif->freqs));
  }
}

/***********************************************************************
 * Normalize the motif's pspm
 ***********************************************************************/
void normalize_motif
  (MOTIF_T *motif, double tolerance)
{
  int i_row, asize;
  asize = alph_size(motif->alph, ALPH_SIZE);
  for (i_row = 0; i_row < motif->length; ++i_row) {
    normalize_subarray(0, asize, tolerance, get_matrix_row(i_row, motif->freqs));
  }
}

/***********************************************************************
 * Set the trimming bounds on the motif.
 *
 * Reads from the left and right until it finds a motif position with
 * an information content larger or equal to the threshold in bits.
 * 
 ***********************************************************************/
void trim_motif_by_bit_threshold(
  MOTIF_T *a_motif, 
  double threshold_bits
) {
  int i, len;

  len = a_motif->length;
  for (i = 0; i < len; ++i) {
    if (position_information_content(a_motif, i) >= threshold_bits) break;
  }
  a_motif->trim_left = i;
  if (i == len) {
    a_motif->trim_right = 0;
    return;
  }
  for (i = len-1; i >= 0; --i) {
    if (position_information_content(a_motif, i) >= threshold_bits) break;
  }
  a_motif->trim_right = len - i - 1;
}

/***********************************************************************
 * Compute the complexity of a motif as a number between 0 and 1.
 *
 * Motif complexity is the average K-L distance between the "motif
 * background distribution" and each column of the motif.  The motif
 * background is just the average distribution of all the columns.  The
 * K-L distance, which measures the difference between two
 * distributions, is the same as the information content:
 *
 *  \sum_i p_i log(p_i/f_i)
 *
 * This value increases with increasing complexity.
 ***********************************************************************/
double compute_motif_complexity
  (MOTIF_T* a_motif)
{
  double return_value;
  ARRAY_T* motif_background;  // Mean emission distribution.
  int num_rows;
  int i_row;
  int num_cols;
  int i_col;

  num_cols = alph_size(a_motif->alph, ALPH_SIZE);
  num_rows = a_motif->length;

  // Compute the mean emission distribution.
  motif_background = get_matrix_col_sums(a_motif->freqs);
  scalar_mult(1.0 / (double)num_rows, motif_background);

  // Compute the K-L distance w.r.t. the background.
  return_value = 0;
  for (i_row = 0; i_row < num_rows; i_row++) {
    ARRAY_T* this_emission = get_matrix_row(i_row, a_motif->freqs);
    for (i_col = 0; i_col < num_cols; i_col++) {
      ATYPE this_item = get_array_item(i_col, this_emission);
      ATYPE background_item = get_array_item(i_col, motif_background);

      // Use two logs to avoid handling divide-by-zero as a special case.
      return_value += this_item 
	* (my_log(this_item) - my_log(background_item));
    }
  }

  free_array(motif_background);
  return(return_value / (double)num_rows);
}

/***********************************************************************
 * Compute the number of positions from the start or end of a motif
 * that contain a given percentage of the information content.
 *
 * Information content is the same as relative entropy, and is computed
 * as
 *
 *  \sum_i p_i log(p_i/f_i)
 *
 ***********************************************************************/
int get_info_content_position
  (BOOLEAN_T from_start, // Count from start?  Otherwise, count from end.
   float     threshold,  // Information content threshold (in 0-100).
   ARRAY_T*  background, // Background distribution.
   MOTIF_T*  a_motif)
{
  // Make sure the given threshold is in the right range.
  if ((threshold < 0.0) || (threshold > 100.0)) {
    die(
      "Information threshold (%g) must be a percentage between 0 and 100.\n",
	    threshold
    );
  }

  // Get the dimensions of the motif.
  int num_cols = alph_size(a_motif->alph, ALPH_SIZE);
  int num_rows = a_motif->length;

  // Compute and store the information content for each row
  // and the total information content for the motif.
  ATYPE total_information_content = 0.0;
  ARRAY_T* information_content = allocate_array(num_rows);
  int i_row;
  int i_col;
  for (i_row = 0; i_row < num_rows; i_row++) {
    ATYPE row_content = 0.0;
    ARRAY_T* this_emission = get_matrix_row(i_row, a_motif->freqs);
    for (i_col = 0; i_col < num_cols; i_col++) {
      ATYPE this_item = get_array_item(i_col, this_emission);
      ATYPE background_item = get_array_item(i_col, background);

      // Use two logs to avoid handling divide-by-zero as a special case.
      ATYPE partial_row_content = 
        this_item * (my_log(this_item) - my_log(background_item));

      row_content += partial_row_content;
      total_information_content += partial_row_content;

    }
    set_array_item(i_row, row_content, information_content);
  }

  // Search for the target position.
  int return_value = -1;
  ATYPE cumulative_content = 0.0;
  ATYPE percent = 0.0;
  if (from_start) {
    // Search from start for IC exceeding threshold.
    for (i_row = 0; i_row < num_rows; i_row++) {
      cumulative_content += get_array_item(i_row, information_content);
      percent = 100 *  cumulative_content / total_information_content;
      if (percent >= threshold) {
        return_value = i_row;
        break;
      }
    }
  }
  else {
    // Search from end for IC exceeding threshold.
    for (i_row = num_rows - 1; i_row >= 0; i_row--) {
      cumulative_content += get_array_item(i_row, information_content);
      percent = 100 *  cumulative_content / total_information_content;
      if (percent >= threshold) {
        return_value = i_row;
        break;
      }
    }
  }

  if (return_value == -1) {
    die(
      "Can't find a position that accounts for %g of information content.",
      threshold
    );
  }
  free_array(information_content);
  return(return_value);
}


/***********************************************************************
 * Returns the string that is the best possible match to the given motif.
 * Caller is responsible for freeing string.
 ***********************************************************************/
char *get_best_possible_match(MOTIF_T *motif) {
  int mpos, apos, asize; 
  char *match_string;
  ALPH_SIZE_T size;

  asize = alph_size(motif->alph, ALPH_SIZE);
  
  assert(motif != NULL);
  assert(motif->freqs != NULL);
  assert(motif->length == motif->freqs->num_rows);
  size = (motif->flags & MOTIF_HAS_AMBIGS ? ALL_SIZE : ALPH_SIZE);
  assert(alph_size(motif->alph, size) == motif->freqs->num_cols); 

  match_string = mm_malloc(sizeof(char) * (motif->length + 1));

  // Find the higest scoring character at each position in the motif.
  for(mpos = 0; mpos < motif->length; ++mpos) {
    ARRAY_T *row = motif->freqs->rows[mpos];
    double max_v = row->items[0];
    int max_i = 0;
    for(apos = 1; apos < asize; ++apos) {
     if (row->items[apos] >= max_v) {
        max_i = apos;
        max_v = row->items[apos];
     }
    }
    match_string[mpos] = alph_char(motif->alph, max_i);
  }

  //  Add null termination
  match_string[motif->length] = '\0';

  return match_string;
}

/***********************************************************************
 * Duplicates and reverse complements the motif
 ***********************************************************************/
MOTIF_T* dup_rc_motif
  (MOTIF_T *motif)
{
  MOTIF_T *rc_motif;
  rc_motif = mm_malloc(sizeof(MOTIF_T));
  copy_motif(motif, rc_motif);
  reverse_complement_motif(rc_motif);
  return rc_motif;
}

/***********************************************************************
 * Free dynamic memory used by one motif assuming the structure itself
 * does not need to be freed. 
 ***********************************************************************/
void free_motif
  (MOTIF_T *a_motif)
{
  /* Don't bother with empty motifs. */
  if (a_motif == NULL) 
    return;

  // Reset all memeber values
  free_matrix(a_motif->freqs);
  free_matrix(a_motif->scores);
  if (a_motif->url) free(a_motif->url);
  memset(a_motif, 0, sizeof(MOTIF_T));
}

/***********************************************************************
 * Free dynamic memory used by a given motif and free the structure.
 * To be useable by collections it takes a void * but expects
 * a MOTIF_T *.
 ***********************************************************************/
void destroy_motif
  (void * a_motif)
{
  free_motif((MOTIF_T*)a_motif);
  free((MOTIF_T*)a_motif);
}

/***********************************************************************
 * Convert a list of motifs into an array of motifs with a count.
 * This is intended to allow backwards compatibility with the older
 * version.
 ***********************************************************************/
void motif_list_to_array(ARRAYLST_T *motif_list, MOTIF_T **motif_array, int *num) {
  int count, i;
  MOTIF_T *motifs;
  count = arraylst_size(motif_list);
  motifs = (MOTIF_T*)mm_malloc(sizeof(MOTIF_T) * count);
  for (i = 0; i < count; ++i) {
    copy_motif((MOTIF_T*)arraylst_get(i, motif_list), motifs+i);
  }
  *motif_array = motifs;
  *num = count;
}

/***********************************************************************
 * Convert a tree of motifs into an array of motifs with a count.
 * This is intended to allow backwards compatibility with the older
 * version.
 ***********************************************************************/
void motif_tree_to_array(RBTREE_T *motif_tree, MOTIF_T **motif_array, int *num) {
  int count, i;
  MOTIF_T *motifs;
  RBNODE_T *node;

  count = rbtree_size(motif_tree);
  motifs = mm_malloc(sizeof(MOTIF_T) * count);
  for (i = 0, node = rbtree_first(motif_tree); node != NULL; i++, node = rbtree_next(node)) {
    copy_motif((MOTIF_T*)rbtree_value(node), motifs+i);
  }
  *motif_array = motifs;
  *num = count;
}

/***********************************************************************
 * Get the motif at the selected index in the array. This is needed for
 * the older version which used arrays of motifs.
 ***********************************************************************/
MOTIF_T* motif_at(MOTIF_T *motif_array, int index) {
  return motif_array+index;
}

/***********************************************************************
 * Free an array of motifs
 ***********************************************************************/
void free_motif_array(MOTIF_T *motif_array, int num) {
  int i;
  for (i = 0; i < num; ++i) {
    free_motif(motif_array+i);
  }
  free(motif_array);
}

/***********************************************************************
 * free an array list and the contained motifs
 ***********************************************************************/
void free_motifs(
  ARRAYLST_T *motif_list
) {
  arraylst_destroy(destroy_motif, motif_list);
}

/***********************************************************************
 * Create two copies of each motif.  The new IDs are preceded by "+"
 * and "-", and the "-" version is the reverse complement of the "+"
 * version.
 *
 * The reverse complement is always listed directly after the original.
 ***********************************************************************/
void add_reverse_complements
  (ARRAYLST_T* motifs)
{
  int i, count;
  char motif_id[MAX_MOTIF_ID_LENGTH + 1]; // Name of the current motif;

  count = arraylst_size(motifs);
  //double the array size
  arraylst_preallocate(count*2, motifs);
  arraylst_add_n(count, NULL, motifs);
  //move and reverse complement the original motifs
  for (i = count-1; i >= 0; --i) {
    //move the motif to its new position
    arraylst_swap(i, 2*i, motifs);
    //get the motif and the one that will become the reverse complement
    MOTIF_T *orig = arraylst_get(2*i, motifs);
    if (get_motif_strand(orig) == '?') set_motif_strand('+', orig);
    //copy and reverse complement the motif to the position after
    MOTIF_T *rc = dup_rc_motif(orig);
    arraylst_set(2*i + 1, rc, motifs);
  }
  assert(arraylst_size(motifs) == (2 * count));
}