  sql_create_8(sql_region, 1, 8,
               mysqlpp::sql_int, region_id,
               mysqlpp::sql_int, group_id,
               mysqlpp::sql_enum, species,
               mysqlpp::sql_varchar, dna,
               mysqlpp::sql_int, start,
               mysqlpp::sql_int, end,
               mysqlpp::sql_enum, strand,
               mysqlpp::sql_varchar, name);




  string RegionsTableQuery(const string& table, 
                           const string& species_list){

	ostringstream os;
	//make species_list a legal list

	os<<"CREATE TABLE "<<table<<" ("
	  <<"region_id int(10) NOT NULL,"
	  <<"group_id int(10) NOT NULL,"
	  <<"species enum("<<sql::Enum(species_list)<<") NOT NULL,"
	  <<"dna VARCHAR(100) NOT NULL,"
	  <<"start int(10) NOT NULL,"
	  <<"end int(10) NOT NULL,"
	  <<"strand enum('POS', 'NEG', 'BOTH') NOT NULL,"
	  <<"name VARCHAR(100) NOT NULL,"
	  <<"PRIMARY KEY (region_id),"
	  <<"KEY sd (species,dna),"
	  <<"KEY pos1 (start),"
	  <<"KEY pos2 (end)"
	  <<");";

	return os.str();
  }


  void RegionsToSQL(REG_MAP const& regsmap, 
                    mysqlpp::Connection &con,
                    const string & species_list,
                    const string & table){
	
	mysqlpp::Query query = con.query();
	sql::SafeExecute(query, sql::DropTable(table));
	sql::SafeExecute(query, RegionsTableQuery(table, species_list));
	sql_region::table(table.c_str());
	sql_region sr;
	
	foreach(REG_MAP::value_type p, regsmap){
      dna_t const& dna = *p.first;
      REGIONS_MULTI &regs = p.second;
      foreach(REG_PC rpc, regs){
        REG_CR r = *rpc;
			
        sr.set(r.id, r.group_id, dna.species(), dna.name, 
               r.start, r.end, cis::PrintDNAStrand(r.strand), r.name);

        query.insert(sr);
        query.execute();
        query.reset();
      }
	}
  }

  REG_MAP SQLtoRegions(mysqlpp::Connection & con,
                       DNAS& dnas, 
                       string const& region_query,
                       bool onebased,
                       unsigned int five_prime_flank,
                       unsigned int three_prime_flank) throw (string &);

  void RegionsToSQL(REG_MAP const&, mysqlpp::Connection &, string const&, string const&);

  class LoadGFFRow {

  public:

	REG_MAP & regions;
	DNAS const& dnas;
	const bool onebased;
	int five_prime_flank;
	int three_prime_flank;
	
	LoadGFFRow(REG_MAP & r, DNAS &d, const bool o, 
			   int sf, int ef) :
      regions(r), dnas(d), onebased(o),
      five_prime_flank(sf), three_prime_flank(ef)
	{}

	void operator()(const sql_region & g){
      int start_adjust = onebased ? -1 : 0;
      DNA_P dna = GetDNAByName(dnas, g.species, g.dna);
      if (dna == NULL) {}
      else {
        int64_t start = min(g.start,g.end);
        int64_t end = max(g.start,g.end);
        REG_P reg = MakeRegion(dna, start, end, g.strand, g.name,
                               start_adjust, five_prime_flank, three_prime_flank,
                               g.region_id, g.group_id);

        if (regions.find(dna) == regions.end()) regions[dna] = REGIONS_MULTI();
        if (reg != NULL) regions[dna].insert(reg);
      }
	}
  };


  REG_MAP SQLtoRegions(mysqlpp::Connection & con,
                       DNAS& dnas, 
                       string const& region_query,
                       bool onebased,
                       unsigned int five_prime_flank,
                       unsigned int three_prime_flank) throw (string &) {
	
	mysqlpp::Query query = con.query(region_query.c_str());
	REG_MAP regions;
	LoadGFFRow lgr(regions, dnas, onebased, five_prime_flank, three_prime_flank);
	//query<<region_query;
	query.for_each(LoadGFFRow(regions, dnas, onebased, 
                              five_prime_flank, three_prime_flank));
	return regions;
	
  }


  std::map<parser_id, std::string> rule_names;

  rule_names[inputID] = "inputID";
  rule_names[cluster_groupID] = "cluster_groupID";
  rule_names[total_hitsID] = "total_hitsID";
  rule_names[min_scoreID] = "min_scoreID";
  rule_names[strandID] = "strandID";
  rule_names[pattern_kindID] = "pattern_kindID";
  rule_names[pattern_nameID] = "pattern_nameID";
  rule_names[vert_matrixID] = "vert_matrixID";
  rule_names[matrix_entryID] = "matrix_entryID";
  rule_names[horz_matrixID] = "horz_matrixID";
  rule_names[pattern_propID] = "pattern_propID";
  rule_names[collectionID] = "collectionID";
  rule_names[clusterID] = "clusterID";
  rule_names[cluster_nameID] = "cluster_nameID";
  rule_names[cluster_typeID] = "cluster_typeID";
  rule_names[cluster_windowID] = "cluster_windowID";
  rule_names[cluster_min_requirementsID] = "cluster_min_requirementsID";
  rule_names[cluster_requirementsID] = "cluster_requirementsID";
  rule_names[cluster_requirementID] = "cluster_requirementID";

  
sql_create_6(sql_rel_region, 1, 6,
             mysqlpp::sql_int, region_id,
             mysqlpp::sql_enum, rel_position,
             mysqlpp::sql_enum, same_strand,
             mysqlpp::sql_int, offset,
             mysqlpp::sql_int, num_genes_between,
             mysqlpp::sql_int, target_region_id);


  string SQLRelRegionTable(const string &table, const string & species_list){
	ostringstream os;
	os<<"CREATE TABLE "<<table<<" ("
	  <<"region_id INT(10) unsigned NOT NULL,"
	  <<"rel_position enum('HEAD', 'TAIL', 'INTRON', 'EXON', 'PARTIAL_EXON', 'OTHER') NOT NULL,"
	  <<"same_strand enum('SAME', 'OPP') NOT NULL,"
	  <<"offset INT(10) NOT NULL,"
	  <<"num_genes_between INT(3) NOT NULL,"
      <<"target_region_id INT(10) NOT NULL,"
	  <<"PRIMARY KEY (region_id,target_region_id),"
	  <<"KEY (region_id)"
      <<"KEY (target_region_id)"
	  <<");";

	return os.str();
  }

    //these should all have the proper sequence
/*     for (std::vector<std::pair<cis::DNA_P, cis::GPOS> >::iterator it = loci.begin();  */
/*          it != loci.end(); ++it){ */
/*       dna = (*it).first; */
/*       chr_pos = (*it).second; */
      
/*       //std::cout<<dna->species()<<":"<<dna->name<<":"<<chr_pos<<endl; */
      
/*       dna->source().seekg(dna->seek_start_pos+chr_pos, std::ios::beg); */
/*       dna->source().read(seq_extension, ival.sequence.size()); */
/*       cis::TranslateSeq(dseq, seq_extension, ival.sequence.size()); */
/*       cis::dnastring dseqs(dseq, ival.sequence.size()); */
/*       if (dseqs != ival.sequence){ */
/*         printf ("dseqs != ival.sequence at %i\n%s\n%s\n", */
/*                 (int)chr_pos, */
/*                 cis::FromDNAString(dseqs).c_str(), */
/*                 cis::FromDNAString(ival.sequence).c_str()); */
/*       } */
/*     }			 */



//   istream * trie_stream; //stream for itrie file
//   istream * gpos_stream; //stream for GPOS file
//   char * trie_buf;
//   char * gpos_buf;

//   trie_stream_temp.seekg(0, std::ios::end);
//   std::streampos trie_length = trie_stream_temp.tellg();
//   trie_stream_temp.seekg(0, std::ios::beg);
  
//   gpos_stream_temp.seekg(0, std::ios::end);
//   int64_t gpos_length = gpos_stream_temp.tellg();
  
  /*
  if (memory_scan_mode){
    trie_stream = new std::istringstream;
    std::ifstream trie_stream_temp(Parsed.treefile.c_str(), 
                                   std::ios::in | std::ios::binary);
    trie_stream_temp.seekg(0, std::ios::end);
    std::streampos trie_length = trie_stream_temp.tellg();
    trie_stream_temp.seekg(0, std::ios::beg);
    trie_buf = new char[trie_length + 1];
    if (trie_buf == NULL){
      fprintf(stderr, "Couldn't allocate %li bytes for gpos file.  Exiting.\n",
              trie_length);
      exit(50);
    }
    
    trie_stream_temp.read(trie_buf, trie_length);
    trie_stream_temp.close();
    trie_stream.rdbuf()->pubsetbuf(trie_buf, trie_length + 1);
    trie_stream.seekg(0, std::ios::beg);

    gpos_stream = new std::istringstream;
    std::ifstream gpos_stream_temp(Parsed.posfile.c_str(),
                                   std::ios::in | std::ios::binary);
    gpos_stream_temp.seekg(0, std::ios::end);
    int64_t gpos_length = gpos_stream_temp.tellg();
    gpos_stream_temp.seekg(0, std::ios::beg);
    gpos_buf = new char[gpos_length + 1];
    if (gpos_buf == NULL){
      fprintf(stderr, "Couldn't allocate %li bytes for gpos file.  Exiting.\n",
              gpos_length);
      exit(50);
    }
    gpos_stream_temp.read(gpos_buf, gpos_length);
    gpos_stream_temp.close();
    gpos_stream.rdbuf()->pubsetbuf(gpos_buf, gpos_length + 1);
    gpos_stream.seekg(0, std::ios::beg);
    
  }
  */


//   else {
//     static_cast<std::ifstream *>(trie_stream)->open(Parsed.treefile.c_str(), 
//                                                     std::ios::in | std::ios::binary);
//     trie_stream.seekg(0, std::ios::end);
//     std::streampos trie_length = trie_stream.tellg();
//     trie_stream.seekg(0, std::ios::beg);

    //set a large memory buffer
//     trie_buf = new char[trie_length + 1];
//     if (trie_buf == NULL){
//       fprintf(stderr, "Couldn't allocate %li bytes for trie file.  Exiting.\n",
//               trie_length);
//       exit(50);
//     }
    //trie_stream->rdbuf()->pubsetbuf(trie_buf, trie_length + 1);

//     gpos_stream = new std::ifstream;
//     static_cast<std::ifstream *>(gpos_stream)->open(Parsed.posfile.c_str(),
//                                                     std::ios::in | std::ios::binary);
//     gpos_stream.seekg(0, std::ios::end);
//     std::streampos gpos_length = gpos_stream.tellg();
//     gpos_stream.seekg(0, std::ios::beg);

    //set a large memory buffer
//     gpos_buf = new char[gpos_length + 1];
//     if (gpos_buf == NULL){
//       fprintf(stderr, "Couldn't allocate %li bytes for gpos file.  Exiting.\n",
//               gpos_length);
//       exit(50);
//     }
//     gpos_stream->rdbuf()->pubsetbuf(gpos_buf, gpos_length + 1);

  if (static_cast<std::ifstream *>(trie_stream)->is_open()) { 
    static_cast<std::ifstream *>(trie_stream)->close(); 
    delete trie_stream; 
  }

  if (static_cast<std::ifstream *>(gpos_stream)->is_open()) { 
    static_cast<std::ifstream *>(gpos_stream)->close();
    delete gpos_stream; 
  }

//   if (trie_buf != NULL) delete trie_buf;
//   if (gpos_buf != NULL) delete gpos_buf;

  
	/*
	  cluster_kind ClassifyCluster(char const* pat){
	  boost::regex FirstChunk("^(\\w+)");

	  string spat(pat);
	  string::const_iterator st = spat.begin();
	  string::const_iterator end = spat.end();

	  boost::smatch res;
	  boost::regex_search(st, end, res, FirstChunk, boost::match_extra);
	
	  cluster_kind ret;
	  cout<<res.str(1)<<endl;
	  if (res.str(1) == string("min_diversity")) ret = MIN_DIVERSITY;
	  else if (res.str(1) == string("diversity")) ret = DIVERSITY;
	  else if (res.str(1) == string("specific")) ret = SPECIFIC;
	  else if (res.str(1) == string("none")) ret = NO_CLUSTERING;
	  else {
	  cerr<<"Unknown kind of cluster criterion."<<endl;
	  exit(95);
	  }
	  return ret;
	  }
	*/



  //describes probability distribution of the spatial arrangement of
  //a functional regulon with its associated gene.
  //a mass function over the 1D space around a gene.
  class RegulonDistribution {

    typedef std::map<cis_enum::rel_position, PermutationCollection::POINTS> REL_POINTS;
    
  public:
    RegulonDistribution(std::map<cis_enum::rel_position, 
                        PermutationCollection::POINTS> const& distribution);
    
    std::map<cis_enum::rel_position, POINTS> spatial_log_cond_prob;

    //calculate P(D|F)/P(D) (see regulon.txt), the logOdds score of a functional
    //regulon occuring in the specified spatial relationship
    float SpatialLogOdds(cis_enum::rel_position relative_position,
                         int distance) const ;
    
  };
  
 

  RegulonDistribution::RegulonDistribution
  (std::map<cis_enum::rel_position, cis::POINTS> const& distribution) :
    spatial_log_cond_prob(distribution) { }


  //Log of Conditional Probability of a functional regulon occurring
  //in this spatial relationship P(D|F)/P(D), see regulon.txt
  float RegulonDistribution::SpatialLogConditionalProb
  (cis_enum::rel_position relative_position,
   int distance) const {

    REL_POINTS::const_iterator iterator =
      this->spatial_log_cond_prob.find(relative_position);

    if (iterator == this->spatial_log_cond_prob.end() ||
        (int)(*iterator).second.size() <= distance){
      return -HUGE_VAL;
    } else {
      return InterpolatePoint((*iterator).second, distance,-HUGE_VAL);
    }

  }


#include <map>
#include <string>

#include "nuctrie.h"

namespace cis { class hit; class pattern; }


class ModuleConservation {

 private:

  int * current_mapping;
  int * best_mapping;
  bool * target_sites_used;
  float best_score;

  //weights for combining the four component scores into one
  float offset_score_weight;
  float permutation_score_weight;
  float orientation_score_weight;
  float hits_similarity_score_weight;

  std::map<std::string, hit_trie *>::const_iterator pattern_iter;
  std::map<std::string, hit_trie *> patterns;

 public:

  ModuleConservation(std::map<std::string, hit_trie *> const& patterns);

  //calculate the optimal mapping and module similarity score
  //between query and target modules.
  float ModuleSimilarity(cis::hit const** query_module_hits,
                         int query_module_size,
                         cis::hit const** target_module_hits,
                         int target_module_size,
                         int * best_mapping,
                         bool * same_orientation);


  //auxiliary function for finding and recording the best mapping
  void extend_mapping(cis::hit const** query_module_hits,
                      int query_module_size,
                      cis::hit const** target_module_hits,
                      int target_module_size,
                      int query_index,
                      int * best_mapping);

  //auxiliary function for making a reversed module to query against
  void make_reverse_complement_module(cis::hit const** source_module_hits,
                                      int source_module_size,
                                      cis::hit const** dest_module_hits);

  void delete_module(cis::hit const** module, int module_size);
  
  int PermutationScore(int const* site_mapping, int query_module_size);
  
  int OffsetScore(int const* site_mapping, 
                  int query_module_size,
                  cis::hit const** query_module_hits,
                  cis::hit const** target_module_hits);
  
  int OrientationScore(int const* site_mapping, 
                       int query_module_size,
                       cis::hit const** query_module_hits,
                       cis::hit const** target_module_hits);


  float SumMotifSimilarity(int const* site_mapping,
                           int query_module_size,
                           cis::hit const** query_module_hits,
                           cis::hit const** target_module_hits);
    
  float MotifSimilarity(cis::hit const* query_hit, 
                        cis::hit const* target_hit);
  
  //calculate the similarity between two modules for the given mapping
  float MappedModuleSimilarity(int const* site_mapping,
                               int query_module_size,
                               cis::hit const** query_module_hits,
                               cis::hit const** target_module_hits);
  

};
#include "module_conservation.h"

#include "hit.h"
#include "dna_types.h"


ModuleConservation::ModuleConservation
(std::map<std::string, hit_trie *> const& patterns,
 float offset_score_weight,
 float permutation_score_weight,
 float orientation_score_weight,
 float hits_similarity_score_weight) :
  current_mapping(NULL), best_mapping(NULL),
  target_sites_used(NULL), best_score(-1.0),
  offset_score_weight(offset_score_weight),
  permutation_score_weight(permutation_score_weight),
  orientation_score_weight(orientation_score_weight),
  hits_similarity_score_weight(hits_similarity_score_weight),
  patterns(patterns) {
}


float ModuleConservation::ModuleSimilarity(cis::hit const** query_module_hits,
                                           int query_module_size,
                                           cis::hit const** target_module_hits,
                                           int target_module_size,
                                           int * best_mapping,
                                           bool * same_orientation){

  //initialize auxiliary variables
  int * best_mapping_forward = new int[query_module_size];
  int * best_mapping_reverse = new int[query_module_size];

  float best_score_forward;
  float best_score_reverse;

  cis::hit const** target_module_hits_reverse = 
    new cis::hit const*[target_module_size];

  this->current_mapping = new int[query_module_size]; //uninitialized okay.
  this->best_mapping = new int[query_module_size]; // uninitialized okay.
  this->target_sites_used = new bool[target_module_size];

  for (int ind=0; ind != target_module_size; ++ind)
    this->target_sites_used[ind] = false;

  this->best_score = -1.0; //scores are always positive (?)

  this->extend_mapping(query_module_hits, query_module_size,
                       target_module_hits, target_module_size, 0,
                       best_mapping_forward);
  
  best_score_forward = this->best_score;

  //now do the reverse mapping
  make_reverse_complement_module(target_module_hits, target_module_size,
                                 target_module_hits_reverse);

  for (int ind=0; ind != target_module_size; ++ind)
    this->target_sites_used[ind] = false;

  this->best_score = -1.0; //scores are always positive (?)

  this->extend_mapping(query_module_hits, query_module_size,
                       target_module_hits_reverse, target_module_size, 0,
                       best_mapping_reverse);
  
  best_score_reverse = this->best_score;

  delete this->current_mapping; this->current_mapping = NULL;
  delete this->best_mapping; this->best_mapping = NULL;
  delete this->target_sites_used; this->target_sites_used = NULL;
  delete best_mapping_forward;
  delete best_mapping_reverse;
  delete_module(target_module_hits_reverse, target_module_size);
  delete target_module_hits_reverse;

  *same_orientation = best_score_forward > best_score_reverse;

  if (*same_orientation) {
    for (int i = 0; i != query_module_size; ++i)
      best_mapping[i] = best_mapping_forward[i];
  } else {
    for (int i = 0; i != query_module_size; ++i)
      best_mapping[i] = target_module_size - 1 - best_mapping_reverse[i];
  }

  return this->best_score;
  
}


void ModuleConservation::extend_mapping(cis::hit const** query_module_hits,
                                        int query_module_size,
                                        cis::hit const** target_module_hits,
                                        int target_module_size,
                                        int query_index,
                                        int * best_mapping){
  
  if (query_index >= query_module_size) { 
    //condition:  'mapping' contains a valid mapping
    // of query and target sites.  the mapping will have -1's in places,
    // these are unmapped query sites.
    float current_score = 
      MappedModuleSimilarity(this->current_mapping,
                             query_module_size,
                             query_module_hits,
                             target_module_hits);

    if (current_score > this->best_score) {
      for (int ind=0; ind != query_module_size; ++ind)
        best_mapping[ind] = this->current_mapping[ind];
      best_score = current_score;
    }
  } else {
    this->current_mapping[query_index] = -1; // set to 'uninitialized'
    for (int ti = 0; ti != target_module_size; ++ti) { 
      // attempt to map an unused, type-matching target site
      if (target_module_hits[ti]->name() == 
          query_module_hits[query_index]->name() &&
          ! this->target_sites_used[ti]) {
        this->target_sites_used[ti] = true;
        this->current_mapping[query_index] = ti;
        break;
      }
    }
    extend_mapping(query_module_hits, query_module_size,
                   target_module_hits, target_module_size, 
                   query_index+1, best_mapping);
  }
}


//make a copy of each source hit, in an order and orientation that 
//represents the entire module reverse complemented
//dest_module_hits must contain enough pointers, but this function
//allocate the hits themselves
void 
ModuleConservation::make_reverse_complement_module
(cis::hit const** source_module_hit, int source_module_size,
 cis::hit const** dest_module_hit){
  
  //determine the bounds of the source
  int64_t high_bound = source_module_hit[source_module_size - 1]->end;

  for (int ind = 0; ind != source_module_size; ++ind){

    cis::hit const & s = *source_module_hit[source_module_size - 1 - ind];
    dest_module_hit[ind] = new 
      cis::hit(s.dna, cis::RevCompDNA(s.seq()), high_bound - s.end,
               high_bound - s.start, ComplementStrand(s.strand),
               s.score, s.id, s.name(), s.cluster_group);
  }
}


void ModuleConservation::delete_module(cis::hit const** module,
                                       int module_size){

  for (int ind = 0; ind != module_size; ++ind) delete module[ind];

}



//count the number of non-increasing pairs of indices in site_mapping
//this is the same as the number of strand breaks necessary to achieve the
//reaarrangement that the site_mapping represents.

int ModuleConservation::PermutationScore(int const* site_mapping,
                                         int query_module_size){

  int last_target_index = -1; // uninitialized                          
  int number_increasing_pairs = 0;

  for (int i=0; i < query_module_size; ++i){
    if (site_mapping[i] > last_target_index 
        && last_target_index != -1){
      ++number_increasing_pairs;
    }
    last_target_index = site_mapping[i];
  }
  return number_increasing_pairs;

}

    
//calculate the total displacement of each query site relative to
//the target site it is mapped to.
int ModuleConservation::OffsetScore(int const* site_mapping, 
                                    int query_module_size,
                                    cis::hit const** query_module_hits, 
                                    cis::hit const** target_module_hits){
  
  //first, calculate the average position
  int64_t * query_target_offset = new int64_t[query_module_size];
  int64_t * query_target_distance = new int64_t[query_module_size];
  int64_t total_offset = 0;
  int64_t total_distance = 0;
  int number_mapped = 0;

  for (int qi = 0; qi < query_module_size; ++qi){
    if (site_mapping[qi] != -1) {
      query_target_offset[qi] = 
        target_module_hits[site_mapping[qi]]->start - 
        query_module_hits[qi]->start;
      total_offset += query_target_offset[qi];
      number_mapped++;
    } 
  }
  int64_t average_offset = total_offset / number_mapped;

  //adjust query_target_offset
  for (int qi = 0; qi < query_module_size; ++qi){
    if (site_mapping[qi] != -1) {
      //the total distance of the displacement of each mapped site in
      //the context of the two modules.
      total_distance +=
        abs(query_target_offset[qi] - average_offset);
    }
  }

  delete query_target_offset;
  delete query_target_distance;
  
  return total_distance;
}


//count the number of preserved orientation site pairs
int ModuleConservation::OrientationScore(int const* site_mapping,
                                         int query_module_size,
                                         cis::hit const** query_module_hits,
                                         cis::hit const** target_module_hits){
  
  int number_same_strand_pairs;
  for (int qi = 0; qi != query_module_size; ++qi){
    if (site_mapping[qi] != -1){
      if (query_module_hits[qi]->strand ==
          target_module_hits[site_mapping[qi]]->strand)
        ++number_same_strand_pairs;
    }
  }
  return number_same_strand_pairs;

}


//simply a sum of similarities (or differences) on some scale...
//since different site types are different lengths, there should
//be some kind of normalization for the range of possible
//difference.
float ModuleConservation::SumMotifSimilarity(int const* site_mapping,
                                             int query_module_size,
                                             cis::hit const** query_module_hits,
                                             cis::hit const** target_module_hits){

  float sum_site_similarity = 0.0;
  for (int qi = 0; qi != query_module_size; ++qi){
    if (site_mapping[qi] != -1){
      sum_site_similarity += 
        MotifSimilarity(query_module_hits[qi], 
                        target_module_hits[site_mapping[qi]]);
    }
  }

  return sum_site_similarity;
  
}


//unfortunately the only way to achieve this similarity score is if you
//have access to the actual pattern tree that produced the score itself
//the hit information alone does not provide this.  therefore, you 
//must load it again from the file...

//So

//Using the appropriate pattern, generate a series of scores
//for all single-point mutation intermediates between the query_hit
//and target hit sequence.  There should be <edit_distance> + 1 of these.
//Then, calculate the sum of pairwise abs(differences) in score and return
//that, averaged over the length of the two hits
float ModuleConservation::MotifSimilarity(cis::hit const* query_hit, 
                                          cis::hit const* target_hit){
  
  assert(query_hit->name() == target_hit->name()); 
  //fails if we have terminal_id, must fix

  assert(query_hit->seq().size() == target_hit->seq().size());

  this->pattern_iter = this->patterns.find(query_hit->name());

  if (this->pattern_iter == this->patterns.end()){
    fprintf(stderr, 
            "MotifSimilarity: Couldn't find the hit pattern called %s.  Returning -1",
            query_hit->name().c_str());

    return -1.0;
  } else {
    //now generate the series scores, starting from the query site and ending
    //with the target site, each time, advancing to the next position where
    //they differ.
    hit_trie * patnode = (*pattern_iter).second;
    cis::dnastring current_sequence = query_hit->seq();
    cis::dnastring target_sequence = target_hit->seq();

    int site_length = query_hit->seq().size();
    int * score_path = new int[site_length];
    score_path[0] = query_hit->score;

    //populate score_path
    for (int current_index = 1; current_index != site_length; ++current_index){
      if (current_sequence[current_index] == target_sequence[current_index]){
        //no change, just use last score
        score_path[current_index] = score_path[current_index-1];
      } else {
        current_sequence[current_index] = target_sequence[current_index];
        score_path[current_index] = patnode->score(current_sequence);
      }
    }
    
    //calculate average absolute value of score change
    float sum_of_changes = 0;

    for (int i = 1; i != site_length; ++i)
      sum_of_changes += abs(score_path[i] - score_path[i-1]);

    float average_score_change = 
      sum_of_changes / static_cast<float>(site_length);

    delete score_path;
    
    return average_score_change;
    
  } 

}
//calculate the module similarity between two modules with the defined mapping
//no need for the target_module_size, the indices in 'site_mapping' are all
//within 'target_module_hits'
float ModuleConservation::MappedModuleSimilarity(int const* site_mapping,
                                                 int query_module_size,
                                                 cis::hit const** query_module_hits,
                                                 cis::hit const** target_module_hits){
  
  int offset_score = OffsetScore(site_mapping, query_module_size, 
                                 query_module_hits, target_module_hits);
  
  int permutation_score = PermutationScore(site_mapping, query_module_size);
  
  int orientation_score = 
    OrientationScore(site_mapping, query_module_size,
                     query_module_hits, target_module_hits);
  
  float hits_similarity_score = 
    SumMotifSimilarity(site_mapping, query_module_size,
                       query_module_hits, target_module_hits);
  
  return 
    this->offset_score_weight * offset_score +
    this->permutation_score_weight * permutation_score +
    this->orientation_score_weight * orientation_score +
    this->hits_similarity_score_weight * hits_similarity_score; 

  /* !!! Some linear combination of these four scores */
  
}
//the log conditional probability of two modules being truly orthologous
//with the specified one-to-one site mapping.
//this is an auxiliary function to be used with the recursive search for
//overall maximal log (P(orthology | query_module, target_module))
template <typename Collection>
void ComputeCurrentScores(MappingData * mapping,
                          Collection const& query_module,
                          Collection const& target_module,
                          SummaryScoreData * score){
  
  int offset_score = OffsetScore(mapping, query_module, target_module);
  
  float offset_odds = cis::InterpolatePoint(Collection::offset_curve,
                                            offset_score, -HUGE_VAL);
  
  int permutation_score = PermutationScore(mapping, query_module.size());
  
  float permutation_odds = cis::InterpolatePoint(Collection::permutation_curve,
                                                 permutation_score, -HUGE_VAL);
  
  float hits_similarity_score =
    MappedOrthologyScore(query_module, target_module, mapping);
  
  float hits_similarity_odds = 
    cis::InterpolatePoint(Collection::hits_similarity_curve,
                          hits_similarity_score, -HUGE_VAL);
  
  
  float frac_same_orientation = 
    FractionPreservedOrientation(mapping, query_module.size());
  
  float orientation_odds = cis::InterpolatePoint(Collection::orientation_curve,
                                                 frac_same_orientation, -HUGE_VAL);
    
  score->log_cond_prob_orthology =
    offset_odds + permutation_odds + orientation_odds + hits_similarity_odds;
  
}


//
template <typename Collection>
void ComputeCurrentScores(MappingData * mapping,
                          Collection const& query_module,
                          Collection const& target_module,
                          ComponentScoreData * score){

  score->offset_score = OffsetScore(mapping, query_module, target_module);
  
  score->permutation_score = PermutationScore(mapping, query_module.size());
  
  score->hits_similarity_score =
    MappedOrthologyScore(query_module, target_module, mapping);
  
  score->frac_same_orientation = 
    FractionPreservedOrientation(mapping, query_module.size());

}                          



//return the average corrected offset score
int OffsetScore(MappingData * mapping, int number_mapped, int query_size){

  return total_distance;
}



//return the number of mapped elements that are in order with the previous
float PermutationScore(MappingData * mapping, int query_size){

  int number_in_order = 0;
  
  for (int qi = 0; qi < query_size; ++qi)
    if (mapping[qi].target_used && 
        mapping[qi].in_order) number_in_order++;

  return number_in_order;
}



//calculate the total displacement of each query site relative to
//the target site it is mapped to.
template <typename Collection>
int OffsetScore(MappingData * mapping, 
                Collection const& query,
                Collection const& target){
  
 
  //first, calculate the average position
  int64_t * query_target_offset = new int64_t[query.size()];
  int64_t * query_target_distance = new int64_t[query.size()];
  int64_t total_offset = 0;
  int64_t total_distance = 0;
  int number_mapped = 0;

  for (int qi = 0; qi < query.size(); ++qi){
    if (mapping[qi].best_target_index != -1) {
      query_target_offset[qi] = 
        target.sites[mapping[qi].best_target_index]->start - 
        query.sites[qi]->start;
      total_offset += query_target_offset[qi];
      number_mapped++;
    } 
  }
  int64_t average_offset = total_offset / number_mapped;

  //adjust query_target_offset
  for (int qi = 0; qi < query.size(); ++qi){
    if (mapping[qi].best_target_index != -1) {
      //the total distance of the displacement of each mapped site in
      //the context of the two modules.
      total_distance +=
        abs(query_target_offset[qi] - average_offset);
    }
  }

  delete query_target_offset;
  delete query_target_distance;
  
  return total_distance;
}





int PermutationScore(MappingData * mapping, int query_size){
  
  int last_target_index = -1; // uninitialized                          
  int number_increasing_pairs = 0;
  
  for (int i=0; i < query_size; ++i){
    if (mapping[i].best_target_index > last_target_index 
        && last_target_index != -1){
      ++number_increasing_pairs;
    }
    last_target_index = mapping[i].best_target_index;
  }
  return number_increasing_pairs;
  
}


int OrientationScore(MappingData * mapping, int query_size){

  int number_same_orientation = 0;

  for (int qi = 0; qi < query_size; ++qi)
    if (mapping[qi].target_used &&
        mapping[qi].same_orientation)
      number_same_orientation++;

  return number_same_orientation;

}


//count the number of preserved orientation site pairs
float FractionPreservedOrientation(MappingData * mapping, int mapping_size){
  
  float number_same_orientation = 0.0;
  float total_number_pairs = 0.0;
  for (int qi = 0; qi != mapping_size; ++qi)
    if (mapping[qi].best_target_index != -1){
      number_same_orientation += 
        mapping[qi].same_orientation ? 1.0 : 0.0;
      total_number_pairs++;
    }

  return number_same_orientation / total_number_pairs;
}




//measures the overall orthology score between two collections of
//elements using the provided mapping also stores the array of
//pairwise_orientations of the elements
template <typename Collection>
float MappedOrthologyScore(Collection const& query,
                           Collection const& target,
                           MappingData * mapping){
  
  float num_mapped_sites = 0.0;
  float sum_site_similarity = 0.0;
  
  for (int qi = 0; qi != query.size(); ++qi){
    if (mapping[qi].best_target_index != -1){
      num_mapped_sites += 1.0;
      mapping[qi].log_orthology = 
        Collection::ElementOrthology(query.sites[qi],
                                     target.sites[mapping[qi].best_target_index],
                                     &mapping[qi].same_orientation);
      sum_site_similarity += mapping[qi].log_orthology;
    }
  }
  
  return sum_site_similarity / num_mapped_sites;
    
}




    //creates a new node with <r> as the interval, and inserts
    //it in the proper place in the tree.

    //traverses the set of children, finding the proper insertion point.
    //this is a tree organized so that the unique parent of a node shall be the
    //left-most interval that contains the node's interval.

    //how is the child getting a copy of itself?  by the copy constructor...
    //but when is the copy constructor being called?


    /*
      Analyzes the children of this tree node, and decides whether the given interval
      should be a peer, parent, or child of the children.

      It should be a peer iff there exist two adjacent intervals Aa < Rr < Bb in the children.
      It should be a parent iff 

      Look at the range of insertion points for the start and end of the query interval:

      lb_s, ub_s, lb_e, ub_e
      Cases:

      lb_s = ub_e:
      lb_e = lb_s and ub_s = ub_e:  insert as peer at lb_s
      ub_s < ub_e:  make q child of ub_s
      lb_s < lb_e:  make q child of lb_s

      lb_s < ub_e:   make range [lb_s, ub_e) children of q
      lb_s > ub_e:   make q a child of --lb_e


    */

  



    // !!! must test this
    void region_tree::insert_rec(region const* r){

        //assert(interval->dna == r->dna);

        std::pair<BY_START::iterator, BY_START::iterator> range_by_start = 
            children_by_start->equal_range(r->start);
    
        //NODE_END const& children_by_end = get<end_tag>(*children);

        pair<BY_END::iterator, BY_END::iterator> range_by_end = 
            children_by_end->equal_range(r->end);
    
        pair<BY_START::iterator, BY_START::iterator> range_by_end_projected = 
            std::make_pair(children_by_start->find((*range_by_end.first).first),
                           children_by_start->find((*range_by_end.second).first));
    
        //boost::multi_index::project<start_tag>(*children,e_tmp.first),
        //boost::multi_index::project<start_tag>(*children,e_tmp.second));

        BY_END::iterator lbound_by_end_orig = range_by_end.first;
        BY_END::iterator ubound_by_end_orig = range_by_end.second;

        BY_START::iterator end = children_by_start->end();

        BY_START::iterator lbound_by_start = range_by_start.first;
        BY_START::iterator ubound_by_start = range_by_start.second;

        BY_START::iterator lbound_by_end = range_by_end_projected.first;
        BY_START::iterator ubound_by_end = range_by_end_projected.second;

    
        //case 1:  
        if (lbound_by_start == ubound_by_end){
            if (lbound_by_start == lbound_by_end && 
                ubound_by_start == ubound_by_end) {
                region_tree * child = new region_tree(r);
                children_by_start->insert(lbound_by_start, std::make_pair(r->start, child));
                children_by_end->insert(lbound_by_end, std::make_pair(r->end, child));
            }

            else if (ubound_by_start != ubound_by_end) (*ubound_by_start).second->insert_rec(r);
            else if (lbound_by_start != lbound_by_end) (*lbound_by_start).second->insert_rec(r);
        }


        //case 2:  
        else if (less_iterator(lbound_by_start, ubound_by_end, end)){
            region_tree *nrt = new region_tree(r);
            nrt->children_by_start->insert(lbound_by_start, ubound_by_end);
            nrt->children_by_end->insert(lbound_by_start, ubound_by_end);

            children_by_start->erase(lbound_by_start, ubound_by_end);
            children_by_end->erase(lbound_by_end_orig, ubound_by_end_orig);

            children_by_start->insert(std::make_pair(r->start, nrt));
            children_by_end->insert(std::make_pair(r->end, nrt));
        }

        else (*--lbound_by_end).second->insert_rec(r);

        //check_tree(this);
		
    }	


    
    /*
      Get left and right immediate neighbors on either side of q that do not
      overlap q.  Return NULL for either or both if they do not exist...
    */
    pair<region const*, region const*> NonOverlappingNeighbors(BY_START const* nodes_by_start, 
                                                               BY_END const* nodes_by_end,
                                                               REG_CR query_interval){
        assert(nodes_by_start != NULL);

        BY_END::const_iterator lo_ef = nodes_by_end->lower_bound(query_interval.start);
        BY_START::const_iterator lo_sf = nodes_by_start->find((*lo_ef).first);
        BY_START::const_reverse_iterator lo(lo_sf);

        region const* left = 
            (lo == nodes_by_start->rend()) ? NULL : (*lo).second->interval;

        BY_START::const_iterator hi = nodes_by_start->upper_bound(query_interval.end);

        region const* right = 
            (hi == nodes_by_start->end()) ? NULL : (*hi).second->interval;
        return std::make_pair(left, right);
    }



    /*
      Associate every region with all regions within max_dist displacement
      Store in a simple set of pairs of region ids...

    */
    std::set<std::pair<region const*, region const*> > 
    GetAllNeighbors(REGIONS_MULTI const& regs,
                    region_tree const& tree, int max_dist){
	
        //for each region, find all neighbors
        REGIONS_MULTI nbors;
        std::set<std::pair<region const*, region const*> > pairs;

        REGIONS_MULTI::const_iterator regs_iter;
        for (regs_iter = regs.begin(); regs_iter != regs.end(); ++regs_iter){
            region const* r = *regs_iter;
            region q(r->dna, r->name, 
                     std::max(r->start - max_dist, int64_t(0)), 
                     std::min(r->end + max_dist, (int64_t)r->dna.length()), POS, 
                     GENERIC_REGION_ID, -1, region::NONE);
            nbors = IntervalOverlap(tree, q);

            for (regs_iter = nbors.begin(); regs_iter != nbors.end(); ++regs_iter){
                region const* f = *regs_iter;
                pairs.insert(std::make_pair(r, f));
            }
        }

        return pairs;

    }

