#include #include #include #include #include "dnacol.h" #include "dna.h" typedef cis::dna_collection DC; DC::dna_collection(DNAS const& d) : DNAS(d) {} void DC::open_dnas(){ for (cis::DNAS::iterator dna_it = (*this).begin(); dna_it != (*this).end(); ++dna_it) (*dna_it)->source().safe_open(); } void DC::calc_offsets(){ int64_t gp = 0; for (cis::DNAS::iterator dna_it = (*this).begin(); dna_it != (*this).end(); ++dna_it){ offsets[(*dna_it)] = gp; offsets_inv[gp] = (*dna_it); gp += (*dna_it)->length(); } total_length_ = gp; } //return the index of a dna / locus pair int64_t DC::ToIndex(cis::dna_t const* dna, int64_t locus) const { OIT it = offsets.find(dna); assert(it != offsets.end()); return (*it).second + locus; } int64_t DC::ToIndex(DNAS::const_iterator dna_iterator, int64_t locus) const { if (dna_iterator == this->end()) return total_length_; else return offsets.find(*dna_iterator)->second + locus; } int64_t DC::TotalLength(DNAS::const_iterator start_dna, DNAS::const_iterator end_dna) const { return ToIndex(end_dna, 0) - ToIndex(start_dna, 0); } //return the dna / locus pair from an index std::pair DC::FromIndex(int64_t index) const { OIIT it = offsets_inv.upper_bound(index); //first element whose key > k. we want the one before it it--; return std::make_pair((*it).second, index - (*it).first); } //scan the fasta file and measure the length of each sequence void DC::add(string const species, string const& fasta_dir, string const& fasta_file){ std::ostringstream errmsg; std::string fasta_path(fasta_dir + std::string("/") + fasta_file); FILE *ff = fopen(fasta_path.c_str(), "r"); //std::ifstream ff(fasta_path.c_str()); if (ff == NULL) { errmsg<<"Couldn't open Fasta sequence file (option -s): "<%s", name)){ //if (1 != sscanf(chunk, ">%s", name)){ cerr<<"Error reading header line:\n"<chrI\n"<insert(new dna_t(fasta_path, species, name, start, end)); } //ff.close(); fclose(ff); } int64_t DC::num_bases() const { int64_t sz = 0; for (cis::DNAS::iterator dna_it = (*this).begin(); dna_it != (*this).end(); ++dna_it) sz += (*dna_it)->length(); return sz; } namespace cis { cis::dna_t const* GetDNAByName(DNAS const& dnas, string const& species, string const& sdna){ dna_t const dna("", species, sdna); DNAS::const_iterator dna_i = dnas.find(&dna); cis::dna_t const* dnap = NULL; if (dna_i == dnas.end()){ static int warn_count = 0; if (warn_count < 10) { cerr<<"Warning: annotation is on " <