#include #include #include #include #include #include "dna.h" using std::string; namespace cis { string dna_t::iupac = "XACMGRSVTWYHKDBN"; string dna_t::iupac_comp = "XTGKCYSBAWRDMHVN"; //operators for dna_t bool operator==(dna_t const& d1, dna_t const& d2) { return d1.species() == d2.species() && d1.name == d2.name && d1.length() == d2.length(); } bool operator!=(dna_t const& d1, dna_t const& d2) { return ! (d1 == d2); } bool operator<(dna_t const& d1, dna_t const& d2) { return d1.species() < d2.species() || (d1.species() == d2.species() && (d1.name < d2.name)); } dna_t::dna_t(std::string const& path, string const& sp, string const& n, int64_t const ss, int64_t const se) : seek_start_pos(ss), seek_end_pos(se) { species(sp); source(named_stream(path)); if (seek_start_pos > seek_end_pos) { cerr<<"start position ("<>sp >>name >>seek_start_pos >>seek_end_pos >>filename; i.seekg(1,std::ios::cur); //go past the newline species(sp); source(dna_t::named_stream (std::string(dna_directory + std::string("/") + filename))); //source().safe_open(); } bool less_dna_ptr::operator()(cis::dna_t const* d1, cis::dna_t const* d2) const { return *d1 < *d2; } bool less_dna::operator()(dna_t const& d1, dna_t const& d2) const { return d1 < d2; } ostream& operator<<(ostream &o, dna_t const& d){ o<>(istream &i, dna_t & d){ string sp; std::string path; i>>sp >>d.name >>d.seek_start_pos >>d.seek_end_pos >>path; d.species(sp); d.source(dna_t::named_stream(path)); d.source().safe_open(); return i; } cis::dnastring RevCompDNA(cis::dnastring const& seq){ cis::dnastring rev(seq); std::reverse(rev.begin(), rev.end()); int len=seq.length(),i; for (i=0; i < len; i++) rev[i] = CompNUC(rev[i]); return rev; } //preserves case while reverse complementing string RevCompStr(string const& seq){ string rev(seq); std::reverse(rev.begin(), rev.end()); int len=seq.length(),i; for (i=0; i < len; i++) rev[i] = CompChar(rev[i]); return rev; } //This reverses the bit order of first four bits NUC CompNUC(NUC n) { static NUC comp[] = { X,T,G,K,C,Y,S,B,A,W,R,D,M,H,V,N }; return comp[n]; } char Nuc2char(NUC d){ if (d >= 16){ std::fprintf(stderr, "Nuc2char: non-nucleotide stored."); assert(false); } static char chars[] = "XACMGRSVTWYHKDBN*"; return chars[static_cast(d)]; } char Nuc2lcchar(NUC d){ if (d >= 16){ std::fprintf(stderr, "Nuc2char: non-nucleotide stored."); assert(false); } static char chars[] = "xacmgrsvtwyhkdbn*"; return chars[static_cast(d)]; } NUC char2NUC(unsigned char c){ static NUC nucs[] = { z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,A,B,C,D,z,z,G, H,z,z,K,z,M,N,z, z,z,R,S,T,z,V,W, X,Y,z,z,z,z,z,z, z,A,B,C,D,z,z,G, H,z,z,K,z,M,N,z, z,z,R,S,T,z,V,W, X,Y,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z, z,z,z,z,z,z,z,z }; static std::set seen_chars; if (nucs[static_cast(c)] == z){ if (seen_chars.find(c) == seen_chars.end()){ seen_chars.insert(c); std::fprintf(stderr, "char2NUC: nuc %c converted to X. Further warnings disabled...\n", c); } return X; } else return nucs[static_cast(c)]; } //complements the nucleotide or consensus, preserving case. char CompChar(char c){ int lower_adjust = static_cast(c - 'a') <= static_cast('z' - 'a') ? 'a' - 'A' : 0; char comp = Nuc2char(CompNUC(char2NUC(c))); //will be upper case return comp + lower_adjust; } void TranslateSeq(NUC *const iseq, char const*const seq, int const size){ for (int i=0; i < size; i++) iseq[i] = char2NUC(seq[i]); } string FromDNAString(cis::dnastring const& d){ string s; s.resize(d.size()); for (unsigned int i=0; i < s.size(); i++) s[i] = Nuc2char(d[i]); return s; } string FromDNAString(cis::NUC const*const sequence, int size){ string s; s.resize(size); for (size_t i=0; i < s.size(); i++) s[i] = Nuc2char(sequence[i]); return s; } cis::dnastring ToDNAString(string const s){ cis::dnastring d; d.resize(s.size()); for (unsigned int i=0; i < d.size(); i++) d[i] = char2NUC(s[i]); return d; } cis::dnastring ToDNAString(char const*const s, int sz){ cis::dnastring d; d.resize(sz); for (int i=0; i < sz; i++) d[i] = char2NUC(s[i]); return d; } //output the actual sequence associated with the region string dna_t::sequence(uint64_t start, uint64_t end) const { std::ostringstream os; int64_t size = end - start; if (size <= 0){ cerr<<"end position of "< 10000000) { cerr<<"A maximum size of 10,000,000 is allowed for outputting raw dna sequence"<>i == 1) { break; } return (NUC)i; } } } //namespace cis