// *************************************************************************** // SamSequenceDictionary.cpp (c) 2010 Derek Barnett // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- // Last modified: 16 October 2011 (DB) // --------------------------------------------------------------------------- // Provides methods for operating on a collection of SamSequence entries. // ************************************************************************* #include "api/SamSequenceDictionary.h" using namespace BamTools; #include using namespace std; /*! \class BamTools::SamSequenceDictionary \brief Container of SamSequence entries. Provides methods for operating on a collection of SamSequence entries. */ /*! \fn SamSequenceDictionary::SamSequenceDictionary(void) \brief constructor */ SamSequenceDictionary::SamSequenceDictionary(void) { } /*! \fn SamSequenceDictionary::SamSequenceDictionary(const SamSequenceDictionary& other) \brief copy constructor */ SamSequenceDictionary::SamSequenceDictionary(const SamSequenceDictionary& other) : m_data(other.m_data) , m_lookupData(other.m_lookupData) { } /*! \fn SamSequenceDictionary::~SamSequenceDictionary(void) \brief destructor */ SamSequenceDictionary::~SamSequenceDictionary(void) { } /*! \fn void SamSequenceDictionary::Add(const SamSequence& sequence) \brief Appends a sequence to the dictionary. Duplicate entries are silently discarded. \param[in] sequence entry to be added */ void SamSequenceDictionary::Add(const SamSequence& sequence) { if ( IsEmpty() || !Contains(sequence) ) { m_data.push_back(sequence); m_lookupData[sequence.Name] = m_data.size() - 1; } } /*! \fn void SamSequenceDictionary::Add(const std::string& name, const int& length) \brief Appends a sequence to the dictionary. This is an overloaded function. \param[in] name name of sequence entry to be added \param[in] length length of sequence entry to be added \sa Add() */ void SamSequenceDictionary::Add(const std::string& name, const int& length) { Add( SamSequence(name, length) ); } /*! \fn void SamSequenceDictionary::Add(const SamSequenceDictionary& sequences) \brief Appends another sequence dictionary to this one This is an overloaded function. \param[in] sequences sequence dictionary to be appended \sa Add() */ void SamSequenceDictionary::Add(const SamSequenceDictionary& sequences) { SamSequenceConstIterator seqIter = sequences.ConstBegin(); SamSequenceConstIterator seqEnd = sequences.ConstEnd(); for ( ; seqIter != seqEnd; ++seqIter ) Add(*seqIter); } /*! \fn void SamSequenceDictionary::Add(const std::vector& sequences) \brief Appends multiple sequences to the dictionary. This is an overloaded function. \param[in] sequences entries to be added \sa Add() */ void SamSequenceDictionary::Add(const std::vector& sequences) { vector::const_iterator seqIter = sequences.begin(); vector::const_iterator seqEnd = sequences.end(); for ( ; seqIter!= seqEnd; ++seqIter ) Add(*seqIter); } /*! \fn void SamSequenceDictionary::Add(const std::map& sequenceMap) \brief Appends multiple sequences to the dictionary. This is an overloaded function. \param[in] sequenceMap map of sequence entries (name => length) to be added \sa Add() */ void SamSequenceDictionary::Add(const std::map& sequenceMap) { map::const_iterator seqIter = sequenceMap.begin(); map::const_iterator seqEnd = sequenceMap.end(); for ( ; seqIter != seqEnd; ++seqIter ) { const string& name = (*seqIter).first; const int& length = (*seqIter).second; Add( SamSequence(name, length) ); } } /*! \fn SamSequenceIterator SamSequenceDictionary::Begin(void) \return an STL iterator pointing to the first sequence \sa ConstBegin(), End() */ SamSequenceIterator SamSequenceDictionary::Begin(void) { return m_data.begin(); } /*! \fn SamSequenceConstIterator SamSequenceDictionary::Begin(void) const \return an STL const_iterator pointing to the first sequence This is an overloaded function. \sa ConstBegin(), End() */ SamSequenceConstIterator SamSequenceDictionary::Begin(void) const { return m_data.begin(); } /*! \fn void SamSequenceDictionary::Clear(void) \brief Clears all sequence entries. */ void SamSequenceDictionary::Clear(void) { m_data.clear(); m_lookupData.clear(); } /*! \fn SamSequenceConstIterator SamSequenceDictionary::ConstBegin(void) const \return an STL const_iterator pointing to the first sequence \sa Begin(), ConstEnd() */ SamSequenceConstIterator SamSequenceDictionary::ConstBegin(void) const { return m_data.begin(); } /*! \fn SamSequenceConstIterator SamSequenceDictionary::ConstEnd(void) const \return an STL const_iterator pointing to the imaginary entry after the last sequence \sa End(), ConstBegin() */ SamSequenceConstIterator SamSequenceDictionary::ConstEnd(void) const { return m_data.end(); } /*! \fn bool SamSequenceDictionary::Contains(const std::string& sequenceName) const \brief Returns true if dictionary contains sequence. \param[in] sequenceName search for sequence matching this name \return \c true if dictionary contains a sequence with this name */ bool SamSequenceDictionary::Contains(const std::string& sequenceName) const { return ( m_lookupData.find(sequenceName) != m_lookupData.end() ); } /*! \fn bool SamSequenceDictionary::Contains(const SamSequence& sequence) const \brief Returns true if dictionary contains sequence (matches on name). This is an overloaded function. \param[in] sequence search for this sequence \return \c true if dictionary contains sequence (matching on name) */ bool SamSequenceDictionary::Contains(const SamSequence& sequence) const { return Contains(sequence.Name); } /*! \fn SamSequenceIterator SamSequenceDictionary::End(void) \return an STL iterator pointing to the imaginary entry after the last sequence \sa Begin(), ConstEnd() */ SamSequenceIterator SamSequenceDictionary::End(void) { return m_data.end(); } /*! \fn SamSequenceConstIterator SamSequenceDictionary::End(void) const \return an STL const_iterator pointing to the imaginary entry after the last sequence This is an overloaded function. \sa Begin(), ConstEnd() */ SamSequenceConstIterator SamSequenceDictionary::End(void) const { return m_data.end(); } /*! \fn bool SamSequenceDictionary::IsEmpty(void) const \brief Returns \c true if dictionary contains no sequences \sa Size() */ bool SamSequenceDictionary::IsEmpty(void) const { return m_data.empty(); } /*! \fn void SamSequenceDictionary::Remove(const SamSequence& sequence) \brief Removes sequence from dictionary, if found (matches on name). This is an overloaded function. \param[in] sequence SamSequence to remove (matching on name) */ void SamSequenceDictionary::Remove(const SamSequence& sequence) { Remove(sequence.Name); } /*! \fn void SamSequenceDictionary::Remove(const std::string& sequenceName) \brief Removes sequence from dictionary, if found. \param[in] sequenceName name of sequence to remove \sa Remove() */ void SamSequenceDictionary::Remove(const std::string& sequenceName) { // skip if empty dictionary or if name unknown if ( IsEmpty() || !Contains(sequenceName) ) return; // update 'lookup index' for every entry after @sequenceName const size_t indexToRemove = m_lookupData[sequenceName]; const size_t numEntries = m_data.size(); for ( size_t i = indexToRemove+1; i < numEntries; ++i ) { const SamSequence& sq = m_data.at(i); --m_lookupData[sq.Name]; } // erase entry from containers m_data.erase( Begin() + indexToRemove ); m_lookupData.erase(sequenceName); } /*! \fn void SamSequenceDictionary::Remove(const std::vector& sequences) \brief Removes multiple sequences from dictionary. This is an overloaded function. \param[in] sequences sequences to remove \sa Remove() */ void SamSequenceDictionary::Remove(const std::vector& sequences) { vector::const_iterator rgIter = sequences.begin(); vector::const_iterator rgEnd = sequences.end(); for ( ; rgIter!= rgEnd; ++rgIter ) Remove(*rgIter); } /*! \fn void SamSequenceDictionary::Remove(const std::vector& sequenceNames) \brief Removes multiple sequences from dictionary. This is an overloaded function. \param[in] sequenceNames names of the sequences to remove \sa Remove() */ void SamSequenceDictionary::Remove(const std::vector& sequenceNames) { vector::const_iterator rgIter = sequenceNames.begin(); vector::const_iterator rgEnd = sequenceNames.end(); for ( ; rgIter!= rgEnd; ++rgIter ) Remove(*rgIter); } /*! \fn int SamSequenceDictionary::Size(void) const \brief Returns number of sequences in dictionary. \sa IsEmpty() */ int SamSequenceDictionary::Size(void) const { return m_data.size(); } /*! \fn SamSequence& SamSequenceDictionary::operator[](const std::string& sequenceName) \brief Retrieves the modifiable SamSequence that matches \a sequenceName. \note If the dictionary contains no sequence matching this name, this function inserts a new one with this name (length:0), and returns a reference to it. If you want to avoid this insertion behavior, check the result of Contains() before using this operator. \param[in] sequenceName name of sequence to retrieve \return a modifiable reference to the SamSequence associated with the name */ SamSequence& SamSequenceDictionary::operator[](const std::string& sequenceName) { if ( !Contains(sequenceName) ) { SamSequence seq(sequenceName, 0); m_data.push_back(seq); m_lookupData[sequenceName] = m_data.size() - 1; } const size_t index = m_lookupData[sequenceName]; return m_data.at(index); }