# $Id: phyloxml.pm 11507 2007-06-23 01:37:45Z jason $ # # BioPerl module for Bio::TreeIO::phyloxml # # Please direct questions and support issues to # # Cared for by Mira Han # # Copyright Mira Han # # You may distribute this module under the same terms as perl itself # POD documentation - main docs before the code =head1 NAME Bio::TreeIO::phyloxml - TreeIO implementation for parsing PhyloXML format. =head1 SYNOPSIS # do not use this module directly use Bio::TreeIO; my $treeio = Bio::TreeIO->new(-format => 'phyloxml', -file => 'tree.dnd'); my $tree = $treeio->next_tree; =head1 DESCRIPTION This module handles parsing and writing of phyloXML format. =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing list. Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track of the bugs and their resolution. Bug reports can be submitted viax the web: http://bugzilla.open-bio.org/ =head1 AUTHOR - Mira Han Email mirhan@indiana.edu =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::TreeIO::phyloxml; use strict; # Object preamble - inherits from Bio::Root::Root use Bio::Tree::Tree; use Bio::Tree::AnnotatableNode; use Bio::Annotation::SimpleValue; use Bio::Annotation::Relation; use XML::LibXML; use XML::LibXML::Reader; use base qw(Bio::TreeIO); sub _initialize { my($self, %args) = @_; $args{-treetype} ||= 'Bio::Tree::Tree'; $args{-nodetype} ||= 'Bio::Tree::AnnotatableNode'; $self->SUPER::_initialize(%args); # phyloxml TreeIO does not use SAX, # therefore no need to attach EventHandler # instead we will define a reader that is a pull-parser of libXML if ($self->mode eq 'r') { if ($self->_fh) { $self->{'_reader'} = XML::LibXML::Reader->new( IO => $self->_fh, no_blanks => 1 ); } if (!$self->{'_reader'}) { $self->throw("XML::LibXML::Reader not initialized"); } } elsif ($self->mode eq 'w') { # print default lines $self->_print('',"\n"); $self->_print('', "\n"); } $self->treetype($args{-treetype}); $self->nodetype($args{-nodetype}); $self->{'_lastitem'} = {}; # holds open items and the attribute hash $self->_init_func(); } sub _init_func { my ($self) = @_; my %start_elements = ( 'phylogeny' => \&element_phylogeny, 'clade' => \&element_clade, 'sequence_relation' => \&element_relation, 'clade_relation' => \&element_relation, ); $self->{'_start_elements'} = \%start_elements; my %end_elements = ( 'phylogeny' => \&end_element_phylogeny, 'clade' => \&end_element_clade, 'sequence_relation' => \&end_element_relation, 'clade_relation' => \&end_element_relation, ); $self->{'_end_elements'} = \%end_elements; } sub DESTROY { my $self = shift; if ($self->mode eq 'w') { $self->_print(''); $self->flush if $self->_flush_on_write && defined $self->_fh; } $self->SUPER::DESTROY; } =head2 next_tree Title : next_tree Usage : my $tree = $treeio->next_tree Function: Gets the next tree in the stream Returns : Bio::Tree::TreeI Args : none =cut sub next_tree { my ($self) = @_; my $reader = $self->{'_reader'}; my $tree; while ($reader->read) { if ($reader->nodeType == XML_READER_TYPE_END_ELEMENT) { if ($reader->name eq 'phylogeny') { $tree = $self->end_element_phylogeny(); last; } } $self->processXMLNode; } return $tree; } =head2 add_phyloXML_annotation Title : add_phyloXML_annotation Usage : my $node = $treeio->add_phyloXML_annotation(-obj=>$node, -xml=>$xmlstring) Function: add annotations to a node in the phyloXML format string Returns : the node that we added annotations to Args : -obj => object that will have the Annotation. (Bio::Tree::AnnotatableNode) -xml => string in phyloXML format that describes the annotation for the node =cut sub add_phyloXML_annotation { my ($self, @args) = @_; my ($obj, $xml_string, $attr) = $self->_rearrange([qw(OBJ XML ATTR)], @args); $xml_string = ''.$xml_string.''; $self->debug( $xml_string ); $self->{'_reader'} = XML::LibXML::Reader->new( string => $xml_string, no_blanks => 1 ); my $reader = $self->{'_reader'}; $self->{'_currentannotation'} = []; # holds annotationcollection $self->{'_currenttext'} = ''; $self->{'_id_link'} = {}; # pretend we saw a element $self->{'_lastitem'}->{'clade'}++; push @{$self->{'_lastitem'}->{'current'}}, { 'clade'=>{}}; # current holds current element and empty hash for its attributes # our object to annotate (nodeI) # push into temporary list push @{$self->{'_currentitems'}}, $obj; $reader->read; #read away the first element 'phyloxml' while ($reader->read) { $self->processXMLNode; } # pop from temporary list pop @{$self->{'_currentitems'}}; $self->{'_lastitem'}->{ $reader->name }-- if $reader->name; pop @{$self->{'_lastitem'}->{'current'}}; return $obj; } =head2 write_tree Title : write_tree Usage : $treeio->write_tree($tree); Function: Write a tree out to data stream in phyloxml format Returns : none Args : Bio::Tree::TreeI object =cut sub write_tree { my ($self, @trees) = @_; foreach my $tree (@trees) { my $root = $tree->get_root_node; $self->_print("get_all_tags(); my $attr_str = ''; foreach my $tag (@tags) { my @values = $tree->get_tag_values($tag); foreach (@values) { $attr_str .= " ".$tag."=\"".$_."\""; } } $self->_print($attr_str); $self->_print(">"); if ($root->isa('Bio::Tree::AnnotatableNode')) { $self->_print($self->_write_tree_Helper_annotatableNode($root)); } else { $self->_print($self->_write_tree_Helper_generic($root)); } # print clade relations while (my $str = pop (@{$self->{'_tree_attr'}->{'clade_relation'}})) { $self->_print($str); } # print sequence relations while (my $str = pop (@{$self->{'_tree_attr'}->{'sequence_relation'}})) { $self->_print($str); } $self->_print(""); $self->_print("\n"); } $self->flush if $self->_flush_on_write && defined $self->_fh; return; } =head2 _write_tree_Helper_annotatableNode Title : _write_tree_Helper_annotatableNode Usage : internal method used by write_tree, not to be used directly Function: recursive helper function of write_tree for the annotatableNodes. translates annotations into xml elements. Returns : string describing the node Args : Bio::Node::AnnotatableNode object, string =cut sub _write_tree_Helper_annotatableNode { my ($self, $node, $str) = @_; # this self is a Bio::Tree::phyloxml my $ac = $node->annotation; # if clade_relation exists my @relations = $ac->get_Annotations('clade_relation'); foreach (@relations) { my $clade_rel = $self->_relation_to_string($node, $_, ''); # set as tree attr push (@{$self->{'_tree_attr'}->{'clade_relation'}}, $clade_rel); } # start $str .= 'get_Annotations('_attr'); # check id_source if ($attr) { my ($id_source) = $attr->get_Annotations('id_source'); if ($id_source) { $str .= " id_source=\"".$id_source->value."\""; } } $str .= ">"; # print all descendent nodes foreach my $child ( $node->each_Descendent() ) { $str = $self->_write_tree_Helper_annotatableNode($child, $str); } # print all annotations $str = print_annotation( $node, $str, $ac ); # print all sequences if ($node->has_sequence) { foreach my $seq (@{$node->sequence}) { # if sequence_relation exists my @relations = $seq->annotation->get_Annotations('sequence_relation'); foreach (@relations) { my $sequence_rel = $self->_relation_to_string($seq, $_, ''); # set as tree attr push (@{$self->{'_tree_attr'}->{'sequence_relation'}}, $sequence_rel); } $str = print_seq_annotation( $node, $str, $seq ); } } $str .= ""; return $str; } =head2 _write_tree_Helper_generic Title : _write_tree_Helper_generic Usage : internal method used by write_tree, not to be used directly Function: recursive helper function of write_tree for generic NodesI. all tags are translated into property elements. Returns : string describing the node Args : Bio::Node::NodeI object, string =cut sub _write_tree_Helper_generic { my ($self, $node, $str) = @_; # this self is a Bio::Tree::phyloxml # start $str .= ''; # print all descendent nodes foreach my $child ( $node->each_Descendent() ) { $str = $self->_write_tree_Helper_generic($child, $str); } # print all tags my @tags = $node->get_all_tags(); foreach my $tag (@tags) { my @values = $node->get_tag_values($tag); foreach my $val (@values) { $str .= " "; $str .= " "; } } # print NodeI features if ($node->id) { $str .= ""; $str .= $node->id; $str .= ""; } elsif ($node->branch_length) { $str .= ""; $str .= $node->branch_length; $str .= ""; } elsif ($node->bootstrap) { $str .= ""; $str .= $node->bootstrap; $str .= ""; } $str .= ""; return $str; } =head2 _relation_to_string Title : _relation_to_string Usage : internal method used by write_tree, not to be used directly Function: internal function used by write_tree to translate Annotation::Relation objects into xml elements. Returns : string describing the node Args : Bio::Node::AnnotatableNode (or Bio::SeqI) object that contains the Annotation::Relation, the Annotation::Relation object, the string =cut # It may be more appropriate to make Annotation::Relation have # a to_string callback function, # and have this subroutine set as the callback when we are in # phyloXML context. # I've put it here for now, since write_tree is the only place it is used. sub _relation_to_string { my ($self, $obj, $rel, $str) = @_; my @attr = $obj->annotation->get_Annotations('_attr'); # check id_source if (@attr) { my @id_source = $attr[0]->get_Annotations('id_source'); } my ($id_ref_0) = $obj->annotation->get_nested_Annotations( '-keys' => ['id_source'], '-recursive' => 1); my ($id_ref_1) = $rel->to->annotation->get_nested_Annotations( '-keys' => ['id_source'], '-recursive' => 1); $str .= "<"; $str .= $rel->tagname; $str .= " id_ref_0=\"".$id_ref_0->value."\""; $str .= " id_ref_1=\"".$id_ref_1->value."\""; $str .= " type=\"".$rel->type."\""; $str .= "/>"; return $str; } =head2 read_annotation Title : read_annotation Usage : $treeio->read_annotation(-obj=>$node, -path=>$path, -attr=>1); Function: read text value (or attribute value) of the annotations corresponding to the element path Returns : list of text values of the annotations matching the path Args : -obj => object that contains the Annotation. (Bio::Tree::AnnotatableNode or Bio::SeqI) -path => path of the nested elements -attr => Boolean value to indicate whether to get the attribute of the element or the text value. (default is 0, meaning text value is returned) =cut # It may be more appropriate to make a separate Annotation::phyloXML object # and have this subroutine within that object so it can handle the # reading and writing of the values and attributes. # but since tagTree is a temporary stub and I didn't want to make # a redundant object I've put it here for now. sub read_annotation { my ($self, @args) = @_; my ($obj, $path, $attr) = $self->_rearrange([qw(OBJ PATH ATTR)], @args); my $ac = $obj->annotation; if ($attr) { my @elements = split ('/', $path); my $final = pop @elements; push (@elements, '_attr'); push (@elements, $final); $path = join ('/', @elements); return $self->_read_annotation_attr_Helper( [$ac], $path); } else { return $self->_read_annotation_text_Helper( [$ac], $path); } } sub _read_annotation_text_Helper { my ($self, $acs, $path) = @_; my @elements = split ('/', $path); my $key = shift @elements; my @nextacs = (); foreach my $ac (@$acs) { foreach my $ann ($ac->get_Annotations($key)) { if ($ann->isa('Bio::AnnotationCollectionI')) {push (@nextacs, $ann)} } } if (@elements == 0) { my @values = (); my @texts = map {$_->get_Annotations('_text')} @nextacs; foreach (@texts) { $_ && push (@values, $_->value); } return @values; } else { $path = join ('/', @elements); return $self->_read_annotation_text_Helper( \@nextacs, $path); } } sub _read_annotation_attr_Helper { my ($self, $acs, $path) = @_; my @elements = split ('/', $path); my $key = shift @elements; my @nextacs = (); foreach my $ac (@$acs) { foreach my $ann ($ac->get_Annotations($key)) { if ($ann->isa('Bio::AnnotationCollectionI')) {push (@nextacs, $ann)} } } if (@elements == 1) { my $attrname = $elements[0]; my @sv = map {$_->get_Annotations($attrname)} @nextacs; return map {$_->value} @sv; } else { $path = join ('/', @elements); return $self->_read_annotation_attr_Helper( \@nextacs, $path); } } =head1 Methods for parsing the XML document =cut =head2 processXMLNode Title : processXMLNode Usage : $treeio->processXMLNode Function: read the XML node and process according to the node type Returns : none Args : none =cut sub processXMLNode { my ($self) = @_; my $reader = $self->{'_reader'}; my $nodetype = $reader->nodeType; if ( $nodetype == XML_READER_TYPE_ELEMENT) { $self->{'_lastitem'}->{$reader->name}++; push @{$self->{'_lastitem'}->{'current'}}, { $reader->name=>{}}; # current holds current element and empty hash for its attributes if (exists $self->{'_start_elements'}->{$reader->name}) { my $method = $self->{'_start_elements'}->{$reader->name}; $self->$method(); } else { $self->element_default(); } if ($reader->isEmptyElement) { # element is complete # set nodetype so it can jump and # do procedures for XML_READER_TYPE_END_ELEMENT $nodetype = XML_READER_TYPE_END_ELEMENT; } } if ($nodetype == XML_READER_TYPE_TEXT) { $self->{'_currenttext'} = $reader->value; } if ($nodetype == XML_READER_TYPE_END_ELEMENT) { if (exists $self->{'_end_elements'}->{$reader->name}) { my $method = $self->{'_end_elements'}->{$reader->name}; $self->$method(); } else { $self->end_element_default(); } $self->{'_lastitem'}->{ $reader->name }--; pop @{$self->{'_lastitem'}->{'current'}}; $self->{'_currenttext'} = ''; } } =head2 processAttribute Title : processAttribute Usage : $treeio->processAttribute(\%hash_for_attribute); Function: reads the attributes of the current element into a hash Returns : none Args : hash reference where the attributes will be stored. =cut sub processAttribute { my ($self, $data) = @_; my $reader = $self->{'_reader'}; # several ways of reading attributes: # read all attributes: if ($reader-> moveToFirstAttribute) { do { $data->{$reader->name()} = $reader->value; } while ($reader-> moveToNextAttribute); $reader-> moveToElement; } } =head2 element_phylogeny Title : element_phylogeny Usage : $treeio->element_phylogeny Function: initialize the parsing of a tree Returns : none Args : none =cut sub element_phylogeny { my ($self) = @_; $self->{'_currentitems'} = []; # holds nodes while parsing current level $self->{'_currentnodes'} = []; # holds nodes while constructing tree $self->{'_currentannotation'} = []; # holds annotationcollection $self->{'_currenttext'} = ''; $self->{'_levelcnt'} = []; $self->{'_id_link'} = {}; $self->{'_tree_attr'} = $self->current_attr; $self->processAttribute($self->current_attr); return; } =head2 end_element_phylogeny Title : end_element_phylogeny Usage : $treeio->end_element_phylogeny Function: ends the parsing of a tree building a Tree::TreeI object. Returns : Tree::TreeI Args : none =cut sub end_element_phylogeny { my ($self) = @_; my $root; # if there is more than one node in _currentnodes # aggregate the nodes into trees basically ad-hoc. if ( @{$self->{'_currentnodes'}} > 1) { $root = $self->nodetype->new( -id => '', tostring => \&node_to_string, ); while ( @{$self->{'_currentnodes'}} ) { my ($node) = ( shift @{$self->{'_currentnodes'}}); $root->add_Descendent($node); } } # if there is only one node in _currentnodes # that node is root. elsif ( @{$self->{'_currentnodes'}} == 1) { $root = shift @{$self->{'_currentnodes'}}; } my $tree = $self->treetype->new( -root => $root, -id => $self->current_attr->{'name'}, %{$self->current_attr} ); foreach my $tag ( keys %{$self->current_attr} ) { $tree->add_tag_value( $tag, $self->current_attr->{$tag} ); } return $tree; } =head2 element_clade Title : element_clade Usage : $treeio->element_clade Function: initialize the parsing of a node creates a new AnnotatableNode with annotations Returns : none Args : none =cut sub element_clade { my ($self) = @_; my $reader = $self->{'_reader'}; my %clade_attr = (); # doesn't use current attribute in order to save memory $self->processAttribute(\%clade_attr); # create a node (Annotatable Node) my $tnode = $self->nodetype->new( -id => '', tostring => \&node_to_string, %clade_attr, ); # add all attributes as annotation collection with tag '_attr' my $ac = $tnode->annotation; my $newattr = Bio::Annotation::Collection->new(); foreach my $tag (keys %clade_attr) { my $sv = Bio::Annotation::SimpleValue->new( -value => $clade_attr{$tag} ); $newattr->add_Annotation($tag, $sv); } $ac->add_Annotation('_attr', $newattr); # if there is id_source add clade to _id_link if (exists $clade_attr{'id_source'}) { $self->{'_id_link'}->{$clade_attr{'id_source'}} = $tnode; } # push into temporary list push @{$self->{'_currentitems'}}, $tnode; } =head2 end_element_clade Title : end_element_clade Usage : $treeio->end_element_clade Function: ends the parsing of a node Returns : none Args : none =cut sub end_element_clade { my ($self) = @_; my $reader = $self->{'_reader'}; my $curcount = scalar @{$self->{'_currentnodes'}}; my $level = $reader->depth() - 2; my $childcnt = $self->{'_levelcnt'}->[$level+1] || 0; # pop from temporary list my $tnode = pop @{$self->{'_currentitems'}}; if ( $childcnt > 0) { if( $childcnt > $curcount) { $self->throw("something wrong with event construction treelevel ". "$level is recorded as having $childcnt nodes ". "but current nodes at this level is $curcount\n"); } my @childnodes = splice( @{$self->{'_currentnodes'}}, - $childcnt); for ( @childnodes ) { $tnode->add_Descendent($_); } $self->{'_levelcnt'}->[$level+1] = 0; } push @{$self->{'_currentnodes'}}, $tnode; $self->{'_levelcnt'}->[$level]++; } =head2 element_relation Title : element_relation Usage : $treeio->element_relation Function: starts the parsing of clade relation & sequence relation Returns : none Args : none =cut sub element_relation { my ($self) = @_; $self->processAttribute($self->current_attr); } =head2 end_element_relation Title : end_element_relation Usage : $treeio->end_element_relation Function: ends the parsing of clade relation & sequence relation Returns : none Args : none =cut sub end_element_relation { my ($self) = @_; my $relationtype = $self->current_attr->{'type'}; my $id_ref_0 = $self->current_attr->{'id_ref_0'}; my $id_ref_1 = $self->current_attr->{'id_ref_1'}; my @srcbyidref = (); $srcbyidref[0] = $self->{'_id_link'}->{$id_ref_0}; $srcbyidref[1] = $self->{'_id_link'}->{$id_ref_1}; # exception when id_ref is defined but id_src is not, or vice versa. if ( ($id_ref_0 xor $srcbyidref[0])||($id_ref_1 xor $srcbyidref[1]) ) { $self->throw("id_ref and id_src incompatible: $id_ref_0, $id_ref_1, ", $srcbyidref[0], $srcbyidref[1]); } # set id_ref_0 my $ac0 = $srcbyidref[0]->annotation; my $newann = Bio::Annotation::Relation->new( '-type' => $relationtype, '-to' => $srcbyidref[1], '-tagname' => $self->current_element ); $ac0->add_Annotation($self->current_element, $newann); # set id_ref_1 my $ac1 = $srcbyidref[1]->annotation; $newann = Bio::Annotation::Relation->new( '-type' => $relationtype, '-to' => $srcbyidref[0], '-tagname' => $self->current_element ); $ac1->add_Annotation($self->current_element, $newann); } =head2 element_default Title : element_default Usage : $treeio->element_default Function: starts the parsing of all other elements Returns : none Args : none =cut sub element_default { my ($self) = @_; my $reader = $self->{'_reader'}; my $current = $self->current_element(); my $prev = $self->prev_element(); # read attributes of element $self->processAttribute($self->current_attr); # check idref my $idref = ''; if (exists $self->current_attr->{'id_ref'}) { $idref = $self->current_attr->{'id_ref'}; } my $srcbyidref = ''; $srcbyidref = $self->{'_id_link'}->{$idref}; # exception when id_ref is defined but id_src is not, or vice versa. if ($idref xor $srcbyidref) { $self->throw("id_ref and id_src incompatible: $idref, $srcbyidref"); } # we are annotating a Node # set _currentannotation if ( ($srcbyidref && $srcbyidref->isa($self->nodetype)) || ((!$srcbyidref) && $prev eq 'clade') ) { # find node to annotate my $tnode; if ($srcbyidref) { $tnode = $srcbyidref; } else { $tnode = $self->{'_currentitems'}->[-1]; } my $ac = $tnode->annotation(); # add the new anncollection with the current element as key my $newann = Bio::Annotation::Collection->new(); $ac->add_Annotation($current, $newann); # push to current annotation push (@{$self->{'_currentannotation'}}, $newann); } # we are already within an annotation else { my $ac = $self->{'_currentannotation'}->[-1]; if ($ac) { # add the new anncollection with the current element as key my $newann = Bio::Annotation::Collection->new(); $ac->add_Annotation($current, $newann); push (@{$self->{'_currentannotation'}}, $newann); } } } =head2 end_element_default Title : end_element_default Usage : $treeio->end_element_default Function: ends the parsing of all other elements Returns : none Args : none =cut sub end_element_default { my ($self) = @_; my $reader = $self->{'_reader'}; my $current = $self->current_element(); my $prev = $self->prev_element(); # check idsrc my $idsrc = $self->current_attr->{'id_source'}; # check idref my $idref = ''; if (exists $self->current_attr->{'id_ref'}) { $idref = $self->current_attr->{'id_ref'}; delete $self->current_attr->{'id_ref'}; } my $srcbyidref = ''; $srcbyidref = $self->{'_id_link'}->{$idref}; # exception when id_ref is defined but id_src is not, or vice versa. if ($idref xor $srcbyidref) { $self->throw("id_ref and id_src incompatible: $idref, $srcbyidref"); } # we are annotating a Tree if ((!$srcbyidref) && $prev eq 'phylogeny') { # annotate Tree via tree attribute $self->prev_attr->{$current} = $self->{'_currenttext'}; } # we are within sequence_relation or clade_relation elsif ($prev eq 'clade_relation' || $prev eq 'sequence_relation') { # we are here only with if ($current eq 'confidence') { # need to take care of confidence # not implemented yet.. } else { $self->throw($current, " is not allowed within <*_relation>"); } } # we are annotating a Node if (( $srcbyidref && $srcbyidref->isa($self->nodetype)) || ((!$srcbyidref) && $prev eq 'clade')) { # pop from current annotation my $ac = pop (@{$self->{'_currentannotation'}}); $self->annotateNode( $current, $ac); # additional setups for compatibility with NodeI my $tnode; if ($srcbyidref) { $tnode = $srcbyidref; } else { $tnode = $self->{'_currentitems'}->[-1]; } if ($current eq 'name') { $tnode->id($self->{'_currenttext'}); } elsif ($current eq 'branch_length') { $tnode->branch_length($self->{'_currenttext'}); } elsif ($current eq 'confidence') { if ((exists $self->current_attr->{'type'}) && ($self->current_attr->{'type'} eq 'bootstrap')) { $tnode->bootstrap($self->{'_currenttext'}); # this needs to change (adds 'B' annotation) } } elsif ($current eq 'sequence') { # if annotation is # transform the Bio::Annotation object into a Bio::Seq object my $str = ''; # retrieve the sequence if (my ($molseq) = $ac->get_Annotations('mol_seq')) { my ($strac) = $molseq->get_Annotations('_text'); $str = $strac->value(); } # create Seq object with sequence my $newseq = Bio::Seq->new( -seq => $str, -annotation=>$ac, -nowarnonempty=>1); $tnode->sequence($newseq); $ac->remove_Annotations('mol_seq'); $tnode->annotation->remove_Annotations($current); # if there is id_source add sequence to _id_link if ($idsrc) { $self->{'_id_link'}->{$idsrc} = $newseq; } } elsif ($idsrc && $current eq 'taxonomy') { # if there is id_source add sequence to _id_link $self->{'_id_link'}->{$idsrc} = $ac; } } # we are within a default Annotation else { my $ac = pop (@{$self->{'_currentannotation'}}); if ($ac) { $self->annotateNode( $current, $ac); } } } =head2 annotateNode Title : annotateNode Usage : $treeio->annotateNode($element, $ac) Function: adds text value and attributes to the AnnotationCollection that has element name as key. If there are nested elements, optional AnnotationCollections are added recursively, with the nested element name as key. The structure of each AnnotationCollection is 'element' => AnnotationCollection { '_text' => SimpleValue (text value) '_attr' => AnnotationCollection { attribute1 => SimpleValue (attribute value 1) attribute2 => SimpleValue (attribute value 2) ... } ['nested element' => AnnotationCollection ] } Returns : none Args : none =cut sub annotateNode { my ($self, $element, $newac) = @_; # if attribute exists then add Annotation::Collection with tag '_attr' if ( scalar keys %{$self->current_attr} ) { my $newattr = Bio::Annotation::Collection->new(); foreach my $tag (keys %{$self->current_attr}) { my $sv = Bio::Annotation::SimpleValue->new( -value => $self->current_attr->{$tag} ); $newattr->add_Annotation($tag, $sv); } $newac->add_Annotation('_attr', $newattr); } # if text exists add text as SimpleValue with tag '_text' if ( $self->{'_currenttext'} ) { my $newvalue = Bio::Annotation::SimpleValue->new( -value => $self->{'_currenttext'} ); $newac->add_Annotation('_text', $newvalue); } } =head1 Methods for exploring the document =cut =head2 current_attr Title : current_attr Usage : $attr_hash = $treeio->current_attr; Function: returns the attribute hash for current item Returns : reference of the attribute hash Args : none =cut sub current_attr { my ($self) = @_; return 0 if ! defined $self->{'_lastitem'} || ! defined $self->{'_lastitem'}->{'current'}->[-1]; my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-1]}; (@keys == 1) || die "there should be only one key for each hash"; return $self->{'_lastitem'}->{'current'}->[-1]->{$keys[0]}; } =head2 prev_attr Title : prev_attr Usage : $hash_ref = $treeio->prev_attr Function: returns the attribute hash for previous item Returns : reference of the attribute hash Args : none =cut sub prev_attr { my ($self) = @_; return 0 if ! defined $self->{'_lastitem'} || ! defined $self->{'_lastitem'}->{'current'}->[-2]; my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-2]}; (@keys == 1) || die "there should be only one key for each hash"; return $self->{'_lastitem'}->{'current'}->[-2]->{$keys[0]}; } =head2 current_element Title : current_element Usage : $element = $treeio->current_element Function: returns the name of the current element Returns : string (element name) Args : none =cut sub current_element { my ($self) = @_; return 0 if ! defined $self->{'_lastitem'} || ! defined $self->{'_lastitem'}->{'current'}->[-1]; my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-1]}; (@keys == 1) || die "there should be only one key for each hash"; return $keys[0]; } =head2 prev_element Title : prev_element Usage : $element = $treeio->current_element Function: returns the name of the previous element Returns : string (element name) Args : none =cut sub prev_element { my ($self) = @_; return 0 if ! defined $self->{'_lastitem'} || ! defined $self->{'_lastitem'}->{'current'}->[-2]; my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-2]}; (@keys == 1) || die "there should be only one key for each hash"; return $keys[0]; } =head2 treetype Title : treetype Usage : $obj->treetype($newval) Function: returns the tree type (default is Bio::Tree::Tree) Returns : value of treetype Args : newvalue (optional) =cut sub treetype{ my ($self,$value) = @_; if( defined $value) { $self->{'treetype'} = $value; } return $self->{'treetype'}; } =head2 nodetype Title : nodetype Usage : $obj->nodetype($newval) Function: returns the node type (default is Bio::Node::AnnotatableNode) Returns : value of nodetype Args : newvalue (optional) =cut sub nodetype{ my ($self,$value) = @_; if( defined $value) { $self->{'nodetype'} = $value; } return $self->{'nodetype'}; } =head1 Methods for implementing to_string callback for AnnotatableNode =cut =head2 node_to_string Title : node_to_string Usage : $annotatablenode->to_string_callback(\&node_to_string) Function: set as callback in AnnotatableNode, prints the node information in string Returns : string of node information Args : none =cut # this function is similar to _write_tree_Helper_annotatableNode, # but it is not recursive sub node_to_string { my ($self) = @_; # this self is a Bio::Tree::AnnotatableNode # not a Bio::TreeIO::phyloxml my $str = ''; my $ac = $self->annotation; # start $str .= 'get_Annotations('_attr'); # check id_source if (@attr) { my @id_source = $attr[0]->get_Annotations('id_source'); if (@id_source) { $str .= " id_source=\"".$id_source[0]->value."\""; } } $str .= '>'; # print all annotations $str = print_annotation( $self, $str, $ac ); # print all sequences if ($self->has_sequence) { foreach my $seq (@{$self->sequence}) { $str = print_seq_annotation( $self, $str, $seq ); } } $str .= ''; return $str; } =head2 print_annotation Title : print_annotation Usage : $str = $annotatablenode->print_annotation($str, $annotationcollection) Function: prints the annotationCollection in a phyloXML format. Returns : string of annotation information Args : string to which the Annotation should be concatenated to, annotationCollection that holds the Annotations =cut # Again, it may be more appropriate to make a separate Annotation::phyloXML object # and have this subroutine within that object so it can handle the # reading and writing of the values and attributes. # especially since this function is used both by # Bio::TreeIO::phyloxml (through write_tree) and # Bio::Node::AnnotatableNode (through node_to_string). # but since tagTree is a temporary stub and I didn't want to make # a redundant object I've put it here for now. sub print_annotation { my ($self, $str, $ac) = @_; my @all_anns = $ac->get_Annotations(); foreach my $ann (@all_anns) { my $key = $ann->tagname; if ($key eq '_attr') { next; } # attributes are already printed in the previous level if ($ann->isa('Bio::Annotation::SimpleValue')) { if ($key eq '_text') { $str .= $ann->value; } else { $str .= "<$key>"; $str .= $ann->value; $str .= ""; } } elsif ($ann->isa('Bio::Annotation::Collection')) { my @attrs = $ann->get_Annotations('_attr'); if (@attrs) { # if there is a attribute collection $str .= "<$key"; $str = print_attr($self, $str, $attrs[0]); $str .= ">"; } else { $str .= "<$key>"; } $str = print_annotation($self, $str, $ann); $str .= ""; } } return $str; } =head2 print_attr Title : print_attr Usage : $str = $annotatablenode->print_attr($str, $annotationcollection) Function: prints the annotationCollection in a phyloXML format. Returns : string of attributes Args : string to which the Annotation should be concatenated to, AnnotationCollection that holds the attributes =cut # Again, it may be more appropriate to make a separate Annotation::phyloXML object # and have this subroutine within that object so it can handle the # reading and writing of the values and attributes. # especially since this function is used both by # Bio::TreeIO::phyloxml and Bio::Node::AnnotatableNode # (through print_annotation). # but since tagTree is a temporary stub and I didn't want to make # a redundant object I've put it here for now. sub print_attr { my ($self, $str, $ac) = @_; my @all_attrs = $ac->get_Annotations(); foreach my $attr (@all_attrs) { if (!$attr->isa('Bio::Annotation::SimpleValue')) { $self->throw("attribute should be a SimpleValue"); } $str .= ' '; $str .= $attr->tagname; $str .= '='; $str .= $attr->value; } return $str; } =head2 print_sequence_annotation Title : print_sequence_annotation Usage : $str = $node->print_seq_annotation( $str, $seq ); Function: prints the Bio::Seq object associated with the node in a phyloXML format. Returns : string that describes the sequence Args : string to which the Annotation should be concatenated to, Seq object to print in phyloXML =cut # Again, it may be more appropriate to make a separate Annotation::phyloXML object # and have this subroutine within that object so it can handle the # reading and writing of the values and attributes. # especially since this function is used both by # Bio::TreeIO::phyloxml (through write_tree) and # Bio::Node::AnnotatableNode (through node_to_string). # but since tagTree is a temporary stub and I didn't want to make # a redundant object I've put it here for now. sub print_seq_annotation { my ($self, $str, $seq) = @_; $str .= "annotation->get_Annotations('_attr'); # check id_source if ($attr) { my ($id_source) = $attr->get_Annotations('id_source'); if ($id_source) { $str .= " id_source=\"".$id_source->value."\""; } } $str .= ">"; my @all_anns = $seq->annotation->get_Annotations(); foreach my $ann (@all_anns) { my $key = $ann->tagname; if ($key eq '_attr') { next; } # attributes are already printed in the previous level if ($ann->isa('Bio::Annotation::SimpleValue')) { if ($key eq '_text') { $str .= $ann->value; } else { $str .= "<$key>"; $str .= $ann->value; $str .= ""; } } elsif ($ann->isa('Bio::Annotation::Collection')) { my @attrs = $ann->get_Annotations('_attr'); if (@attrs) { # if there is a attribute collection $str .= "<$key"; $str = print_attr($self, $str, $attrs[0]); $str .= ">"; } else { $str .= "<$key>"; } $str = print_annotation($self, $str, $ann); $str .= ""; } } # print mol_seq if ($seq->seq()) { $str .= ""; $str .= $seq->seq(); $str .= ""; } $str .= ""; return $str; } 1;