# $Id: Tree.pm 16123 2009-09-17 12:57:27Z cjfields $ # # BioPerl module for Bio::Tree::Tree # # Please direct questions and support issues to # # Cared for by Jason Stajich # # Copyright Jason Stajich # # You may distribute this module under the same terms as perl itself # POD documentation - main docs before the code =head1 NAME Bio::Tree::Tree - An Implementation of TreeI interface. =head1 SYNOPSIS # like from a TreeIO my $treeio = Bio::TreeIO->new(-format => 'newick', -file => 'treefile.dnd'); my $tree = $treeio->next_tree; my @nodes = $tree->get_nodes; my $root = $tree->get_root_node; =head1 DESCRIPTION This object holds handles to Nodes which make up a tree. =head1 IMPLEMENTATION NOTE This implementation of Bio::Tree::Tree contains Bio::Tree:::NodeI; mainly linked via the root node. As NodeI can potentially contain circular references (as nodes will need to refer to both parent and child nodes), Bio::Tree::Tree will remove those circular references when the object is garbage-collected. This has some side effects; primarily, one must keep the Tree in scope or have at least one reference to it if working with nodes. The fix is to count the references to the nodes and if it is greater than expected retain all of them, but it requires an additional prereq and thus may not be worth the effort. This only shows up in minor edge cases, though (see Bug #2869). Example of issue: # tree is not assigned to a variable, so passes from memory after # root node is passed my $root = Bio::TreeIO->new(-format => 'newick', -file => 'foo.txt')->next_tree ->get_root_node; # gets nothing, as all Node links are broken when Tree is garbage-collected above my @descendents = $root->get_all_Descendents; =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing list. Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/wiki/Mailing_lists - About the mailing lists =head2 Support Please direct usage questions or support issues to the mailing list: I rather than to the module maintainer directly. Many experienced and reponsive experts will be able look at the problem and quickly address it. Please include a thorough description of the problem with code and data examples if at all possible. =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track of the bugs and their resolution. Bug reports can be submitted via the web: http://bugzilla.open-bio.org/ =head1 AUTHOR - Jason Stajich Email jason@bioperl.org =head1 CONTRIBUTORS Aaron Mackey amackey@virginia.edu Sendu Bala bix@sendu.me.uk Mark A. Jensen maj@fortinbras.us =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::Tree::Tree; use strict; # Object preamble - inherits from Bio::Root::Root use base qw(Bio::Root::Root Bio::Tree::TreeI Bio::Tree::TreeFunctionsI); =head2 new Title : new Usage : my $obj = Bio::Tree::Tree->new(); Function: Builds a new Bio::Tree::Tree object Returns : Bio::Tree::Tree Args : -root => L object which is the root OR -node => L object from which the root will be determined -nodelete => boolean, whether or not to try and cleanup all the nodes when this this tree goes out of scope. -id => optional tree ID -score => optional tree score value =cut sub new { my($class,@args) = @_; my $self = $class->SUPER::new(@args); $self->{'_rootnode'} = undef; $self->{'_maxbranchlen'} = 0; $self->_register_for_cleanup(\&cleanup_tree); my ($root,$node,$nodel,$id,$score)= $self->_rearrange([qw(ROOT NODE NODELETE ID SCORE)], @args); if ($node && ! $root) { $self->throw("Must supply a Bio::Tree::NodeI") unless ref($node) && $node->isa('Bio::Tree::NodeI'); my @lineage = $self->get_lineage_nodes($node); $root = shift(@lineage) || $node; # to stop us pulling in entire database of a Bio::Taxon when we later do # get_nodes() or similar, specifically set ancestor() for each node if ($node->isa('Bio::Taxon')) { push(@lineage, $node) unless $node eq $root; my $ancestor = $root; foreach my $lineage_node (@lineage) { $lineage_node->ancestor($ancestor); } continue { $ancestor = $lineage_node; } } } if ($root) { $self->set_root_node($root); } $self->nodelete($nodel || 0); $self->id($id) if defined $id; $self->score($score) if defined $score; return $self; } =head2 nodelete Title : nodelete Usage : $obj->nodelete($newval) Function: Get/Set Boolean whether or not to delete the underlying nodes when it goes out of scope. By default this is false meaning trees are cleaned up. Returns : boolean Args : on set, new boolean value =cut sub nodelete{ my $self = shift; return $self->{'nodelete'} = shift if @_; return $self->{'nodelete'}; } =head2 get_nodes Title : get_nodes Usage : my @nodes = $tree->get_nodes() Function: Return list of Bio::Tree::NodeI objects Returns : array of Bio::Tree::NodeI objects Args : (named values) hash with one value order => 'b|breadth' first order or 'd|depth' first order =cut sub get_nodes{ my ($self, @args) = @_; my ($order, $sortby) = $self->_rearrange([qw(ORDER SORTBY)],@args); $order ||= 'depth'; $sortby ||= 'none'; my $node = $self->get_root_node || return; if ($order =~ m/^b|(breadth)$/oi) { my @children = ($node); for (@children) { push @children, $_->each_Descendent($sortby); } return @children; } if ($order =~ m/^d|(depth)$/oi) { # this is depth-first search I believe my @children = ($node,$node->get_all_Descendents($sortby)); return @children; } } =head2 get_root_node Title : get_root_node Usage : my $node = $tree->get_root_node(); Function: Get the Top Node in the tree, in this implementation Trees only have one top node. Returns : Bio::Tree::NodeI object Args : none =cut sub get_root_node{ my ($self) = @_; return $self->{'_rootnode'}; } =head2 set_root_node Title : set_root_node Usage : $tree->set_root_node($node) Function: Set the Root Node for the Tree Returns : Bio::Tree::NodeI Args : Bio::Tree::NodeI =cut sub set_root_node{ my $self = shift; if( @_ ) { my $value = shift; if( defined $value && ! $value->isa('Bio::Tree::NodeI') ) { $self->warn("Trying to set the root node to $value which is not a Bio::Tree::NodeI"); return $self->get_root_node; } $self->{'_rootnode'} = $value; } return $self->get_root_node; } =head2 total_branch_length Title : total_branch_length Usage : my $size = $tree->total_branch_length Function: Returns the sum of the length of all branches Returns : real Args : none =cut sub total_branch_length { shift->subtree_length } =head2 subtree_length Title : subtree_length Usage : my $subtree_size = $tree->subtree_length($internal_node) Function: Returns the sum of the length of all branches in a subtree under the node. Calculates the size of the whole tree without an argument (but only if root node is defined) Returns : real or undef Args : Bio::Tree::NodeI object, defaults to the root node =cut sub subtree_length { my $tree = shift; my $node = shift || $tree->get_root_node; return unless $node; my $sum = 0; for ( $node->get_all_Descendents ) { $sum += $_->branch_length || 0; } return $sum; } =head2 id Title : id Usage : my $id = $tree->id(); Function: An id value for the tree Returns : scalar Args : [optional] new value to set =cut sub id{ my ($self,$val) = @_; if( defined $val ) { $self->{'_treeid'} = $val; } return $self->{'_treeid'}; } =head2 score Title : score Usage : $obj->score($newval) Function: Sets the associated score with this tree This is a generic slot which is probably best used for log likelihood or other overall tree score Returns : value of score Args : newvalue (optional) =cut sub score{ my ($self,$val) = @_; if( defined $val ) { $self->{'_score'} = $val; } return $self->{'_score'}; } # decorated interface TreeI Implements this =head2 height Title : height Usage : my $height = $tree->height Function: Gets the height of tree - this LOG_2($number_nodes) WARNING: this is only true for strict binary trees. The TreeIO system is capable of building non-binary trees, for which this method will currently return an incorrect value!! Returns : integer Args : none =head2 number_nodes Title : number_nodes Usage : my $size = $tree->number_nodes Function: Returns the number of nodes in the tree Returns : integer Args : none =cut =head2 as_text Title : as_text Usage : my $tree_as_string = $tree->as_text($format) Function: Returns the tree as a string representation in the desired format (currently 'newick', 'nhx', or 'tabtree') Returns : scalar string Args : format type as specified by Bio::TreeIO Note : This method loads the Bio::TreeIO::$format module on the fly, and commandeers the _write_tree_Helper routine therein to create the tree string. =cut sub as_text { my $self = shift; my $format = shift; my @parms; my $iomod = "Bio::TreeIO::$format"; $self->_load_module($iomod); # following currently not really necessary, but who knows? my $io = $iomod->new(-format=>$format, -file=>File::Spec->devnull()); no strict "refs"; my $iowtH = *{$iomod."::_write_tree_Helper"}{CODE}; use strict "refs"; for ($format) { /newick/ && do { @parms = ( $io->bootstrap_style, $io->order_by, 0 ); last; }; /nhx/ && do { @parms = ( 0 ); last; }; /tabtree/ && do { @parms = ( "" ); last; }; # default $self->throw("as_text does not allow format '$format'") } # newline_each_node... my $data = [$iowtH->($self->get_root_node, @parms)]; if ($format eq 'tabtree') { return $$data[0]."\n"; } else { return join(",", @$data).";\n"; } } =head2 Methods for associating Tag/Values with a Tree These methods associate tag/value pairs with a Tree =head2 set_tag_value Title : set_tag_value Usage : $tree->set_tag_value($tag,$value) $tree->set_tag_value($tag,@values) Function: Sets a tag value(s) to a tree. Replaces old values. Returns : number of values stored for this tag Args : $tag - tag name $value - value to store for the tag =cut sub set_tag_value{ my ($self,$tag,@values) = @_; if( ! defined $tag || ! scalar @values ) { $self->warn("cannot call set_tag_value with an undefined value"); } $self->remove_tag ($tag); map { push @{$self->{'_tags'}->{$tag}}, $_ } @values; return scalar @{$self->{'_tags'}->{$tag}}; } =head2 add_tag_value Title : add_tag_value Usage : $tree->add_tag_value($tag,$value) Function: Adds a tag value to a tree Returns : number of values stored for this tag Args : $tag - tag name $value - value to store for the tag =cut sub add_tag_value{ my ($self,$tag,$value) = @_; if( ! defined $tag || ! defined $value ) { $self->warn("cannot call add_tag_value with an undefined value"); } push @{$self->{'_tags'}->{$tag}}, $value; return scalar @{$self->{'_tags'}->{$tag}}; } =head2 remove_tag Title : remove_tag Usage : $tree->remove_tag($tag) Function: Remove the tag and all values for this tag Returns : boolean representing success (0 if tag does not exist) Args : $tag - tagname to remove =cut sub remove_tag { my ($self,$tag) = @_; if( exists $self->{'_tags'}->{$tag} ) { $self->{'_tags'}->{$tag} = undef; delete $self->{'_tags'}->{$tag}; return 1; } return 0; } =head2 remove_all_tags Title : remove_all_tags Usage : $tree->remove_all_tags() Function: Removes all tags Returns : None Args : None =cut sub remove_all_tags{ my ($self) = @_; $self->{'_tags'} = {}; return; } =head2 get_all_tags Title : get_all_tags Usage : my @tags = $tree->get_all_tags() Function: Gets all the tag names for this Tree Returns : Array of tagnames Args : None =cut sub get_all_tags{ my ($self) = @_; my @tags = sort keys %{$self->{'_tags'} || {}}; return @tags; } =head2 get_tag_values Title : get_tag_values Usage : my @values = $tree->get_tag_values($tag) Function: Gets the values for given tag ($tag) Returns : Array of values or empty list if tag does not exist Args : $tag - tag name =cut sub get_tag_values{ my ($self,$tag) = @_; return wantarray ? @{$self->{'_tags'}->{$tag} || []} : (@{$self->{'_tags'}->{$tag} || []})[0]; } =head2 has_tag Title : has_tag Usage : $tree->has_tag($tag) Function: Boolean test if tag exists in the Tree Returns : Boolean Args : $tag - tagname =cut sub has_tag { my ($self,$tag) = @_; return exists $self->{'_tags'}->{$tag}; } # -- private internal methods -- sub cleanup_tree { my $self = shift; unless( $self->nodelete ) { for my $node ($self->get_nodes(-order => 'b', -sortby => 'none')) { #$node->ancestor(undef); #$node = undef; $node->node_cleanup; undef $node; } } $self->{'_rootnode'} = undef; } 1;