=head1 LICENSE Copyright (c) 1999-2013 The European Bioinformatics Institute and Genome Research Limited. All rights reserved. This software is distributed under a modified Apache license. For license details, please see http://www.ensembl.org/info/about/code_licence.html =head1 CONTACT Please email comments or questions to the public Ensembl developers list at . Questions may also be sent to the Ensembl help desk at . =cut =head1 NAME Bio::EnsEMBL::Mapper =head1 SYNOPSIS $map = Bio::EnsEMBL::Mapper->new( 'rawcontig', 'chromosome' ); # add a coodinate mapping - supply two pairs or coordinates $map->add_map_coordinates( $contig_id, $contig_start, $contig_end, $contig_ori, $chr_name, chr_start, $chr_end ); # map from one coordinate system to another my @coordlist = $mapper->map_coordinates( 627012, 2, 5, -1, "rawcontig" ); =head1 DESCRIPTION Generic mapper to provide coordinate transforms between two disjoint coordinate systems. This mapper is intended to be 'context neutral' - in that it does not contain any code relating to any particular coordinate system. This is provided in, for example, Bio::EnsEMBL::AssemblyMapper. =head1 METHODS =cut package Bio::EnsEMBL::Mapper; use strict; use integer; use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning stack_trace_dump); use Bio::EnsEMBL::Mapper::Pair; use Bio::EnsEMBL::Mapper::IndelPair; use Bio::EnsEMBL::Mapper::Unit; use Bio::EnsEMBL::Mapper::Coordinate; use Bio::EnsEMBL::Mapper::IndelCoordinate; use Bio::EnsEMBL::Mapper::Gap; use Bio::EnsEMBL::Utils::Exception qw(throw); # use Data::Dumper; =head2 new Arg [1] : string $from The name of the 'from' coordinate system Arg [2] : string $to The name of the 'to' coordinate system Arg [3] : (optional) Bio::EnsEMBL::CoordSystem $from_cs The 'from' coordinate system Arg [4] : (optional) Bio::EnsEMBL::CoordSystem $to_cs Example : my $mapper = Bio::EnsEMBL::Mapper->new('FROM', 'TO'); Description: Constructor. Creates a new Bio::EnsEMBL::Mapper object. Returntype : Bio::EnsEMBL::Mapper Exceptions : none Caller : general =cut sub new { my ( $proto, $from, $to, $from_cs, $to_cs ) = @_; if ( !defined($to) || !defined($from) ) { throw("Must supply 'to' and 'from' tags"); } my $class = ref($proto) || $proto; my $self = bless( { "_pair_$from" => {}, "_pair_$to" => {}, 'pair_count' => 0, 'to' => $to, 'from' => $from, 'to_cs' => $to_cs, 'from_cs' => $from_cs }, $class ); # do sql to get any componente with muliple assemblys. return $self; } =head2 flush Args : none Example : none Description: removes all cached information out of this mapper Returntype : none Exceptions : none Caller : AssemblyMapper, ChainedAssemblyMapper =cut sub flush { my $self = shift; my $from = $self->from(); my $to = $self->to(); $self->{"_pair_$from"} = {}; $self->{"_pair_$to"} = {}; $self->{'pair_count'} = 0; } =head2 map_coordinates Arg 1 string $id id of 'source' sequence Arg 2 int $start start coordinate of 'source' sequence Arg 3 int $end end coordinate of 'source' sequence Arg 4 int $strand raw contig orientation (+/- 1) Arg 5 string $type nature of transform - gives the type of coordinates to be transformed *from* Function generic map method Returntype array of Bio::EnsEMBL::Mapper::Coordinate and/or Bio::EnsEMBL::Mapper::Gap Exceptions none Caller Bio::EnsEMBL::Mapper =cut sub map_coordinates { my ( $self, $id, $start, $end, $strand, $type ) = @_; unless ( defined($id) && defined($start) && defined($end) && defined($strand) && defined($type) ) { throw("Expecting 5 arguments"); } # special case for handling inserts: if ( $start == $end + 1 ) { return $self->map_insert( $id, $start, $end, $strand, $type ); } if ( !$self->{'_is_sorted'} ) { $self->_sort() } my $hash = $self->{"_pair_$type"}; my ( $from, $to, $cs ); if ( $type eq $self->{'to'} ) { $from = 'to'; $to = 'from'; $cs = $self->{'from_cs'}; } else { $from = 'from'; $to = 'to'; $cs = $self->{'to_cs'}; } unless ( defined $hash ) { throw("Type $type is neither to or from coordinate systems"); } if ( !defined $hash->{ uc($id) } ) { # one big gap! my $gap = Bio::EnsEMBL::Mapper::Gap->new( $start, $end ); return $gap; } my $last_used_pair; my @result; my ( $start_idx, $end_idx, $mid_idx, $pair, $self_coord ); my $lr = $hash->{ uc($id) }; $start_idx = 0; $end_idx = $#{$lr}; # binary search the relevant pairs # helps if the list is big while ( ( $end_idx - $start_idx ) > 1 ) { $mid_idx = ( $start_idx + $end_idx ) >> 1; $pair = $lr->[$mid_idx]; $self_coord = $pair->{$from}; if ( $self_coord->{'end'} < $start ) { $start_idx = $mid_idx; } else { $end_idx = $mid_idx; } } my $rank = 0; my $orig_start = $start; my $last_target_coord = undef; for ( my $i = $start_idx; $i <= $#{$lr}; $i++ ) { $pair = $lr->[$i]; my $self_coord = $pair->{$from}; my $target_coord = $pair->{$to}; # # But not the case for haplotypes!! need to test for this case??? # so removing this till a better solution is found # # # if($self_coord->{'start'} < $start){ # $start = $orig_start; # $rank++; # } if ( defined($last_target_coord) and $target_coord->{'id'} ne $last_target_coord ) { if ( $self_coord->{'start'} < $start ) { # i.e. the same bit is being mapped to another assembled bit $start = $orig_start; } } else { $last_target_coord = $target_coord->{'id'}; } # if we haven't even reached the start, move on if ( $self_coord->{'end'} < $orig_start ) { next; } # if we have over run, break if ( $self_coord->{'start'} > $end ) { last; } if ( $start < $self_coord->{'start'} ) { # gap detected my $gap = Bio::EnsEMBL::Mapper::Gap->new( $start, $self_coord->{'start'} - 1, $rank ); push( @result, $gap ); $start = $gap->{'end'} + 1; } my ( $target_start, $target_end, $target_ori ); my $res; if ( exists $pair->{'indel'} ) { # When next pair is an IndelPair and not a Coordinate, create the # new mapping Coordinate, the IndelCoordinate. $target_start = $target_coord->{'start'}; $target_end = $target_coord->{'end'}; #create a Gap object my $gap = Bio::EnsEMBL::Mapper::Gap->new( $start, ( $self_coord->{'end'} < $end ? $self_coord->{'end'} : $end ) ); #create the Coordinate object my $coord = Bio::EnsEMBL::Mapper::Coordinate->new( $target_coord->{'id'}, $target_start, $target_end, $pair->{'ori'}*$strand, $cs ); #and finally, the IndelCoordinate object with $res = Bio::EnsEMBL::Mapper::IndelCoordinate->new( $gap, $coord ); } else { # start is somewhere inside the region if ( $pair->{'ori'} == 1 ) { $target_start = $target_coord->{'start'} + ( $start - $self_coord->{'start'} ); } else { $target_end = $target_coord->{'end'} - ( $start - $self_coord->{'start'} ); } # Either we are enveloping this map or not. If yes, then end # point (self perspective) is determined solely by target. If # not we need to adjust. if ( $end > $self_coord->{'end'} ) { # enveloped if ( $pair->{'ori'} == 1 ) { $target_end = $target_coord->{'end'}; } else { $target_start = $target_coord->{'start'}; } } else { # need to adjust end if ( $pair->{'ori'} == 1 ) { $target_end = $target_coord->{'start'} + ( $end - $self_coord->{'start'} ); } else { $target_start = $target_coord->{'end'} - ( $end - $self_coord->{'start'} ); } } $res = Bio::EnsEMBL::Mapper::Coordinate->new( $target_coord->{'id'}, $target_start, $target_end, $pair->{'ori'}*$strand, $cs, $rank ); } ## end else [ if ( exists $pair->{'indel'...})] push( @result, $res ); $last_used_pair = $pair; $start = $self_coord->{'end'} + 1; } ## end for ( my $i = $start_idx...) if ( !defined $last_used_pair ) { my $gap = Bio::EnsEMBL::Mapper::Gap->new( $start, $end ); push( @result, $gap ); } elsif ( $last_used_pair->{$from}->{'end'} < $end ) { # gap at the end my $gap = Bio::EnsEMBL::Mapper::Gap->new( $last_used_pair->{$from}->{'end'} + 1, $end, $rank ); push( @result, $gap ); } if ( $strand == -1 ) { @result = reverse(@result); } return @result; } ## end sub map_coordinates =head2 map_insert Arg [1] : string $id Arg [2] : int $start - start coord. Since this is an insert should always be one greater than end. Arg [3] : int $end - end coord. Since this is an insert should always be one less than start. Arg [4] : int $strand (0, 1, -1) Arg [5] : string $type - the coordinate system name the coords are from. Arg [6] : boolean $fastmap - if specified, this is being called from the fastmap call. The mapping done is not any faster for inserts, but the return value is different. Example : Description: This is in internal function which handles the special mapping case for inserts (start = end +1). This function will be called automatically by the map function so there is no reason to call it directly. Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and/or Gap objects Exceptions : none Caller : map_coordinates() =cut sub map_insert { my ($self, $id, $start, $end, $strand, $type, $fastmap) = @_; # swap start/end and map the resultant 2bp coordinate ($start, $end) =($end,$start); my @coords = $self->map_coordinates($id, $start, $end, $strand, $type); if(@coords == 1) { my $c = $coords[0]; # swap start and end to convert back into insert ($c->{'start'}, $c->{'end'}) = ($c->{'end'}, $c->{'start'}); } else { throw("Unexpected: Got ",scalar(@coords)," expected 2.") if(@coords != 2); # adjust coordinates, remove gaps my ($c1, $c2); if($strand == -1) { ($c2,$c1) = @coords; } else { ($c1, $c2) = @coords; } @coords = (); if(ref($c1) eq 'Bio::EnsEMBL::Mapper::Coordinate') { # insert is after first coord if($c1->{'strand'} * $strand == -1) { $c1->{'end'}--; } else { $c1->{'start'}++; } @coords = ($c1); } if(ref($c2) eq 'Bio::EnsEMBL::Mapper::Coordinate') { # insert is before second coord if($c2->{'strand'} * $strand == -1) { $c2->{'start'}++; } else { $c2->{'end'}--; } if($strand == -1) { unshift @coords, $c2; } else { push @coords, $c2; } } } if($fastmap) { return undef if(@coords != 1); my $c = $coords[0]; return ($c->{'id'}, $c->{'start'}, $c->{'end'}, $c->{'strand'}, $c->{'coord_system'}); } return @coords; } =head2 fastmap Arg 1 string $id id of 'source' sequence Arg 2 int $start start coordinate of 'source' sequence Arg 3 int $end end coordinate of 'source' sequence Arg 4 int $strand raw contig orientation (+/- 1) Arg 5 int $type nature of transform - gives the type of coordinates to be transformed *from* Function inferior map method. Will only do ungapped unsplit mapping. Will return id, start, end strand in a list. Returntype list of results Exceptions none Caller Bio::EnsEMBL::AssemblyMapper =cut sub fastmap { my ($self, $id, $start, $end, $strand, $type) = @_; my ($from, $to, $cs); if($end+1 == $start) { return $self->map_insert($id, $start, $end, $strand, $type, 1); } if( ! $self->{'_is_sorted'} ) { $self->_sort() } if($type eq $self->{'to'}) { $from = 'to'; $to = 'from'; $cs = $self->{'from_cs'}; } else { $from = 'from'; $to = 'to'; $cs = $self->{'to_cs'}; } my $hash = $self->{"_pair_$type"} or throw("Type $type is neither to or from coordinate systems"); my $pairs = $hash->{uc($id)}; foreach my $pair (@$pairs) { my $self_coord = $pair->{$from}; my $target_coord = $pair->{$to}; # only super easy mapping is done if( $start < $self_coord->{'start'} || $end > $self_coord->{'end'} ) { next; } if( $pair->{'ori'} == 1 ) { return ( $target_coord->{'id'}, $target_coord->{'start'}+$start-$self_coord->{'start'}, $target_coord->{'start'}+$end-$self_coord->{'start'}, $strand, $cs ); } else { return ( $target_coord->{'id'}, $target_coord->{'end'} - ($end - $self_coord->{'start'}), $target_coord->{'end'} - ($start - $self_coord->{'start'}), -$strand, $cs ); } } return (); } =head2 add_map_coordinates Arg 1 int $id id of 'source' sequence Arg 2 int $start start coordinate of 'source' sequence Arg 3 int $end end coordinate of 'source' sequence Arg 4 int $strand relative orientation of source and target (+/- 1) Arg 5 int $id id of 'target' sequence Arg 6 int $start start coordinate of 'target' sequence Arg 7 int $end end coordinate of 'target' sequence Function Stores details of mapping between 'source' and 'target' regions. Returntype none Exceptions none Caller Bio::EnsEMBL::Mapper =cut sub add_map_coordinates { my ( $self, $contig_id, $contig_start, $contig_end, $contig_ori, $chr_name, $chr_start, $chr_end ) = @_; unless ( defined($contig_id) && defined($contig_start) && defined($contig_end) && defined($contig_ori) && defined($chr_name) && defined($chr_start) && defined($chr_end) ) { throw("7 arguments expected"); } if ( ( $contig_end - $contig_start ) != ( $chr_end - $chr_start ) ) { throw("Cannot deal with mis-lengthed mappings so far"); } my $from = Bio::EnsEMBL::Mapper::Unit->new( $contig_id, $contig_start, $contig_end ); my $to = Bio::EnsEMBL::Mapper::Unit->new( $chr_name, $chr_start, $chr_end ); my $pair = Bio::EnsEMBL::Mapper::Pair->new( $from, $to, $contig_ori ); # place into hash on both ids my $map_to = $self->{'to'}; my $map_from = $self->{'from'}; push( @{ $self->{"_pair_$map_to"}->{ uc($chr_name) } }, $pair ); push( @{ $self->{"_pair_$map_from"}->{ uc($contig_id) } }, $pair ); $self->{'pair_count'}++; $self->{'_is_sorted'} = 0; } ## end sub add_map_coordinates =head2 add_indel_coordinates Arg 1 int $id id of 'source' sequence Arg 2 int $start start coordinate of 'source' sequence Arg 3 int $end end coordinate of 'source' sequence Arg 4 int $strand relative orientation of source and target (+/- 1) Arg 5 int $id id of 'targe' sequence Arg 6 int $start start coordinate of 'targe' sequence Arg 7 int $end end coordinate of 'targe' sequence Function stores details of mapping between two regions: 'source' and 'target'. Returns 1 if the pair was added, 0 if it was already in. Used when adding an indel Returntype int 0,1 Exceptions none Caller Bio::EnsEMBL::Mapper =cut sub add_indel_coordinates{ my ($self, $contig_id, $contig_start, $contig_end, $contig_ori, $chr_name, $chr_start, $chr_end) = @_; unless(defined($contig_id) && defined($contig_start) && defined($contig_end) && defined($contig_ori) && defined($chr_name) && defined($chr_start) && defined($chr_end)) { throw("7 arguments expected"); } #we need to create the IndelPair object to add to both lists, to and from my $from = Bio::EnsEMBL::Mapper::Unit->new($contig_id, $contig_start, $contig_end); my $to = Bio::EnsEMBL::Mapper::Unit->new($chr_name, $chr_start, $chr_end); my $pair = Bio::EnsEMBL::Mapper::IndelPair->new($from, $to, $contig_ori); # place into hash on both ids my $map_to = $self->{'to'}; my $map_from = $self->{'from'}; push( @{$self->{"_pair_$map_to"}->{uc($chr_name)}}, $pair ); push( @{$self->{"_pair_$map_from"}->{uc($contig_id)}}, $pair ); $self->{'pair_count'}++; $self->{'_is_sorted'} = 0; return 1; } =head2 map_indel Arg [1] : string $id Arg [2] : int $start - start coord. Since this is an indel should always be one greater than end. Arg [3] : int $end - end coord. Since this is an indel should always be one less than start. Arg [4] : int $strand (0, 1, -1) Arg [5] : string $type - the coordinate system name the coords are from. Example : @coords = $mapper->map_indel(); Description: This is in internal function which handles the special mapping case for indels (start = end +1). It will be used to map from a coordinate system with a gap to another that contains an insertion. It will be mainly used by the Variation API. Returntype : Bio::EnsEMBL::Mapper::Unit objects Exceptions : none Caller : general =cut sub map_indel { my ( $self, $id, $start, $end, $strand, $type ) = @_; # swap start/end and map the resultant 2bp coordinate ( $start, $end ) = ( $end, $start ); if ( !$self->{'_is_sorted'} ) { $self->_sort() } my $hash = $self->{"_pair_$type"}; my ( $from, $to, $cs ); if ( $type eq $self->{'to'} ) { $from = 'to'; $to = 'from'; $cs = $self->{'from_cs'}; } else { $from = 'from'; $to = 'to'; $cs = $self->{'to_cs'}; } unless ( defined $hash ) { throw("Type $type is neither to or from coordinate systems"); } my @indel_coordinates; my ( $start_idx, $end_idx, $mid_idx, $pair, $self_coord ); my $lr = $hash->{ uc($id) }; $start_idx = 0; $end_idx = $#{$lr}; # binary search the relevant pairs # helps if the list is big while ( ( $end_idx - $start_idx ) > 1 ) { $mid_idx = ( $start_idx + $end_idx ) >> 1; $pair = $lr->[$mid_idx]; $self_coord = $pair->{$from}; if ( $self_coord->{'end'} <= $start ) { $start_idx = $mid_idx; } else { $end_idx = $mid_idx; } } for ( my $i = $start_idx; $i <= $#{$lr}; $i++ ) { $pair = $lr->[$i]; my $self_coord = $pair->{$from}; my $target_coord = $pair->{$to}; if ( exists $pair->{'indel'} ) { #need to return unit coordinate my $to = Bio::EnsEMBL::Mapper::Unit->new( $target_coord->{'id'}, $target_coord->{'start'}, $target_coord->{'end'}, ); push @indel_coordinates, $to; last; } } return @indel_coordinates; } ## end sub map_indel =head2 add_Mapper Arg 1 Bio::EnsEMBL::Mapper $mapper2 Example $mapper->add_Mapper($mapper2) Function add all the map coordinates from $mapper to this mapper. This object will contain mapping pairs from both the old object and $mapper2. Returntype int 0,1 Exceptions throw if 'to' and 'from' from both Bio::EnsEMBL::Mappers are incompatible Caller $mapper->methodname() =cut sub add_Mapper{ my ($self, $mapper) = @_; my $mapper_to = $mapper->{'to'}; my $mapper_from = $mapper->{'from'}; if ($mapper_to ne $self->{'to'} or $mapper_from ne $self->{'from'}) { throw("Trying to add an incompatible Mapper"); } my $count_a = 0; foreach my $seq_name (keys %{$mapper->{"_pair_$mapper_to"}}) { push(@{$self->{"_pair_$mapper_to"}->{$seq_name}}, @{$mapper->{"_pair_$mapper_to"}->{$seq_name}}); $count_a += scalar(@{$mapper->{"_pair_$mapper_to"}->{$seq_name}}); } my $count_b = 0; foreach my $seq_name (keys %{$mapper->{"_pair_$mapper_from"}}) { push(@{$self->{"_pair_$mapper_from"}->{$seq_name}}, @{$mapper->{"_pair_$mapper_from"}->{$seq_name}}); $count_b += scalar(@{$mapper->{"_pair_$mapper_from"}->{$seq_name}}); } if ($count_a == $count_b) { $self->{'pair_count'} += $count_a; } else { throw("Trying to add a funny Mapper"); } $self->{'_is_sorted'} = 0; return 1; } =head2 list_pairs Arg 1 int $id id of 'source' sequence Arg 2 int $start start coordinate of 'source' sequence Arg 3 int $end end coordinate of 'source' sequence Arg 4 string $type nature of transform - gives the type of coordinates to be transformed *from* Function list all pairs of mappings in a region Returntype list of Bio::EnsEMBL::Mapper::Pair Exceptions none Caller Bio::EnsEMBL::Mapper =cut sub list_pairs { my ( $self, $id, $start, $end, $type ) = @_; if ( !$self->{'_is_sorted'} ) { $self->_sort() } if ( !defined $type ) { throw("Expected 4 arguments"); } if ( $start > $end ) { throw( "Start is greater than end " . "for id $id, start $start, end $end\n" ); } my $hash = $self->{"_pair_$type"}; my ( $from, $to ); if ( $type eq $self->{'to'} ) { $from = 'to'; $to = 'from'; } else { $from = 'from'; $to = 'to'; } unless ( defined $hash ) { throw("Type $type is neither to or from coordinate systems"); } my @list; unless ( exists $hash->{ uc($id) } ) { return (); } @list = @{ $hash->{ uc($id) } }; my @output; if ( $start == -1 && $end == -1 ) { return @list; } else { foreach my $p (@list) { if ( $p->{$from}->{'end'} < $start ) { next; } if ( $p->{$from}->{'start'} > $end ) { last; } push( @output, $p ); } return @output; } } ## end sub list_pairs =head2 to Arg 1 Bio::EnsEMBL::Mapper::Unit $id id of 'source' sequence Function accessor method form the 'source' and 'target' in a Mapper::Pair Returntype Bio::EnsEMBL::Mapper::Unit Exceptions none Caller Bio::EnsEMBL::Mapper =cut sub to { my ( $self, $value ) = @_; if ( defined($value) ) { $self->{'to'} = $value; } return $self->{'to'}; } =head2 from Arg 1 Bio::EnsEMBL::Mapper::Unit $id id of 'source' sequence Function accessor method form the 'source' and 'target' in a Mapper::Pair Returntype Bio::EnsEMBL::Mapper::Unit Exceptions none Caller Bio::EnsEMBL::Mapper =cut sub from { my ( $self, $value ) = @_; if ( defined($value) ) { $self->{'from'} = $value; } return $self->{'from'}; } # _dump # # Arg 1 *FileHandle $fh # Function convenience dump function # possibly useful for debugging # Returntype none # Exceptions none # Caller internal # sub _dump{ my ($self,$fh) = @_; if( !defined $fh ) { $fh = \*STDERR; } foreach my $id ( keys %{$self->{'_pair_hash_from'}} ) { print $fh "From Hash $id\n"; foreach my $pair ( @{$self->{'_pair_hash_from'}->{uc($id)}} ) { print $fh " ",$pair->from->start," ",$pair->from->end,":",$pair->to->start," ",$pair->to->end," ",$pair->to->id,"\n"; } } } # _sort # # Function sort function so that all # mappings are sorted by # chromosome start # Returntype none # Exceptions none # Caller internal # sub _sort { my ($self) = @_; my $to = $self->{'to'}; my $from = $self->{'from'}; foreach my $id ( keys %{ $self->{"_pair_$from"} } ) { @{ $self->{"_pair_$from"}->{$id} } = sort { $a->{'from'}->{'start'} <=> $b->{'from'}->{'start'} } @{ $self->{"_pair_$from"}->{$id} }; } foreach my $id ( keys %{ $self->{"_pair_$to"} } ) { @{ $self->{"_pair_$to"}->{$id} } = sort { $a->{'to'}->{'start'} <=> $b->{'to'}->{'start'} } @{ $self->{"_pair_$to"}->{$id} }; } $self->_merge_pairs(); $self->_is_sorted(1); } # this function merges pairs that are adjacent into one sub _merge_pairs { my $self = shift; my ( $lr, $lr_from, $del_pair, $next_pair, $current_pair ); my $map_to = $self->{'to'}; my $map_from = $self->{'from'}; $self->{'pair_count'} = 0; for my $key ( keys %{$self->{"_pair_$map_to"}} ) { $lr = $self->{"_pair_$map_to"}->{$key}; my $i = 0; my $next = 1; my $length = $#{$lr}; while( $next <= $length ) { $current_pair = $lr->[$i]; $next_pair = $lr->[$next]; $del_pair = undef; if(exists $current_pair->{'indel'} || exists $next_pair->{'indel'}){ #necessary to modify the merge function to not merge indels $next++; $i++; } else{ # duplicate filter if( $current_pair->{'to'}->{'start'} == $next_pair->{'to'}->{'start'} and $current_pair->{'from'}->{'id'} == $next_pair->{'from'}->{'id'} ) { $del_pair = $next_pair; } elsif(( $current_pair->{'from'}->{'id'} eq $next_pair->{'from'}->{'id'} ) && ( $next_pair->{'ori'} == $current_pair->{'ori'} ) && ( $next_pair->{'to'}->{'start'} -1 == $current_pair->{'to'}->{'end'} )) { if( $current_pair->{'ori'} == 1 ) { # check forward strand merge if( $next_pair->{'from'}->{'start'} - 1 == $current_pair->{'from'}->{'end'} ) { # normal merge with previous element $current_pair->{'to'}->{'end'} = $next_pair->{'to'}->{'end'}; $current_pair->{'from'}->{'end'} = $next_pair->{'from'}->{'end'}; $del_pair = $next_pair; } } else { # check backward strand merge if( $next_pair->{'from'}->{'end'} + 1 == $current_pair->{'from'}->{'start'} ) { # yes its a merge $current_pair->{'to'}->{'end'} = $next_pair->{'to'}->{'end'}; $current_pair->{'from'}->{'start'} = $next_pair->{'from'}->{'start'}; $del_pair = $next_pair; } } } if( defined $del_pair ) { splice( @$lr, $next, 1 ); $lr_from = $self->{"_pair_$map_from"}->{uc($del_pair->{'from'}->{'id'})}; for( my $j=0; $j <= $#{$lr_from}; $j++ ) { if( $lr_from->[$j] == $del_pair ) { splice( @$lr_from, $j, 1 ); last; } } $length--; if( $length < $next ) { last; } } else { $next++; $i++; } } } $self->{'pair_count'} += scalar( @$lr ); } } # _is_sorted # # Arg 1 int $sorted # Function toggle for whether the (internal) # map data are sorted # Returntype int # Exceptions none # Caller internal # sub _is_sorted { my ($self, $value) = @_; $self->{'_is_sorted'} = $value if (defined($value)); return $self->{'_is_sorted'}; } 1;