=head1 LICENSE Copyright (c) 1999-2009 The European Bioinformatics Institute and Genome Research Limited. All rights reserved. This software is distributed under a modified Apache license. For license details, please see http://www.ensembl.org/info/about/code_licence.html =head1 CONTACT Please email comments or questions to the public Ensembl developers list at . Questions may also be sent to the Ensembl help desk at . =cut =head1 NAME Bio::EnsEMBL::DBSQL::StrainSliceAdaptor - adaptor/factory for MappedSlices representing alternative assemblies =head1 SYNOPSIS my $slice = $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 ); my $msc = Bio::EnsEMBL::MappedSliceContainer->new(-SLICE => $slice); # create a new strain slice adaptor and attach it to the MSC my $ssa = Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new($sa->db); $msc->set_StrainSliceAdaptor($ssa); # now attach strain $msc->attach_StrainSlice('Watson'); =head1 DESCRIPTION NOTE: this code is under development and not fully functional nor tested yet. Use only for development. This adaptor is a factory for creating MappedSlices representing strains and attaching them to a MappedSliceContainer. A mapper will be created to map between the reference slice and the common container slice coordinate system. =head1 METHODS new fetch_by_name =head1 REALTED MODULES Bio::EnsEMBL::MappedSlice Bio::EnsEMBL::MappedSliceContainer Bio::EnsEMBL::AlignStrainSlice Bio::EnsEMBL::StrainSlice =cut package Bio::EnsEMBL::DBSQL::StrainSliceAdaptor; use strict; use warnings; no warnings 'uninitialized'; use Bio::EnsEMBL::Utils::Argument qw(rearrange); use Bio::EnsEMBL::Utils::Exception qw(throw warning); use Bio::EnsEMBL::MappedSlice; use Bio::EnsEMBL::Mapper; use Bio::EnsEMBL::DBSQL::BaseAdaptor; our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor); =head2 new Example : my $strain_slice_adaptor = Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new; Description : Constructor. Return type : Bio::EnsEMBL::DBSQL::StrainSliceAdaptor Exceptions : none Caller : general Status : At Risk : under development =cut sub new { my $caller = shift; my $class = ref($caller) || $caller; my $self = $class->SUPER::new(@_); return $self; } =head2 fetch_by_name Arg[1] : Bio::EnsEMBL::MappedSliceContainer $container - the container to attach MappedSlices to Arg[2] : String $name - the name of the strain to fetch Example : my ($mapped_slice) = @{ $msc->fetch_by_name('Watson') }; Description : Creates a MappedSlice representing a version of the container's reference slice with variant alleles from the named strain Return type : listref of Bio::EnsEMBL::MappedSlice Exceptions : thrown on wrong or missing arguments Caller : general, Bio::EnsEMBL::MappedSliceContainer Status : At Risk : under development =cut sub fetch_by_name { my $self = shift; my $container = shift; my $name = shift; # argueent check unless ($container and ref($container) and $container->isa('Bio::EnsEMBL::MappedSliceContainer')) { throw("Need a MappedSliceContainer."); } unless ($name) { throw("Need a strain name."); } my $slice = $container->ref_slice; # get a connection to the variation DB my $variation_db = $self->db->get_db_adaptor('variation'); unless($variation_db) { warning("Variation database must be attached to core database to retrieve variation information"); return ''; } # now get an allele feature adaptor my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor; # check we got it unless(defined $af_adaptor) { warning("Not possible to retrieve AlleleFeatureAdaptor from variation database"); return ''; } # now get an individual adaptor my $ind_adaptor = $variation_db->get_IndividualAdaptor; # check we got it unless(defined $ind_adaptor) { warning("Not possible to retrieve IndividualAdaptor from variation database"); return ''; } # fetch individual object for this strain name my $ind = shift @{$ind_adaptor->fetch_all_by_name($name)}; # check we got a result unless(defined $ind) { warn("Strain ".$name." not found in the database"); return ''; } ## MAP STRAIN SLICE TO REF SLICE ################################ # create a mapper my $mapper = Bio::EnsEMBL::Mapper->new('mapped_slice', 'ref_slice'); # create a mapped_slice object my $mapped_slice = Bio::EnsEMBL::MappedSlice->new( -ADAPTOR => $self, -CONTAINER => $container, -NAME => $slice->name."\#strain_$name", ); # get the strain slice my $strain_slice = $slice->get_by_strain($ind->name); # get all allele features for this slice and individual #my @afs = sort {$a->start() <=> $b->start()} @{$af_adaptor->fetch_all_by_Slice($slice, $ind)}; # get allele features with coverage info my $afs = $strain_slice->get_all_AlleleFeatures_Slice(1); # check we got some data #warning("No strain genotype data available for slice ".$slice->name." and strain ".$ind->name) if ! defined $afs[0]; my $start_slice = $slice->start; my $start_strain = 1; my $sr_name = $slice->seq_region_name; #my $sr_name = 'ref_slice'; my ($end_slice, $end_strain, $allele_length); my $indel_flag = 0; my $total_length_diff = 0; # check for AFs if(defined($afs) && scalar @$afs) { # go through each AF foreach my $af(@$afs) { # find out if it changes the length if($af->length_diff != 0) { $indel_flag = 1; $total_length_diff += $af->length_diff; # get the allele length $allele_length = $af->length + $af->length_diff(); $end_slice = $slice->start + $af->start() - 2; if ($end_slice >= $start_slice){ $end_strain = $end_slice - $start_slice + $start_strain; #add the sequence that maps $mapper->add_map_coordinates('mapped_slice', $start_strain, $end_strain, 1, $sr_name, $start_slice, $end_slice); #add the indel $mapper->add_indel_coordinates('mapped_slice', $end_strain+1, $end_strain+$allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length); $start_strain = $end_strain + $allele_length + 1; } else{ #add the indel $mapper->add_indel_coordinates('mapped_slice', $end_strain+1,$end_strain + $allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length); $start_strain += $allele_length; } $start_slice = $end_slice + $af->length+ 1; } } } # add the remaining coordinates (or the whole length if no indels found) $mapper->add_map_coordinates('mapped_slice', $start_strain, $start_strain + ($slice->end - $start_slice), 1, $sr_name, $start_slice, $slice->end); # add the slice/mapper pair $mapped_slice->add_Slice_Mapper_pair($strain_slice, $mapper); ## MAP REF_SLICE TO CONTAINER SLICE ################################### if($total_length_diff > 0) { # create a new mapper my $new_mapper = Bio::EnsEMBL::Mapper->new('ref_slice', 'container'); # get existing pairs my @existing_pairs = $container->mapper->list_pairs('container', 1, $container->container_slice->length, 'container'); my @new_pairs = $mapper->list_pairs('mapped_slice', 1, $strain_slice->length(), 'mapped_slice'); # we need a list of indels (specifically inserts) my @indels; # go through existing first foreach my $pair(@existing_pairs) { if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) { my $indel; $indel->{'length_diff'} = ($pair->to->end - $pair->to->start) - ($pair->from->end - $pair->from->start); # we're only interested in inserts here, not deletions next unless $indel->{'length_diff'} > 0; $indel->{'ref_start'} = $pair->from->start; $indel->{'ref_end'} = $pair->from->end; $indel->{'length'} = $pair->to->end - $pair->to->start; push @indels, $indel; } } # now new ones foreach my $pair(@new_pairs) { if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) { my $indel; $indel->{'length_diff'} = ($pair->from->end - $pair->from->start) - ($pair->to->end - $pair->to->start); # we're only interested in inserts here, not deletions next unless $indel->{'length_diff'} > 0; $indel->{'ref_start'} = $pair->to->start; $indel->{'ref_end'} = $pair->to->end; $indel->{'length'} = $pair->from->end - $pair->from->start; push @indels, $indel; } } # sort them @indels = sort { $a->{'ref_start'} <=> $b->{'ref_start'} || # by position $b->{'length_diff'} <=> $a->{'length_diff'} # then by length diff so we only keep the longest } @indels; # clean them my @new_indels = (); my $p = $indels[0]; push @new_indels, $indels[0] if scalar @indels; for my $i(1..$#indels) { my $c = $indels[$i]; if($c->{'ref_start'} != $p->{'ref_start'} && $c->{'ref_end'} != $p->{'ref_end'}) { push @new_indels, $c; $p = $c; } } $start_slice = $slice->start; $start_strain = 1; $sr_name = $slice->seq_region_name; #$sr_name = 'ref_slice'; foreach my $indel(@new_indels) { $end_slice = $indel->{'ref_end'}; $end_strain = $start_strain + ($end_slice - $start_slice); $allele_length = $indel->{'length'} + $indel->{'length_diff'}; $new_mapper->add_map_coordinates($sr_name, $start_slice, $end_slice, 1, 'container', $start_strain, $end_strain); $new_mapper->add_indel_coordinates($sr_name,$end_slice+1,$end_slice + $indel->{'length'}, 1, 'container', $end_strain+1, $end_strain+$allele_length); $start_strain = $end_strain + $allele_length + 1; $start_slice = $end_slice + $indel->{'length'} + 1; } $new_mapper->add_map_coordinates($sr_name, $start_slice, $slice->end, 1, 'container', $start_strain, $start_strain + ($slice->end - $start_slice)); # replace the mapper with the new mapper $container->mapper($new_mapper); # change the container slice's length according to length diff $container->container_slice($container->container_slice->expand(undef, $total_length_diff, 1)); } return [$mapped_slice]; } 1;