=head1 LICENSE Copyright (c) 1999-2013 The European Bioinformatics Institute and Genome Research Limited. All rights reserved. This software is distributed under a modified Apache license. For license details, please see http://www.ensembl.org/info/about/legal/code_licence.html =head1 CONTACT Please email comments or questions to the public Ensembl developers list at . Questions may also be sent to the Ensembl help desk at . =cut # # Ensembl module for Bio::EnsEMBL::Variation::DBSQL::PopulationAdaptor # # Copyright (c) 2004 Ensembl # # You may distribute this module under the same terms as perl itself # # =head1 NAME Bio::EnsEMBL::Variation::DBSQL::PopulationAdaptor =head1 SYNOPSIS $reg = 'Bio::EnsEMBL::Registry'; $reg->load_registry_from_db(-host => 'ensembldb.ensembl.org',-user => 'anonymous'); $pa = $reg->get_adaptor("human","variation","population"); # Get a Population by its internal identifier $pop = $pa->fetch_by_dbID(145); # fetch a population by its name $pop = $pa->fetch_by_name('PACIFIC'); # fetch all sub populations of a population foreach $sub_pop (@{$pa->fetch_all_by_super_Population($pop)}) { print $sub_pop->name(), " is a sub population of ", $pop->name(), "\n"; } # fetch all super populations foreach $super_pop (@{$pa->fetch_all_by_sub_Population($pop)}) { print $pop->name(), " is a sub population of ", $super_pop->name(), "\n"; } =head1 DESCRIPTION This adaptor provides database connectivity for Population objects. Populations may be retrieved from the Ensembl variation database by several means using this module. =head1 METHODS =cut use strict; use warnings; package Bio::EnsEMBL::Variation::DBSQL::PopulationAdaptor; use Bio::EnsEMBL::Variation::DBSQL::BaseAdaptor; use Bio::EnsEMBL::Utils::Exception qw(throw warning); use Bio::EnsEMBL::Utils::Scalar qw(wrap_array); use Bio::EnsEMBL::Variation::Population; use DBI qw(:sql_types); our @ISA = ('Bio::EnsEMBL::Variation::DBSQL::BaseAdaptor'); sub store { my ($self, $pop) = @_; my $dbh = $self->dbc->db_handle; my $sth = $dbh->prepare(q{ INSERT INTO population ( name, size, description, collection, freqs_from_gts, display ) VALUES (?,?,?,?,?,?) }); $sth->execute( $pop->name, $pop->size, $pop->description, $pop->collection || 0, $pop->_freqs_from_gts || 0, $pop->display, ); $sth->finish; # get the population_id inserted my $dbID = $dbh->last_insert_id(undef, undef, 'population', 'population_id'); $pop->{dbID} = $dbID; $pop->{adaptor} = $self; } =head2 fetch_population_by_synonym Arg [1] : String $population_synonym Arg [2] : String $source (optional) Example : my $pop = $pop_adaptor->fetch_population_by_synonym($population_synonym, $source); Description : Retrieves populations for the synonym given in the source. If no source is provided, retrieves all the synonyms Returntype : listref of Bio::EnsEMBL::Variation::Population objects Exceptions : none Caller : general Status : Stable =cut sub fetch_population_by_synonym { my $self = shift; my $synonym_name = shift; my $source = shift; my ($populations, $population_ids, $population_id); my $sql; if (defined $source){ $sql = qq{ SELECT ps.population_id FROM population_synonym ps, source s WHERE ps.name = ? and ps.source_id = s.source_id AND s.name = "$source"}; } else{ $sql = qq{ SELECT population_id FROM population_synonym WHERE name = ?}; } my $sth = $self->prepare($sql); $sth->bind_param(1, $synonym_name, SQL_VARCHAR); $sth->execute(); $sth->bind_columns(\$population_id); while ($sth->fetch()){ push @{$population_ids}, $population_id; } foreach my $population_id (@{$population_ids}){ my $population = $self->fetch_by_dbID($population_id); push @{$populations}, $population; } return $populations; } =head2 fetch_synonyms Arg [1] : $population_id Arg [2] (optional) : $source Example : my $dbSNP_synonyms = $pop_adaptor->fetch_synonyms($population_id, $dbSNP); my $all_synonyms = $pop_adaptor->fetch_synonyms($population_id); Description: Retrieves synonyms for the source provided. Otherwise, return all the synonyms for the population_id Returntype : listref of strings Exceptions : none Caller : general Status : Stable =cut sub fetch_synonyms { my $self = shift; my $dbID = shift; my $source = shift; my $synonym; my $synonyms; my $sql; if (defined $source){ $sql = qq{SELECT ps.name FROM population_synonym ps, source s WHERE ps.population_id = ? AND ps.source_id = s.source_id AND s.name = "$source"} } else{ $sql = qq{SELECT name FROM population_synonym WHERE population_id = ?}; } my $sth = $self->prepare($sql); $sth->bind_param(1,$dbID,SQL_INTEGER); $sth->execute(); $sth->bind_columns(\$synonym); while ($sth->fetch){ push @{$synonyms}, $synonym; } return $synonyms; } =head2 fetch_by_name Arg [1] : string $name Example : $pop = $pop_adaptor->fetch_by_name('NUSPAE:Singapore_HDL'); Description: Retrieves a population object via its name Returntype : Bio::EnsEMBL::Variation::Population Exceptions : throw if name argument is not defined Caller : general Status : Stable =cut sub fetch_by_name { my $self = shift; my $name = shift; throw('Name argument expected.') if (!defined($name)); my $sth = $self->prepare(q{ SELECT population_id, name, size, description, collection, freqs_from_gts, display FROM population WHERE name = ?;}); $sth->bind_param(1,$name,SQL_VARCHAR); $sth->execute(); my $populations = $self->_objs_from_sth($sth); $sth->finish(); return undef if (!@$populations); return $populations->[0]; } =head2 fetch_all_by_dbID_list Arg [1] : listref of dbIDs Example : $pops = $pop_adaptor->fetch_all_by_dbID_list([907, 1132]); Description: Retrieves a listref of population objects via a list of internal dbID identifiers Returntype : listref of Bio::EnsEMBL::Variation::Population objects Exceptions : throw if list argument is not defined Caller : general Status : Stable =cut sub fetch_all_by_dbID_list { my $self = shift; my $list = shift; if (!defined($list) || ref($list) ne 'ARRAY') { throw("list reference argument is required"); } return [] unless scalar @$list >= 1; my $id_str = (@$list > 1) ? " IN (".join(',',@$list).")" : ' = \''.$list->[0].'\''; my $sth = $self->prepare(qq{ SELECT population_id, name, size, description, collection, freqs_from_gts, display FROM population WHERE population_id $id_str;}); $sth->execute(); my $populations = $self->_objs_from_sth($sth); $sth->finish(); return undef if(!@$populations); return $populations; } =head2 fetch_all_by_name_search Arg [1] : string $name Example : $pop = $pop_adaptor->fetch_all_by_name_search('CEU'); Description: Retrieves a list of population objects whose name matches the search term. Returntype : Listref of Bio::EnsEMBL::Variation::Population objects Exceptions : throw if name argument is not defined Caller : general Status : At Risk =cut sub fetch_all_by_name_search { my $self = shift; my $name = shift; throw('Name argument expected.') if(!defined($name)); my $sth = $self->prepare(q{ SELECT population_id, name, size, description, collection, freqs_from_gts, display FROM population WHERE name like concat('%', ?, '%')}); $sth->bind_param(1,$name,SQL_VARCHAR); $sth->execute(); my $populations = $self->_objs_from_sth($sth); $sth->finish(); return $populations; } =head2 fetch_all_by_super_Population Arg [1] : Bio::EnsEMBL::Variation::Population $pop Example : foreach $sub_pop (@{$pa->fetch_all_by_super_Population($pop)}) { print $sub_pop->name(), "\n"; } Description: Retrieves all sub populations of a provided population. Returntype : listref of Bio::EnsEMBL::Variation::Population objetcs Exceptions : throw on bad argument Caller : general Status : At Risk =cut sub fetch_all_by_super_Population { my $self = shift; my $pop = shift; if (!ref($pop) || !$pop->isa('Bio::EnsEMBL::Variation::Population')) { throw('Bio::EnsEMBL::Variation::Population argument expected'); } if (!$pop->dbID()) { warning("Cannot retrieve sub populations for population without dbID"); return []; } my $sth = $self->prepare(q{ SELECT p.population_id, p.name, p.size, p.description, p.collection, p.freqs_from_gts, p.display FROM population p, population_structure ps WHERE p.population_id = ps.sub_population_id AND ps.super_population_id = ?}); $sth->bind_param(1,$pop->dbID,SQL_INTEGER); $sth->execute(); my $populations = $self->_objs_from_sth($sth); $sth->finish(); return $populations; } =head2 fetch_all_by_sub_Population Arg [1] : Bio::EnsEMBL::Variation::Population $pop Example : foreach $super_pop (@{$pa->fetch_all_by_sub_Population($pop)}) { print $super_pop->name(), "\n"; } Description: Retrieves all super populations for a provided population Returntype : listref of Bio::EnsEMBL::Variation::Population objects Exceptions : throw on bad argument Caller : general Status : At Risk =cut sub fetch_all_by_sub_Population { my $self = shift; my $pop = shift; if (!ref($pop) || !$pop->isa('Bio::EnsEMBL::Variation::Population')) { throw('Bio::EnsEMBL::Variation::Population argument expected'); } if (!$pop->dbID()) { warning("Cannot retrieve super populations for population without dbID"); return []; } my $sth = $self->prepare(q{ SELECT p.population_id, p.name, p.size, p.description, p.collection, p.freqs_from_gts, p.display FROM population p, population_structure ps WHERE p.population_id = ps.super_population_id AND ps.sub_population_id = ?}); $sth->bind_param(1,$pop->dbID,SQL_INTEGER); $sth->execute(); my $populations = $self->_objs_from_sth($sth); $sth->finish(); return $populations; } =head2 fetch_default_LDPopulation Args : none Example : $population = $pop_adaptor->fetch_default_LDPopulation(); Description : Obtains the population it is used as a default in the LD display of the pairwise LD data ReturnType : Bio::EnsEMBL::Variation::Population Exceptions : none Caller : general Status : At Risk =cut sub fetch_default_LDPopulation { my $self = shift; my $population_id; my $sth = $self->prepare(qq{SELECT meta_value from meta where meta_key = ?}); $sth->bind_param(1,'pairwise_ld.default_population',SQL_VARCHAR); $sth->execute(); $sth->bind_columns(\$population_id); $sth->fetch(); $sth->finish; if (defined $population_id) { return $self->fetch_by_dbID($population_id); } else{ return undef; } } =head2 fetch_all_LD_Populations Example : @populations = @{$pop_adaptor->fetch_all_LD Populations(); Description : Gets all populations that can be used in the LD display ReturnType : listref of Bio::EnsEMBL::Variation::Population objects Exceptions : none Caller : general Status : At Risk =cut sub fetch_all_LD_Populations{ my $self = shift; return [grep {$_->name !~ /ALL|AFR|AMR|ASN|EUR/} @{$self->generic_fetch(qq{ p.display = 'LD' })}]; } =head2 fetch_all_HapMap_Populations Example : @populations = @{$pop_adaptor->fetch_all_HapMap_populations(); Description : Gets all populations from the HapMap project (human only!) ReturnType : listref of Bio::EnsEMBL::Variation::Population objects Exceptions : none Caller : general Status : At Risk =cut sub fetch_all_HapMap_Populations { my $self = shift; return $self->generic_fetch(qq{ p.name like 'cshl-hapmap%' }); } =head2 fetch_all_1KG_Populations Example : @populations = @{$pop_adaptor->fetch_all_1KG_populations(); Description : Gets all populations from the 1000 genomes project (human only!) ReturnType : listref of Bio::EnsEMBL::Variation::Population objects Exceptions : none Caller : general Status : At Risk =cut sub fetch_all_1KG_Populations{ my $self = shift; return $self->generic_fetch(qq{ p.name like '1000GENOMES%' }); } =head2 fetch_all_by_Individual Arg [1] : Bio::EnsEMBL::Variation::Individual $ind Example : my $ind = $ind_adaptor->fetch_by_name('NA12004'); foreach my $pop (@{$pop_adaptor->fetch_all_by_Individual($ind)}){ print $pop->name, "\n"; } Description : Retrieves all populations from a specified individual ReturnType : listref of Bio::EnsEMBL::Variation::Population objects Exceptions : throw if incorrect argument is passed warning if provided individual does not have a dbID Caller : general Status : Stable =cut sub fetch_all_by_Individual { my $self = shift; my $ind = shift; if (!ref($ind) || !$ind->isa('Bio::EnsEMBL::Variation::Individual')) { throw("Bio::EnsEMBL::Variation::Individual arg expected"); } if (!$ind->dbID()) { warning("Individual does not have dbID, cannot retrieve Individuals"); return []; } my $sth = $self->prepare(qq{ SELECT p.population_id, p.name, p.size, p.description, p.collection, p.freqs_from_gts, p.display FROM population p, individual_population ip WHERE p.population_id = ip.population_id AND ip.individual_id = ?}); $sth->bind_param(1,$ind->dbID,SQL_INTEGER); $sth->execute(); my $populations = $self->_objs_from_sth($sth); $sth->finish(); return $populations; } =head2 fetch_all_by_Individual_list Arg [1] : listref of of Bio::EnsEMBL::Variation::Individual objects Example : foreach my $pop (@{$pop_adaptor->fetch_all_by_Individual_list($inds)}){ print $pop->name,"\n"; } Description : Retrieves all populations from a specified individual ReturnType : listref of Bio::EnsEMBL::Variation::Population objects Exceptions : throw if incorrect argument is passed warning if provided individual does not have a dbID Caller : general Status : Stable =cut sub fetch_all_by_Individual_list { my $self = shift; my $list = shift; if (!ref($list) || !$list->[0]->isa('Bio::EnsEMBL::Variation::Individual')) { throw("Listref of Bio::EnsEMBL::Variation::Individual arg expected"); } if (!$list->[0]->dbID()) { warning("First Individual does not have dbID, cannot retrieve Populations"); return []; } my $id_str = " IN (" . join(',', map {$_->dbID} @$list). ")"; my $sth = $self->prepare(qq{ SELECT p.population_id, p.name, p.size, p.description, p.collection, p.freqs_from_gts, p.display FROM population p, individual_population ip WHERE p.population_id = ip.population_id AND ip.individual_id $id_str }); $sth->execute(); my $populations = $self->_objs_from_sth($sth); $sth->finish(); return $populations; } =head2 fetch_tagged_Population Arg [1] : Bio::EnsEMBL::Variation::VariationFeature $vf Example : my $vf = $vf_adaptor->fetch_by_name('rs205621'); my $populations_tagged = $vf->is_tagged(); foreach my $pop (@{$vf_adaptor->is_tagged}){ print $pop->name, " has been tagged using a 0.99 r2 criteria\n"; } Description : Retrieves all populations from a specified variation feature that have been tagged ReturnType : listref of Bio::EnsEMBL::Variation::Population objects Exceptions : throw if incorrect argument is passed warning if provided variation feature does not have a dbID Caller : general Status : At Risk =cut sub fetch_tagged_Population{ my $self = shift; my $variation_feature = shift; if (!ref($variation_feature) || !$variation_feature->isa('Bio::EnsEMBL::Variation::VariationFeature')) { throw("Bio::EnsEMBL::Variation::VariationFeature arg expected"); } if (!$variation_feature->dbID()) { warning("Variation feature does not have dbID, cannot retrieve tagged populations"); return []; } my $sth = $self->prepare(qq{ SELECT p.population_id, p.name, p.size, p.description, p.collection, p.freqs_from_gts, p.display FROM population p, tagged_variation_feature tvf WHERE p.population_id = tvf.population_id AND tvf.tagged_variation_feature_id = ? }); $sth->bind_param(1,$variation_feature->dbID,SQL_INTEGER); $sth->execute(); my $populations = $self->_objs_from_sth($sth); $sth->finish(); return $populations; } =head2 fetch_tag_Population Arg [1] : Bio::EnsEMBL::Variation::VariationFeature $vf Example : my $vf = $vf_adaptor->fetch_by_name('rs205621'); my $populations_is_tag = $vf->is_tag(); foreach my $pop (@{$vf_adaptor->is_tag}){ print $pop->name, " has been tagged using a 0.99 r2 criteria\n"; } Description : Retrieves all populations in which the specified variation feature is a tag ReturnType : listref of Bio::EnsEMBL::Variation::Population objects Exceptions : throw if incorrect argument is passed warning if provided variation feature does not have a dbID Caller : general Status : At Risk =cut sub fetch_tag_Population { my $self = shift; my $variation_feature = shift; if (!ref($variation_feature) || !$variation_feature->isa('Bio::EnsEMBL::Variation::VariationFeature')) { throw("Bio::EnsEMBL::Variation::VariationFeature arg expected"); } if (!$variation_feature->dbID()) { warning("Variation feature does not have dbID, cannot retrieve tag populations"); return []; } my $sth = $self->prepare(qq{ SELECT p.population_id, p.name, p.size, p.description, p.collection, p.freqs_from_gts, p.display FROM population p, tagged_variation_feature tvf WHERE p.population_id = tvf.population_id AND tvf.variation_feature_id = ? }); $sth->bind_param(1,$variation_feature->dbID,SQL_INTEGER); $sth->execute(); my $populations = $self->_objs_from_sth($sth); $sth->finish(); return $populations; } =head2 get_dbIDs_for_population_names Arg [1] : $population_names Listref of population names. Example : my $ids = $pop_adaptor->get_dbIDs_for_population_names(['CSHL-HAPMAP:HAPMAP-MEX','1000GENOMES:pilot_1_CHB+JPT_low_coverage_panel']); map {printf("Population: \%s has dbID \%d\n",$ids->{$_},$_)} keys(%{$ids}); Description : Retrieve the dbIDs for a list of population names ReturnType : hashref with dbIDs as keys and population names as values Caller : web Status : At Risk =cut sub get_dbIDs_for_population_names { my $self = shift; my $population_names = shift; # Wrap the argument into an arrayref $population_names = wrap_array($population_names); # Define a statement handle for the lookup query my $stmt = qq{ SELECT population_id, name FROM population WHERE name = ? LIMIT 1 }; my $sth = $self->prepare($stmt); # Loop over the population names and query the db my %dbIDs; foreach my $name (@{$population_names}) { $sth->execute($name); my ($id, $name); $sth->bind_columns(\$id,\$name); $sth->execute(); $dbIDs{$id} = $name if (defined($id)); } return \%dbIDs; } =head2 get_sample_id_for_population_names Arg [1] : $population_names reference to list of population names Example : my $ids = $pop_adaptor->get_sample_id_for_population_names(['CSHL-HAPMAP:HAPMAP-MEX','1000GENOMES:pilot_1_CHB+JPT_low_coverage_panel']); map {printf("Population: \%s has sample_id \%d\n",$ids->{$_},$_)} keys(%{$ids}); Description : Retrieve the sample_ids for a list of population names ReturnType : reference to hash with sample_ids as keys and population names as values Caller : web Status : At Risk =cut sub get_sample_id_for_population_names { my $self = shift; warn('The use of this method is deprecated. Use get_dbIDs_for_population_names instead.'); } sub _get_individual_population_hash { my $self = shift; my $id_list_ref = shift; if(!defined($id_list_ref) || ref($id_list_ref) ne 'ARRAY') { throw("id_list list reference argument is required"); } return [] if (!@$id_list_ref); my %ip_hash; my $max_size = 200; my @id_list = @$id_list_ref; while (@id_list) { my @ids; if(@id_list > $max_size) { @ids = splice(@id_list, 0, $max_size); } else { @ids = splice(@id_list, 0); } my $id_str; if(@ids > 1) { $id_str = " IN (" . join(',', @ids). ")"; } else { $id_str = " = ".$ids[0]; } my $sth = $self->prepare(qq/ SELECT individual_id, population_id FROM individual_population WHERE individual_id $id_str /); $sth->execute(); my ($ind, $pop); $sth->bind_columns(\$ind, \$pop); $ip_hash{$pop}{$ind} = 1 while $sth->fetch; $sth->finish(); } # NB COMMENTED OUT FOR NOW AS IT DOESN'T SEEM TO WORK PROPERLY # now get super-populations #my @pops = keys %ip_hash; #my %new_pops; # ## need to iterate in case there's multiple levels #while(scalar @pops) { # # my $id_str; # if(scalar @pops) { # $id_str = " IN (" . join(',', @pops). ")"; # } else { # $id_str = " = ".$pops[0]; # } # # @pops = (); # # my $sth = $self->prepare(qq{ # SELECT sub_population_sample_id, super_population_sample_id # FROM population_structure # WHERE sub_population_sample_id $id_str # }); # $sth->execute(); # # my ($sub, $super); # $sth->bind_columns(\$sub, \$super); # while($sth->fetch) { # push @{$new_pops{$sub}}, $super; # push @pops, $super; # } # $sth->finish(); #} # #foreach my $sub(keys %new_pops) { # foreach my $super(@{$new_pops{$sub}}) { # $ip_hash{$super}{$_} = 1 for keys %{$ip_hash{$sub}}; # } #} # return \%ip_hash; } # # private method, creates population objects from an executed statement handle # ordering of columns must be consistant # sub _objs_from_sth { my $self = shift; my $sth = shift; my @pops; my ($pop_id, $name, $size, $desc, $collection, $freqs, $display); $sth->bind_columns(\$pop_id, \$name, \$size, \$desc, \$collection, \$freqs, \$display); while($sth->fetch()) { push @pops, Bio::EnsEMBL::Variation::Population->new( -dbID => $pop_id, -ADAPTOR => $self, -NAME => $name, -SIZE => $size, -DESCRIPTION => $desc, -COLLECTION => $collection, -FREQS => $freqs, -DISPLAY => $display,); } return \@pops; } sub _tables { return (['population','p']); } sub _columns { return qw(p.population_id p.name p.size p.description p.collection p.freqs_from_gts p.display); } sub _default_where_clause { return ''; } 1;