#
# Ensembl module for Bio::EnsEMBL::Funcgen::DBSQL::ProbeAdaptor
#

=head1 LICENSE

  Copyright (c) 1999-2013 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list.

  Questions may also be sent to the Ensembl help desk.

=head1 NAME

Bio::EnsEMBL::Funcgen::DBSQL::ProbeAdaptor - A database adaptor for fetching
and storing Probe objects.

=head1 SYNOPSIS

  my $opa   = $db->get_ProbeAdaptor();
  my $probe = $opa->fetch_by_array_probe_probeset_name('Array-1', 'Probe-1', undef);

=head1 DESCRIPTION

The ProbeAdaptor is a database adaptor for storing and retrieving
Probe objects.

=head1 SEE ALSO

Bio::EnsEMBL::Funcgen::Probe

=cut

package Bio::EnsEMBL::Funcgen::DBSQL::ProbeAdaptor;

use strict;
use warnings;

use Scalar::Util qw( blessed );

use Bio::EnsEMBL::Utils::Exception qw( throw warning );
use Bio::EnsEMBL::Funcgen::Probe;
use Bio::EnsEMBL::Funcgen::DBSQL::BaseAdaptor;    # Have to use here to import @EXPORT (e.g. the SQL_* type constants)

use base qw(Bio::EnsEMBL::Funcgen::DBSQL::BaseAdaptor);    # @ISA; change to parent with perl 5.10


=head2 fetch_by_array_probe_probeset_name

  Arg [1]    : string - name of array
  Arg [2]    : string - name of probe
  Arg [3]    : (optional) string - name of probeset
  Example    : my $probe = $opa->fetch_by_array_probe_probeset_name('Array-1', 'Probe-1', 'ProbeSet-1');
  Description: Returns a probe given a combination of array name, probeset and
               probe name. This will uniquely define an Affy probe. Only one
               probe is ever returned.
  Returntype : Bio::EnsEMBL::Funcgen::Probe, or undef if no probe matches
  Exceptions : throws if array or probe name not defined
  Caller     : General
  Status     : At Risk - rename to fetch_by_probe_array_probeset_name?

=cut

# This does not currently capture on-plate replicate probes with different names.
# Only the record corresponding to the given name is returned, not the other
# replicate name.

sub fetch_by_array_probe_probeset_name {
    my ($self, $array_name, $probe_name, $probeset_name) = @_;

    if ( !( defined $array_name && defined $probe_name ) ) {
        throw('You must provide at least and array and probe name');
    }

    # The probe_set table is only joined in when a probeset name was supplied.
    my $tables = 'probe p, array_chip ac, array a';
    $tables .= ', probe_set ps' if defined $probeset_name;

    my $sql = "SELECT distinct(p.probe_id) FROM $tables WHERE a.name=? and a.array_id=ac.array_id and ac.array_chip_id=p.array_chip_id and p.name=?";
    $sql .= ' AND p.probe_set_id=ps.probe_set_id and ps.name=?' if defined $probeset_name;

    my $sth = $self->db->dbc->prepare($sql);
    $sth->bind_param(1, $array_name,    SQL_VARCHAR);
    $sth->bind_param(2, $probe_name,    SQL_VARCHAR);
    $sth->bind_param(3, $probeset_name, SQL_VARCHAR) if defined $probeset_name;
    $sth->execute;

    # This should only return one result. The only possible way it would not
    # is an identically named array(:probeset):probe with a different sequence,
    # as the array import separates these based on the sequence hash.
    my ($dbid) = $sth->fetchrow_array;

    # Only the first row is consumed, so release the handle explicitly.
    $sth->finish;

    return (defined $dbid) ? $self->fetch_by_dbID($dbid) : undef;
}


=head2 fetch_all_by_name

  Arg [1]    : string - probe name
  Example    : my @probes = @{$opa->fetch_all_by_name('Probe1')};
  Description: Convenience method to re-instate the functionality of
               $core_dbentry_adaptor->fetch_all_by_External_name('probe_name');
               WARNING: This may not be the probe you are expecting as
               probe names are not unique across arrays and vendors.
               These should ideally be validated using the attached array
               information or alternatively use
               fetch_by_array_probe_probeset_name.
               Returns all probes with the given name.
  Returntype : Arrayref
  Exceptions : Throws if name not passed
  Caller     : General
  Status     : At Risk

=cut

sub fetch_all_by_name {
    my ($self, $name) = @_;

    throw('Must provide a probe name argument') if !defined $name;

    $self->bind_param_generic_fetch($name, SQL_VARCHAR);

    return $self->generic_fetch('p.name=?');
}


=head2 fetch_all_by_ProbeSet

  Arg [1]    : Bio::EnsEMBL::Funcgen::ProbeSet
  Example    : my @probes = @{$opa->fetch_all_by_ProbeSet($pset)};
  Description: Fetch all probes in a particular ProbeSet.
  Returntype : Listref of Bio::EnsEMBL::Funcgen::Probe objects
  Exceptions : Validation is delegated to the DBAdaptor's is_stored_and_valid
               method
  Caller     : General
  Status     : At Risk

=cut

sub fetch_all_by_ProbeSet {
    my ($self, $probeset) = @_;

    $self->db->is_stored_and_valid('Bio::EnsEMBL::Funcgen::ProbeSet', $probeset);

    return $self->generic_fetch('p.probe_set_id = ' . $probeset->dbID);
}


=head2 fetch_all_by_Array

  Arg [1]    : Bio::EnsEMBL::Funcgen::Array
  Example    : my @probes = @{$opa->fetch_all_by_Array($array)};
  Description: Fetch all probes on a particular array.
  Returntype : Listref of Bio::EnsEMBL::Funcgen::Probe objects. An empty
               listref is returned if the array has no array chips.
  Exceptions : throws if arg is not valid or stored
  Caller     : General
  Status     : At Risk

=cut

sub fetch_all_by_Array {
    my ($self, $array) = @_;

    # blessed() guards against unblessed references, on which ->isa would die
    # with a raw Perl error rather than the intended exception.
    if ( !( blessed($array)
            && $array->isa('Bio::EnsEMBL::Funcgen::Array')
            && $array->dbID() ) )
    {
        throw('Need to pass a valid stored Bio::EnsEMBL::Funcgen::Array');
    }

    my @array_chip_ids = @{ $array->get_array_chip_ids() };

    # An empty IN () list is not valid SQL, so short-circuit here.
    return [] if !@array_chip_ids;

    # Restrict to all array_chip_ids of the array in a single generic fetch.
    return $self->generic_fetch(
        'p.array_chip_id IN (' . join(',', @array_chip_ids) . ')' );
}


=head2 fetch_all_by_ArrayChip

  Arg [1]    : Bio::EnsEMBL::Funcgen::ArrayChip
  Example    : my @probes = @{$opa->fetch_all_by_ArrayChip($array_chip)};
  Description: Fetch all probes on a particular ArrayChip.
  Returntype : Listref of Bio::EnsEMBL::Funcgen::Probe objects.
  Exceptions : throws if arg is not valid or stored
  Caller     : General
  Status     : At Risk

=cut

sub fetch_all_by_ArrayChip {
    my ($self, $array_chip) = @_;

    if ( !( blessed($array_chip)
            && $array_chip->isa('Bio::EnsEMBL::Funcgen::ArrayChip')
            && $array_chip->dbID() ) )
    {
        throw('Need to pass a valid stored Bio::EnsEMBL::Funcgen::ArrayChip');
    }

    return $self->generic_fetch( 'p.array_chip_id =' . $array_chip->dbID() );
}


=head2 fetch_by_ProbeFeature

  Arg [1]    : Bio::EnsEMBL::Funcgen::ProbeFeature
  Example    : my $probe = $opa->fetch_by_ProbeFeature($feature);
  Description: Returns the probe that created a particular feature.
  Returntype : Bio::EnsEMBL::Funcgen::Probe
  Exceptions : Throws if argument is not a Bio::EnsEMBL::Funcgen::ProbeFeature
               object with a probe_id set
  Caller     : General
  Status     : At Risk

=cut

sub fetch_by_ProbeFeature {
    my ($self, $feature) = @_;

    # The probe_id is read directly from the feature hash rather than through
    # an accessor, exactly as the feature stores it.
    if (   !blessed($feature)
        || !$feature->isa('Bio::EnsEMBL::Funcgen::ProbeFeature')
        || !$feature->{'probe_id'} )
    {
        throw('fetch_by_ProbeFeature requires a stored Bio::EnsEMBL::Funcgen::ProbeFeature object');
    }

    return $self->fetch_by_dbID( $feature->{'probe_id'} );
}


=head2 _tables

  Args       : None
  Example    : None
  Description: PROTECTED implementation of superclass abstract method.
               Returns the names and aliases of the tables to use for queries.
  Returntype : List of listrefs of strings
  Exceptions : None
  Caller     : Internal
  Status     : At Risk

=cut

sub _tables {
    my $self = shift;

    return ( [ 'probe', 'p' ] );
}


=head2 _columns

  Args       : None
  Example    : None
  Description: PROTECTED implementation of superclass abstract method.
               Returns a list of columns to use for queries. The order must
               match the bind_columns call in _objs_from_sth.
  Returntype : List of strings
  Exceptions : None
  Caller     : Internal
  Status     : At Risk

=cut

sub _columns {
    my $self = shift;

    return qw(
        p.probe_id
        p.probe_set_id
        p.name
        p.length
        p.array_chip_id
        p.class
        p.description
    );
}


=head2 _objs_from_sth

  Arg [1]    : DBI statement handle object
  Example    : None
  Description: PROTECTED implementation of superclass abstract method.
               Creates Probe objects from an executed DBI statement handle.
               Consecutive rows sharing a probe_id are merged into a single
               Probe, each extra row adding an array chip/probe name pair.
  Returntype : Listref of Bio::EnsEMBL::Funcgen::Probe objects
  Exceptions : None
  Caller     : Internal
  Status     : At Risk

=cut

sub _objs_from_sth {
    my ($self, $sth) = @_;

    my ( @result, $current_dbid );
    my ( $probe_id, $probe_set_id, $name, $probelength, $arraychip_id, $class, $desc );

    # Per-call caches so that each Array/ProbeSet is fetched only once, however
    # many rows reference it. Note the array cache is keyed on array_chip_id,
    # so an Array spanning several chips is still fetched once per chip.
    my ( %array_cache, %probe_set_cache );

    # Column order must match _columns.
    $sth->bind_columns( \$probe_id, \$probe_set_id, \$name, \$probelength,
                        \$arraychip_id, \$class, \$desc );

    # Design notes (complex query extension):
    #  - We want the array, array_chip and probeset information set here so that
    #    the probe feature zmenu can be populated with one query instead of 4.
    #    Start with probeset only, as that is simpler.
    #  - Object creation for the linked adaptors needs to be commoditised, so we
    #    can say something like $probeset_adaptor->create_obj_from_sth_args.
    #    Caches would need to be built in the calling adaptor rather than the
    #    true object adaptor.
    #  - No GROUP BY is required as we always want the intermediate data, so
    #    this cannot be combined with the simple extension unless we explicitly
    #    group by primary keys.
    #  - We would need an (ordered) array of adaptor column hashes, an array of
    #    dependent bound columns, and to parse output on the primary keys of
    #    each table, i.e. columns bound to hash values.
    #  - We need a way to define the extended tables: pass a param hash to the
    #    caller, which uses a BaseAdaptor method to add tables and columns,
    #    taking into account anything added by the caller.
    #  - Maybe a better test would be to include all probes in a probeset, or
    #    all probe_features for a probe?

    my $probe;

    while ( $sth->fetch() ) {

        # This nests Array and ProbeSet objects in the Probe.
        # TODO: MAKE THIS LAZY LOADED. Open questions:
        #  - Can we even do that, given we would not then have the array
        #    context? Should we just force this for efficiency and make people
        #    keep the array if they ever want to use that info?
        #  - Storing the cache in the adaptor would avoid repeat queries across
        #    calls, at the cost of memory and the need to clear the cache.
        #  - As multiple array_chips map to the same array, an array_cache plus
        #    arraychip_map (array_chip_id => array_id) would store each Array
        #    only once. Could this become an ArrayChip cache with the array
        #    method reimplemented on top?
        $array_cache{$arraychip_id}
            ||= $self->db->get_ArrayAdaptor()->fetch_by_array_chip_dbID($arraychip_id);
        my $array = $array_cache{$arraychip_id};

        # The probe_set cache is potentially as large as the number of probes.
        # Just build the cache and nest for now; may want to return only the ID
        # and lazy load. This is a prime target for compound query extension,
        # or a persistent probeset cache in the ProbeSetAdaptor for
        # dbID/Probe style queries.
        my $probeset;

        if ($probe_set_id) {
            $probe_set_cache{$probe_set_id}
                ||= $self->db->get_ProbeSetAdaptor()->fetch_by_dbID($probe_set_id);
            $probeset = $probe_set_cache{$probe_set_id};
        }

        if ( !$current_dbid || $current_dbid != $probe_id ) {
            # New probe
            # UC??? or does rearrange handle this?
            $probe = Bio::EnsEMBL::Funcgen::Probe->new(
                -dbID          => $probe_id,
                -name          => $name,
                -array_chip_id => $arraychip_id,
                -array         => $array,
                -probe_set     => $probeset,
                -length        => $probelength,
                -class         => $class,
                -description   => $desc,
                -adaptor       => $self,
            );
            push @result, $probe;
            $current_dbid = $probe_id;
        }
        else {
            # Extend existing probe.
            # Probe methods depend on preloading of Array objects.
            $probe->add_array_chip_probename( $arraychip_id, $name, $array );
        }
    }

    return \@result;
}


=head2 store

  Arg [1]    : List of Bio::EnsEMBL::Funcgen::Probe objects
  Example    : $opa->store($probe1, $probe2, $probe3);
  Description: Stores given Probe objects in the database. Should only be
               called once per probe because no checks are made for duplicates.
               Sets dbID and adaptor on the objects that it stores.
               Probes which are already stored are skipped with a warning.
  Returntype : ARRAYREF of the Probe objects passed in
  Exceptions : Throws if no arguments are passed
               Throws if arguments are not Probe objects
               Throws if a probe has no stored arrays attached
  Caller     : General
  Status     : At Risk

=cut

sub store {
    my ($self, @probes) = @_;

    throw('Must call store with a list of Probe objects') if scalar @probes == 0;

    my $db = $self->db();

    # Statement handles are prepared lazily, once, and reused for every probe.
    my ( $new_probe_sth, $extra_probe_sth, $pd_sth );

    my $new_probe_sql = 'INSERT INTO probe( probe_set_id, name, length, array_chip_id, class, description) '
                      . 'VALUES (?, ?, ?, ?, ?, ?)';
    my $extra_probe_sql = 'INSERT INTO probe( probe_id, probe_set_id, name, length, array_chip_id, class, description ) '
                        . 'VALUES (?, ?, ?, ?, ?, ?, ?)';
    my $pd_sql = "INSERT IGNORE into probe_design(probe_id, analysis_id, score, coord_system_id) values(?, ?, ?, ?)";

  PROBE:
    foreach my $probe (@probes) {
        my $dbID;    # Assigned by the first insert for this probe

        if ( !blessed($probe) || !$probe->isa('Bio::EnsEMBL::Funcgen::Probe') ) {
            throw("Probe must be an Probe object ($probe)");
        }

        if ( $probe->is_stored($db) ) {
            warning( 'Probe [' . $probe->dbID() . '] is already stored in the database' );
            next PROBE;
        }

        # Get all the arrays this probe is on and keep those which are in the
        # database (i.e. have a dbID). The 'arrays' hash is accessed directly,
        # which by-passes any DB fetch an accessor might do.
        my %array_hashes;

        foreach my $ac_id ( keys %{ $probe->{'arrays'} } ) {
            if ( defined $probe->{'arrays'}{$ac_id}->dbID() ) {
                $array_hashes{$ac_id} = $probe->{'arrays'}{$ac_id};
            }
        }

        throw('Probes need attached arrays to be stored in the database') if !%array_hashes;

        my $ps_id = ( defined $probe->probeset() ) ? $probe->probeset()->dbID() : undef;

        # Insert a separate row (sharing the same probe_id) for each
        # array_chip/name combination the probe has.
        foreach my $ac_id ( keys %array_hashes ) {

            foreach my $name ( @{ $probe->get_all_probenames( $array_hashes{$ac_id}->name ) } ) {

                if ( defined $dbID ) {
                    # Already stored: reuse the probe_id of the first row
                    $extra_probe_sth ||= $self->prepare($extra_probe_sql);
                    $extra_probe_sth->bind_param(1, $dbID,               SQL_INTEGER);
                    $extra_probe_sth->bind_param(2, $ps_id,              SQL_INTEGER);
                    $extra_probe_sth->bind_param(3, $name,               SQL_VARCHAR);
                    $extra_probe_sth->bind_param(4, $probe->length(),    SQL_INTEGER);
                    $extra_probe_sth->bind_param(5, $ac_id,              SQL_INTEGER);
                    $extra_probe_sth->bind_param(6, $probe->class(),     SQL_VARCHAR);
                    $extra_probe_sth->bind_param(7, $probe->description, SQL_VARCHAR);
                    $extra_probe_sth->execute();
                }
                else {
                    # New probe: let the database assign the probe_id
                    $new_probe_sth ||= $self->prepare($new_probe_sql);
                    $new_probe_sth->bind_param(1, $ps_id,              SQL_INTEGER);
                    $new_probe_sth->bind_param(2, $name,               SQL_VARCHAR);
                    $new_probe_sth->bind_param(3, $probe->length(),    SQL_INTEGER);
                    $new_probe_sth->bind_param(4, $ac_id,              SQL_INTEGER);
                    $new_probe_sth->bind_param(5, $probe->class(),     SQL_VARCHAR);
                    $new_probe_sth->bind_param(6, $probe->description, SQL_VARCHAR);
                    $new_probe_sth->execute();

                    $dbID = $new_probe_sth->{'mysql_insertid'};
                    $probe->dbID($dbID);
                    $probe->adaptor($self);
                }
            }
        }

        # 1 is the no-fetch flag
        my @panals = @{ $probe->get_all_design_scores(1) };

        if (@panals) {
            # Duplicates are currently ignored by the INSERT IGNORE statement.
            # This is convenient, but may lose information about incorrect
            # duplicates and is not good general practice. A solution would be
            # to nest them with a dbID value as well as the score.
            # Use ignore for now and update the implementation when we create
            # BaseProbeDesign?
            $pd_sth ||= $self->prepare($pd_sql);

            foreach my $probe_analysis (@panals) {
                my ( $analysis_id, $score, $cs_id ) = @{$probe_analysis};
                $cs_id ||= 0;    # NULL

                $pd_sth->bind_param(1, $probe->dbID(), SQL_INTEGER);
                $pd_sth->bind_param(2, $analysis_id,   SQL_INTEGER);
                $pd_sth->bind_param(3, $score,         SQL_VARCHAR);
                $pd_sth->bind_param(4, $cs_id,         SQL_INTEGER);
                $pd_sth->execute();
            }
        }
    }

    return \@probes;
}


=head2 fetch_all_design_scores

  Arg [1]    : Bio::EnsEMBL::Funcgen::Probe
  Example    : my @probe_analyses = @{$pa->fetch_all_design_scores($probe)};
  Description: Fetches all probe design analysis records as analysis_id,
               score and coord_system_id.
  Returntype : ARRAYREF of [analysis_id, score, coord_system_id] ARRAYREFs in
               scalar context. For backwards compatibility, the same rows are
               returned as a list in list context.
  Exceptions : throws if not passed a valid stored Probe
  Caller     : General
  Status     : At Risk

=cut

sub fetch_all_design_scores {
    my ($self, $probe) = @_;

    if ( !( blessed($probe)
            && $probe->isa('Bio::EnsEMBL::Funcgen::Probe')
            && $probe->dbID() ) )
    {
        throw('Must pass a valid stored Bio::EnsEMBL::Funcgen::Probe');
    }

    my $sql  = 'SELECT analysis_id, score, coord_system_id from probe_design WHERE probe_id=?';
    my $rows = $self->db->dbc->db_handle->selectall_arrayref( $sql, undef, $probe->dbID() );

    if ( !defined $rows ) {
        throw( 'Failed to fetch design scores for Probe [' . $probe->dbID() . ']' );
    }

    # Previously a list was returned unconditionally, which yielded a row count
    # in scalar context and so broke the documented @{...} usage. wantarray
    # keeps existing list-context callers working.
    return wantarray ? @{$rows} : $rows;
}

1;