=pod

=head1 NAME

Bio::EnsEMBL::Funcgen::Hive::IdentifySetInputs

=head1 DESCRIPTION

This module simply takes a list of parameters and a 'set_type' to identify
Sets used as inputs for various parts of the analysis pipeline.

Sets are selected in exactly one of three ways:

  -set_ids      fetch each set by dbID
  -set_names    fetch each set by name
  filters       any combination of -feature_types -cell_types
                -experimental_groups -analyses -states

Each identified set is flowed on branch 2 as { dbID => ..., set_name => ... }.

=cut

package Bio::EnsEMBL::Funcgen::Hive::IdentifySetInputs;

use warnings;
use strict;

use base ('Bio::EnsEMBL::Funcgen::Hive::BaseDB');
#Bio::EnsEMBL::Funcgen::Hive(::Config)
#We don't need to discriminate between Runnables and RunnableDBs anymore
#Just name the modules accordingly!

#use Bio::EnsEMBL::DBSQL::DBAdaptor;
#use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor;
#use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw (strip_param_args generate_slices_from_names
#                                                strip_param_flags run_system_cmd);
use Bio::EnsEMBL::Utils::Exception qw (throw);

#todo -slice_import_status?

# Maps each supported -set_type value to the name of the out_db method that
# is called to obtain the matching adaptor.
my %set_adaptor_methods = (
  input_set   => 'get_InputSetAdaptor',
  result_set  => 'get_ResultSetAdaptor',
  feature_set => 'get_FeatureSetAdaptor',
  #data_set?
);


#Accessors to catch compile time errors and avoid uncaught typos
# Each one is a thin read-only wrapper around the parameter of the same name.
# set_type uses param_required; all the others use param.

sub constraints_hash    { return $_[0]->param('constraints_hash'); }
sub set_type            { return $_[0]->param_required('set_type'); }
sub feature_types       { return $_[0]->param('feature_types'); }
sub cell_types          { return $_[0]->param('cell_types'); }
sub experimental_groups { return $_[0]->param('experimental_groups'); }
#sub input_sets         { return $_[0]->param('input_sets'); }
sub analyses            { return $_[0]->param('analyses'); }
sub set_ids             { return $_[0]->param('set_ids'); }
sub set_adaptor         { return $_[0]->param('set_adaptor'); }
sub set_names           { return $_[0]->param('set_names'); }
sub sets                { return $_[0]->param('sets'); }
sub states              { return $_[0]->param('states'); }


# fetch_input
#
# Validates the job parameters and prepares everything run() needs:
#   * checks set_type against %set_adaptor_methods and stores the matching
#     adaptor (obtained from out_db) in the 'set_adaptor' param;
#   * passes the selection params through process_params and stores the
#     result of the filter call in the 'constraints_hash' param;
#   * throws unless exactly one selection mode (ids, names or filters) is used.
#
# Returns nothing.

sub fetch_input {   # fetch parameters...
  my $self = shift @_;
  $self->SUPER::fetch_input;

  #Validate set_type
  my $set_type = $self->set_type;

  if (! exists $set_adaptor_methods{$set_type}) {
    # Keys are sorted so that the message is the same on every run.
    throw("The -set_type $set_type is not supported by IdentifySetInputs.\n".
          "Valid options are ".join(' ', sort keys(%set_adaptor_methods)) );
  }

  #Set the method name value to reset the value as the actual adaptor
  my $method = $set_adaptor_methods{$set_type};
  $self->param('set_adaptor', $self->out_db->$method);

  # NOTE(review): process_params is inherited; the flag meanings below are
  # taken from the original inline comments - confirm against the base class.
  $self->process_params([qw(set_names set_ids)], 1);#optional

  #Parse comma separated lists of filters into arrayrefs of string or objects
  $self->param('constraints_hash',
               $self->process_params([qw(feature_types cell_types states
                                         analyses experimental_groups)],
                                     1, 1) #optional/as array flags
              );

  #Catch mutually exclusive filter params
  #all these are OR filters except states which is an AND filter
  my $using_filters = ( $self->feature_types       ||
                        $self->cell_types          ||
                        $self->experimental_groups ||
                        $self->analyses            ||
                        $self->states ) ? 1 : 0;
  my $using_names   = $self->set_names ? 1 : 0;
  my $using_ids     = $self->set_ids   ? 1 : 0;

  if (! ($using_filters || $using_names || $using_ids)) {
    throw('You must specifiy some IdentifySetInputs fitler params either '.
          '-set_names or -set_ids or a combination of '.
          '-feature_types -cell_types -experimental_groups -states -analyses');
  }

  # Exactly one selection mode is allowed: more than one is a conflict.
  if (($using_filters + $using_names + $using_ids) > 1) {
    throw('You have specified mutually exclusive filter params for the '.
          "IdentifySetInputs analysis\nPlease specify restrict to ".
          '-set_names or -set_ids or a combination other filters '.
          '(e.g. -experimental_groups -feature_types -cell_types -analyses -states');
  }

  return;
}


# run
#
# Fetches the sets selected by the parameters validated in fetch_input and
# stores them, as an arrayref, in the 'sets' param.
#
# For -set_ids / -set_names every identifier is looked up individually; if
# any lookup returns a false value the method throws after all lookups have
# been attempted, listing every identifier with what was fetched for it.
# Otherwise the constraints hash is handed to the adaptor's fetch_all; this
# path throws for set types 'data_set' and 'result_set'.
#
# Returns nothing.

sub run {   # Check parameters and do appropriate database/file operations...
  my $self        = shift @_;
  my $set_adaptor = $self->set_adaptor;
  my %sets;         # identifier (dbID or name) => fetched set, undef on failure
  my $fetch_failed = 0;

  #For set_ids and set_names, catch undef return types
  if ($self->set_ids) {

    foreach my $id (@{$self->set_ids}) {
      my $set = $set_adaptor->fetch_by_dbID($id);
      $sets{$id}    = $set;
      $fetch_failed = 1 if ! $set;
    }
  }
  elsif ($self->set_names) {

    foreach my $name (@{$self->set_names}) {
      my $set = $set_adaptor->fetch_by_name($name);
      $sets{$name}  = $set;
      $fetch_failed = 1 if ! $set;
    }
  }
  else { #Must be other filters
    my $constraints = $self->constraints_hash;
    #$self->param('input_sets', $constraints);
    #$self->process_params($set_type.'s');
    #or add $self->set_and_process_params?

    if (($self->set_type eq 'data_set') ||
        ($self->set_type eq 'result_set')) {
      throw($self->set_type.
            ' adaptor does not yet support fetch_all({constraints => $constraints,})');
    }

    #Need to account for analysis or format
    #i.e. we don't want to queue up the Segmentation input_sets
    #This should not really require an input_set
    #and should be loaded like an external set i.e. feature_set only

    #Add string_param_exists here to validate states
    my $filtered_sets = $set_adaptor->fetch_all({ constraints         => $constraints,
                                                  string_param_exists => 1 });

    foreach my $set (@{$filtered_sets}) {
      #warn "Found set ".$set->name;
      $sets{$set->dbID} = $set;
      #$self->helper->debug(1, $set->name.'( '.$set->dbID.")");

      if ($self->param('no_write')) {
        print STDOUT "\t".$set->name.'( '.$set->dbID.")\n";
      }
    }
  }

  if ($fetch_failed) {
    # One "identifier => value" entry per line. Failed lookups are shown as
    # 'undef' explicitly, so building the message never concatenates undef.
    my @report = map { $_.' => '.(defined $sets{$_} ? $sets{$_} : 'undef') }
                 sort keys %sets;

    throw('Failed to fetch some '.$self->set_type." Sets using names or IDs:\n\t".
          join("\n\t", @report));
  }

  $self->param('sets', [values %sets]);
  return;
}


#Todo
#Enable a preview of what we are going to dataflow here
#This will be done using standaloneJob.pl once this can access config from the DB
#will probably have to add a no_data_flow flag, which will print out instead of
#flow the output_ids

# write_output
#
# Flows one output id per set stored in the 'sets' param on branch 2, each
# carrying the set's dbID and name.
#
# Returns nothing.

sub write_output {  # Create the relevant jobs
  my $self = $_[0];

  foreach my $set (@{$self->sets}) {
    #Flow dbID and name for readability in hive DB
    #flows to batch jobs (branch 2) e.g. StoreRollbackSets
    $self->dataflow_output_id({ dbID     => $set->dbID,
                                set_name => $set->name }, 2);

    #This should return the job_id
    #if absent, then this is already present, and nothing would be returned
    #do we need to warn about this?
  }

  return;
}

1;