=pod

=head1 NAME

Bio::EnsEMBL::Funcgen::Hive::DefineOutputSet

=head1 DESCRIPTION

Defines (or fetches) the DataSet, FeatureSet and ResultSet for a previously
registered InputSet, and flows the resulting set to the downstream analysis.

=cut

package Bio::EnsEMBL::Funcgen::Hive::DefineOutputSet;

use warnings;
use strict;

use Bio::EnsEMBL::Utils::Exception         qw( throw );
use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw( scalars_to_objects );

use base ('Bio::EnsEMBL::Funcgen::Hive::BaseDB');

#This assumes the InputSet has been previously registered,
#and now we simply want to define/fetch the data, feature and result sets based on these data.

#todo -slice_import_status?

sub fetch_input {   # fetch parameters...
  my $self = shift @_;
  $self->SUPER::fetch_input;

  #This is only redefining it local to this process, not to the hive tables anywhere
  #Flow this later
  #todo set in Base, based on pipeline_name or DBNAME?
  #$self->set_dir_param('output_dir',
  #                     $self->param('root_output_dir').'/'.$self->param('pipeline_name').'/'.$self->param('set_name'),
  #                     1); #create flag
  #Now set in BaseDB::pipeline_wide_parameters

  my ($iset) = @{ &scalars_to_objects($self->out_db, 'InputSet',
                                      'fetch_by_dbID',
                                      [ $self->param('dbID') ]) };

  if (! defined $iset) {
    throw('Cannot fetch '.$self->param('name').
          ' ('.$self->param('dbID').') InputSet from the database');
  }

  $self->param('input_set', $iset);

  #refactor this default_analysis method in BaseDB?
  my %default_analyses;
  my @set_types = ('result_set');
  push @set_types, 'feature_set' if ! $self->param('result_set_only');

  #TODO Validate default set analysis keys exist as feature_type class or name?
  #This would fail for species with low coverage i.e. some names may be absent

  foreach my $set_type (@set_types) {
    my $set_lname = $self->param($set_type.'_analysis');

    if (! defined $set_lname) {
      $default_analyses{$set_type} = $self->param('default_'.$set_type.'_analyses');
      #catch with param_silent rather than param_required as it is
      #not mandatory if set_type analysis is defined

      if (! defined $default_analyses{$set_type}) {
        throw("Please define -${set_type}_analysis or add to default_${set_type}_analyses".
              " in the default_options config");
      }

      if (exists $default_analyses{$set_type}{$iset->feature_type->name}) {
        $set_lname = $default_analyses{$set_type}{$iset->feature_type->name};
      }
      elsif (exists $default_analyses{$set_type}{$iset->feature_type->class}) {
        $set_lname = $default_analyses{$set_type}{$iset->feature_type->class};
      }
      else {
        throw("No default $set_type analysis available for ".$iset->feature_type->name.
              ' ('.$iset->feature_type->class.
              ").\n Please add the FeatureType name or class to default_${set_type}_analyses ".
              "in the default_options config or specify -${set_type}_analysis");
      }
    }

    #Catch undefs in config
    if (! defined $set_lname) {
      throw("Unable to identify a defined $set_type analysis in config for ".$iset->feature_type->name.
            ".\nPlease define in default_${set_type}_analyses config or specify -${set_type}_analysis");
    }

    #can't use process_params here as the param name does not match the object name
    #We could over-ride this with a second arrayref of class names
    $self->param($set_type.'_analysis',
                 &scalars_to_objects($self->out_db, 'Analysis',
                                     'fetch_by_logic_name',
                                     [$set_lname])->[0]);
  }

  return;
}
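
# A minimal sketch (all values hypothetical) of the default analysis config that
# fetch_input falls back on when -result_set_analysis/-feature_set_analysis are
# not supplied: a hash keyed by FeatureType name or class, mapping to an Analysis
# logic_name that must exist in the output DB.
#
#   default_feature_set_analyses => {
#     CTCF    => 'some_peak_caller_logic_name',      # keyed by FeatureType name (hypothetical)
#     Histone => 'some_histone_caller_logic_name',   # keyed by FeatureType class (hypothetical)
#   },
#   default_result_set_analyses => {
#     DNase1  => 'some_alignment_logic_name',        # hypothetical
#   },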

#TODO handle ResultSet-only run
#This would take a flag and create the ResultSet in isolation
#would need to validate that no FeatureSet stuff was set
#this might clash with some defaults if we had mixed set types in the pipeline
#i.e. some with and some without DataSets
#feature_set_analysis is always created dynamically anyway, so this would be dependent on
#-result_set_only

sub run {   # Check parameters and do appropriate database/file operations...
  my $self   = shift @_;
  my $helper = $self->helper;
  my $iset   = $self->param('input_set');
  my $set;

  #todo migrate this to the Importer as define_OutputSet
  #there is some overlap here of param validation between BaseImporter and hive

  #Never set -FULL_DELETE here!
  #It is unwise to do this in a pipeline and should be handled
  #on a case by case basis using a separate rollback script
  #Should we also never really specify recover here?
  #This basically ignores the fact that a ResultSet may be linked to other DataSets

  if ($self->param('result_set_only')) {
    throw('Pipeline does not yet support creation of a ResultSet without an associated Feature/DataSet');

    $set = $helper->define_ResultSet
      (
       -NAME                => $iset->name, #.'_'.$rset_anal->logic_name,
       #-FEATURE_CLASS      => 'result' | 'dna_methylation', #currently set dynamically in define_ResultSet
       -SUPPORTING_SETS     => [$iset],
       -DBADAPTOR           => $self->out_db,
       -RESULT_SET_ANALYSIS => $self->param('result_set_analysis'),
       -RESULT_SET_MODE     => $self->param('result_set_mode'),
       -ROLLBACK            => $self->param('rollback'),
       -RECOVER             => $self->param('recover'),
       -SLICES              => $self->param('slices'),
       -CELL_TYPE           => $iset->cell_type,
       -FEATURE_TYPE        => $iset->feature_type,
      );
  }
  else {
    my $fset_anal = $self->param('feature_set_analysis');

    $set = $helper->define_DataSet
      (
       -NAME                 => $iset->name.'_'.$fset_anal->logic_name,
       -FEATURE_CLASS        => 'annotated', #Is there overlap with rset feature_class here?
       -SUPPORTING_SETS      => [$iset],
       -DBADAPTOR            => $self->out_db,
       -FEATURE_SET_ANALYSIS => $fset_anal,
       -RESULT_SET_ANALYSIS  => $self->param('result_set_analysis'),
       -RESULT_SET_MODE      => $self->param('result_set_mode'),
       -ROLLBACK             => $self->param('rollback'),
       -RECOVER              => $self->param('recover'),
       -SLICES               => $self->param('slices'),
       -CELL_TYPE            => $iset->cell_type,
       -FEATURE_TYPE         => $iset->feature_type,
       #-DESCRIPTION         => ?
       #-DISPLAY_LABEL       => ?
      );
  }

  #No tracking required here?
  #Todo review whether rollback handles status entries correctly

  #Add set_type here as result_set_only could be changed between writing
  #this output_id and running a downstream analysis
  $self->param('output_id',
               {dbID     => $set->dbID,
                set_name => $set->name,
                set_type => ($self->param('result_set_only') ? 'ResultSet' : 'DataSet')});

  return 1;
}


sub write_output {  # Create the relevant jobs
  my $self = $_[0];

  $self->dataflow_output_id($self->param('output_id'), 1);
  return;
}

1;
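
=pod

=head1 EXAMPLE

A minimal sketch (parameter values and the downstream logic_name are
hypothetical) of how this runnable might be wired into an eHive PipeConfig's
pipeline_analyses. The hashref flowed on branch 1 by write_output (dbID,
set_name, set_type) seeds the downstream job:

  { -logic_name => 'DefineOutputSet',
    -module     => 'Bio::EnsEMBL::Funcgen::Hive::DefineOutputSet',
    -parameters => { result_set_only => 0 },
    -flow_into  => { 1 => [ 'RunDownstreamAnalysis' ] },  # hypothetical downstream analysis
  },

=cut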