=pod

=head1 NAME

Bio::EnsEMBL::Funcgen::HiveConfig::MotifFinder_conf - eHive configuration for the Motif Finder pipeline

=head1 SYNOPSIS

   # Example 1: specifying only the mandatory options (initial params are taken from defaults)
   init_pipeline.pl Bio::EnsEMBL::Funcgen::HiveConfig::*_conf -password

   # Example 2: specifying the mandatory options as well as setting initial params:
   init_pipeline.pl Bio::EnsEMBL::Funcgen::HiveConfig::*_conf -password -p1name p1value -p2name p2value

   # Example 3: do not re-create the database, just load more tasks into an existing one:
   init_pipeline.pl Bio::EnsEMBL::Funcgen::HiveConfig::*_conf -job_topup -password -p1name p1value -p2name p2value

=head1 DESCRIPTION

This is the configuration file for the Motif Finder pipeline.

Please refer to the Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf module
to understand the interface implemented here.

The pipeline consists of three analyses (logic name, then Runnable module):

=over 4

=item * setup_pipeline  - Bio::EnsEMBL::Funcgen::RunnableDB::SetupMotifInference

=item * infer_submotifs - Bio::EnsEMBL::Funcgen::RunnableDB::InferMotifs

=item * cluster_motifs  - Bio::EnsEMBL::Funcgen::RunnableDB::ClusterMotifs

=back

Please see the implementation details in the Runnable modules themselves.

=head1 CONTACT

Please contact the ensembl-dev@ebi.ac.uk mailing list with questions/suggestions.

=cut

package Bio::EnsEMBL::Funcgen::HiveConfig::MotifFinder_conf;

use strict;
use warnings;

use Bio::EnsEMBL::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);

# All Hive database configuration files should inherit from HiveGeneric,
# directly or indirectly.
use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf');


=head2 default_options

    Description : Implements the default_options() interface method of
                  Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf.
                  Supplies the pipeline database connection (assembled from the
                  dbhost, dbport, pipeuser, pipepass and pipedb_name options),
                  the DNA database defaults, the binaries directory and the
                  bin/window sizes.
    Returntype  : Hashref of option name => default value

=cut

sub default_options {
    my ($self) = @_;

    # Connection details of the hive (pipeline) database itself.
    # A former alternative for the name was:
    #   -dbname => $ENV{USER}.'_peaks_'.$self->o('dbname'),
    my %pipeline_db = (
        -host   => $self->o('dbhost'),
        -port   => $self->o('dbport'),
        -user   => $self->o('pipeuser'),
        -pass   => $self->o('pipepass'),
        -dbname => $self->o('pipedb_name'),
    );

    return {
        'ensembl_cvs_root_dir' => $ENV{'SRC'},    # taken from the SRC environment variable

        'pipeline_db' => \%pipeline_db,

        'dnadb_host'  => 'ens-livemirror',
        'dnadb_port'  => 3306,
        'dnadb_user'  => 'ensro',

        'bin_dir'     => '/software/ensembl/funcgen',
        'bin_size'    => 500,
        'window_size' => 50,
    };
}


=head2 resource_classes

    Description : Implements the resource_classes() interface method of
                  Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; lists the
                  LSF resource classes available to the analyses.
                  The *_monitored classes embed the values of the
                  LSF_RESOURCE_HOST and LSF_DNA_DB environment variables.
    Returntype  : Hashref of resource class name => { 'LSF' => submission arguments }

=cut

sub resource_classes {
    my ($self) = @_;

    # NOTE(review): neither variable is checked for definedness here; if one is
    # unset the string below is still built, just with an empty name in it.
    my $resource_host = $ENV{LSF_RESOURCE_HOST};
    my $dna_db        = $ENV{LSF_DNA_DB};

    # The leading space in the normal_monitored string is kept from the original.
    my $normal_monitored =
        qq{ -R"select[${resource_host}<600 && ${dna_db}<600] }
      . qq{rusage[${dna_db}=10:${resource_host}=10:duration=10:decay=1]"};

    my $long_monitored =
        qq{-q long -R"select[${resource_host}<1000] }
      . qq{rusage[${resource_host}=10:duration=10:decay=1]"};

    return {
        'default'          => { 'LSF' => '' },
        'urgent'           => { 'LSF' => '-q yesterday' },
        'normal_monitored' => { 'LSF' => $normal_monitored },
        'long_monitored'   => { 'LSF' => $long_monitored },
        'long_high_memory' => { 'LSF' => '-q long -M4000000 -R"select[mem>4000] rusage[mem=4000]"' },

        # Former numeric resource classes, kept for reference:
        # 0 => { -desc => 'default',              'LSF' => '' },
        # 1 => { -desc => 'urgent',               'LSF' => '-q yesterday' },
        # 2 => { -desc => 'normal ens-genomics1', 'LSF' => '-R"select[myens_genomics1<600 && myens_livemirror<600] rusage[myens_livemirror=10:myens_genomics1=10:duration=10:decay=1]"' },
        # 3 => { -desc => 'long ens-genomics1',   'LSF' => '-q long -R"select[myens_genomics1<1000] rusage[myens_genomics1=10:duration=10:decay=1]"' },
        # 4 => { -desc => 'long high memory',     'LSF' => '-q long -M4000000 -R"select[mem>4000] rusage[mem=4000]"' },
    };
}


# Private helper: path of a sub-directory of "<work_dir>/motifs".
# Shared by pipeline_wide_parameters() and pipeline_create_commands() so the
# directories that are created are the same ones handed to the jobs.
sub _motifs_subdir {
    my ($self, $leaf) = @_;

    return $self->o('work_dir') . "/motifs/$leaf";
}


=head2 pipeline_wide_parameters

    Description : Interface method that returns a hash of
                  pipeline_wide_parameter_name => pipeline_wide_parameter_value pairs.
                  The value doesn't have to be a scalar, it can be any Perl
                  structure (it will be stringified and de-stringified
                  automagically). Please see existing PipeConfig modules for
                  examples.
    Returntype  : Hashref

=cut

sub pipeline_wide_parameters {
    my ($self) = @_;

    # Options that are handed to the jobs under their own name, unchanged.
    my @pass_through = qw(
        dbhost     dbport     dbuser     dbname
        dnadb_host dnadb_port dnadb_user dnadb_name
        species
        bin_dir    bin_size   window_size
    );

    my %parameters = (
        # name used by the beekeeper to prefix job names on the farm
        'pipeline_name'   => $self->o('pipedb_name'),

        'output_dir'      => $self->_motifs_subdir('results'),
        'hive_output_dir' => $self->_motifs_subdir('hive_output'),

        map { my $option = $_; ( $option => $self->o($option) ) } @pass_through,
    );

    return \%parameters;
}


=head2 pipeline_create_commands

    Description : Implements the pipeline_create_commands() interface method of
                  Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; lists the
                  commands that will create and set up the Hive database.
                  The inherited commands come first, followed by the creation of
                  the results and hive output directories.
    Returntype  : Arrayref of shell command strings

=cut

sub pipeline_create_commands {
    my ($self) = @_;

    # Inherit database and hive tables creation from the parent class.
    my @commands = @{ $self->SUPER::pipeline_create_commands };

    # Explicit equivalents that are no longer used, kept for reference:
    #'mysql '.$self->dbconn_2_mysql('pipeline_db', 0)." -e 'CREATE DATABASE ".$self->o('pipeline_db', '-dbname')."'",
    # standard eHive tables and procedures:
    #'mysql '.$self->dbconn_2_mysql('pipeline_db', 1).' <'.$self->o('ensembl_hive_root_dir').'/sql/tables.sql',
    #'mysql '.$self->dbconn_2_mysql('pipeline_db', 1).' <'.$self->o('ensembl_hive_root_dir').'/sql/procedures.sql',

    # Create the output folders as required.
    for my $leaf (qw(results hive_output)) {
        push @commands, 'mkdir -p ' . $self->_motifs_subdir($leaf);
    }

    return \@commands;
}


=head2 pipeline_analyses

    Description : Implements the pipeline_analyses() interface method of
                  Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; defines the
                  structure of the pipeline: analyses, jobs, rules, etc.
                  setup_pipeline is seeded with the feature_set option and flows
                  into infer_submotifs (branch 2) and cluster_motifs (branch 3).
    Returntype  : Arrayref of analysis definition hashrefs

=cut

sub pipeline_analyses {
    my ($self) = @_;

    my %setup_pipeline = (
        -logic_name => 'setup_pipeline',
        -module     => 'Bio::EnsEMBL::Funcgen::RunnableDB::SetupMotifInference',
        -parameters => { 'batch_job' => 0, 'prepared' => 0 },

        # One seed job per pipeline initialisation; more can be added as
        # needed with init_pipeline -job_topup.
        -input_ids  => [
            { 'feature_set' => $self->o('feature_set') },
        ],

        -flow_into => {
            2 => ['infer_submotifs'],
            3 => ['cluster_motifs'],
        },
        -hive_capacity => 10,
        -rc_name       => 'default',
    );

    # This basically consists of running a command.
    my %infer_submotifs = (
        -logic_name => 'infer_submotifs',
        -module     => 'Bio::EnsEMBL::Funcgen::RunnableDB::InferMotifs',
        -parameters => {},

        # No seed jobs: jobs are flowed in via branch 2 of 'setup_pipeline'.
        -input_ids  => [],

        -hive_capacity => 100,
        -rc_name       => 'normal_monitored',

        # Use semaphores...
        #-wait_for => [ 'setup_pipeline' ]
    );

    # This basically consists of running a command.
    my %cluster_motifs = (
        -logic_name => 'cluster_motifs',
        -module     => 'Bio::EnsEMBL::Funcgen::RunnableDB::ClusterMotifs',
        -parameters => {},

        # No seed jobs: jobs are flowed in via branch 3 of 'setup_pipeline'.
        -input_ids  => [],

        -hive_capacity => 10,
        -rc_name       => 'default',

        # Use semaphores...
        #-wait_for => [ 'run_peaks_DNAse', 'run_peaks' ],
    );

    return [ \%setup_pipeline, \%infer_submotifs, \%cluster_motifs ];
}

1;