=pod

=head1 NAME

Bio::EnsEMBL::Funcgen::HiveConfig::Annotation_conf

=head1 DESCRIPTION

This is the Config file for the Annotation Pipeline.

Please refer to the Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf module
to understand the interface implemented here.

The Annotation pipeline implements Damian's scripts in
/scripts/regulatory_annotation. Please see the implementation details in
the Runnable modules.

=head1 CONTACT

Please contact the ensembl-dev@ebi.ac.uk mailing list with
questions/suggestions.

=cut

package Bio::EnsEMBL::Funcgen::HiveConfig::Annotation_conf;

use strict;
use warnings;

use Bio::EnsEMBL::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);

use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf');

sub default_options {
  my ($self) = @_;

  return {
    # Root of the Ensembl checkouts; some Compara developers might prefer
    # $ENV{'HOME'}.'/ensembl_main'
    'ensembl_cvs_root_dir' => $ENV{'SRC'},

    'pipeline_db' => {
      -host   => $self->o('dbhost'),
      -port   => $self->o('dbport'),
      -user   => $self->o('dbuser'),
      -pass   => $self->o('dbpass'),
      -dbname => $ENV{USER}.'_regfeat_annotation_'.$self->o('dbname'),
      #-dbname => $self->o('pipedb_name'),
    },
  };
}

sub resource_classes {
  my ($self) = @_;

  # The monitored classes throttle dispatch against the MySQL host named by
  # the LSF_RESOURCE_HOST environment variable.
  return {
    'default'                    => { 'LSF' => '' },
    'urgent'                     => { 'LSF' => '-q yesterday' },
    'normal_monitored'           => { 'LSF' => "-R\"select[$ENV{LSF_RESOURCE_HOST}<1000] rusage[$ENV{LSF_RESOURCE_HOST}=10:duration=10:decay=1]\"" },
    'long_monitored'             => { 'LSF' => "-q long -R\"select[$ENV{LSF_RESOURCE_HOST}<1000] rusage[$ENV{LSF_RESOURCE_HOST}=10:duration=10:decay=1]\"" },
    'long_high_memory'           => { 'LSF' => '-q long -M4000000 -R"select[mem>4000] rusage[mem=4000]"' },
    'long_monitored_high_memory' => { 'LSF' => "-q long -M4000000 -R\"select[$ENV{LSF_RESOURCE_HOST}<1000 && mem>4000] rusage[$ENV{LSF_RESOURCE_HOST}=10:duration=10:decay=1,mem=4000]\"" },
  };
}
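# For illustration only: assuming LSF_RESOURCE_HOST is set to
# 'myens_genomics2' (the host alias hardcoded in earlier versions of these
# resource classes), 'normal_monitored' above expands to
#
#   -R"select[myens_genomics2<1000] rusage[myens_genomics2=10:duration=10:decay=1]"
#
# i.e. LSF only dispatches a job while the external load index for that host
# is below 1000, and each running job reserves 10 units of that index,
# decaying away over a 10-minute window, so a burst of jobs cannot swamp the
# monitored database server.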
=head2 pipeline_wide_parameters

  Description : Interface method that should return a hash of
                pipeline_wide_parameter_name->pipeline_wide_parameter_value
                pairs. The value doesn't have to be a scalar, it can be any
                Perl structure (it will be stringified and de-stringified
                automagically). Please see existing PipeConfig modules for
                examples.

=cut

sub pipeline_wide_parameters {
  my ($self) = @_;

  return {
    # Name used by the beekeeper to prefix job names on the farm
    'pipeline_name' => $self->o('pipeline_db', '-dbname'),

    'work_dir' => $self->o('work_dir'),

    # Data directories and filenames.
    # Use this as scratch dir or create one specifically?
    'output_dir'      => $self->o('output_dir').'/annotation/results',
    'hive_output_dir' => $self->o('output_dir').'/annotation/hive_debug',

    # Maybe use parameters here instead of ENV variables in the pipeline...
    "dnadb" => {
      "-host"   => $self->o('dnadb_host'),
      "-port"   => $self->o('dnadb_port'),
      "-user"   => $self->o('dnadb_user'),
      "-dbname" => $self->o('dnadb_name'),
    },
    "efgdb" => {
      "-host"   => $self->o('dbhost'),
      "-port"   => $self->o('dbport'),
      "-user"   => $self->o('dbuser'),
      "-pass"   => $self->o('dbpass'),
      "-dbname" => $self->o('dbname'),
    },
    "workdb" => {
      "-host" => $self->o('workdb_host'),
      "-port" => $self->o('workdb_port'),
      "-user" => $self->o('workdb_user'),
      # workdbpass?
      "-pass" => $self->o('dbpass'),
    },

    # This could be inferred from the db, but it's probably safer(?) to pass
    # it as a parameter...
    "species" => $self->o('species'),
    #"release" => $self->o('release'),
  };
}

=head2 pipeline_create_commands

  Description : Implements pipeline_create_commands() interface method of
                Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf that lists
                the commands that will create and set up the Hive database.

=cut

sub pipeline_create_commands {
  my ($self) = @_;

  return [
    # HiveGeneric assumes an ensembl-hive folder, while if you use the stable
    # version it's ensembl-hive_stable!
    # Inherits the creation of the pipeline database plus the standard eHive
    # tables and procedures from the parent class.
    @{$self->SUPER::pipeline_create_commands},

    # Create hive output folders as required
    'mkdir -p '.$self->o('output_dir').'/annotation/results',
    'mkdir -p '.$self->o('output_dir').'/annotation/hive_debug',
  ];
}

=head2 pipeline_analyses

  Description : Implements pipeline_analyses() interface method of
                Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf that defines
                the structure of the pipeline: analyses, jobs, rules, etc.

=cut

sub pipeline_analyses {
  my ($self) = @_;

  return [
    {
      -logic_name => 'setup_pipeline',
      -module     => 'Bio::EnsEMBL::Funcgen::RunnableDB::SetupAnnotationPipeline',
      -parameters => {},
      -input_ids  => [
        # No initial input_ids... these will be added as needed by
        # init_pipeline -job_topup
        {},
      ],
      -flow_into => {
        2 => [ 'annotate_regulatory_features' ],
        #3 => [ 'wrap_up_pipeline' ],
      },
      # These jobs cannot run in parallel due to race conditions!
      # Do NOT change this setting unless you know what you're doing.
      -hive_capacity => 1,
      -rc_name       => 'default',
    },
    {
      -logic_name => 'annotate_regulatory_features',
      -module     => 'Bio::EnsEMBL::Funcgen::RunnableDB::AnnotateRegulatoryFeatures',
      -parameters => {},
      # Jobs for this analysis will be flowed in via branch 2 from the
      # 'setup_pipeline' jobs above.
      -input_ids  => [],
      # Since all the weight is in the database it is safer to run only one
      # at a time... or a small number at least.
      # Control files should be handled by setup_pipeline.
      -hive_capacity => 1,
      # Better safe than sorry... the size of datasets tends to increase...
      -rc_name  => 'long_monitored_high_memory',
      -wait_for => [ 'setup_pipeline' ],
    },
  ];
}

1;
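=pod

=head1 EXAMPLE

A sketch of a typical invocation, assuming the standard eHive scripts
(init_pipeline.pl, beekeeper.pl) are available on your PATH. The hostnames,
ports, credentials and paths below are placeholders, not real servers; the
option names match the $self->o(...) calls in this config:

  init_pipeline.pl Bio::EnsEMBL::Funcgen::HiveConfig::Annotation_conf \
    -dbhost efg-host -dbport 3306 -dbuser ensadmin -dbpass secret \
    -dbname my_funcgen_db \
    -dnadb_host core-host -dnadb_port 3306 -dnadb_user ensro \
    -dnadb_name my_core_db \
    -workdb_host work-host -workdb_port 3306 -workdb_user ensadmin \
    -species homo_sapiens \
    -work_dir /path/to/work -output_dir /path/to/output

  # Then run the jobs against the hive database created above (named
  # <user>_regfeat_annotation_<dbname> by default_options):
  beekeeper.pl -url mysql://ensadmin:secret@efg-host:3306/myuser_regfeat_annotation_my_funcgen_db -loop

=cut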