#!/usr/local/bin/perl

use strict;
use warnings;

use lib qw(/home/rmott/meme/lib/perl);

use Cwd qw(abs_path);
use Fcntl qw(O_CREAT O_WRONLY O_TRUNC SEEK_SET);
use File::Basename qw(fileparse);
use File::Temp qw(tempfile);
use File::Spec::Functions qw(abs2rel catfile catdir splitdir tmpdir);
use Getopt::Long;
use HTML::Template;
use List::Util qw(min max);
use Pod::Usage;

use Globals;

=head1 NAME

meme-chip - Does an automated analysis of a ChIPseq DNA sequence dataset with the MEME toolchain.

=head1 SYNOPSIS

meme-chip [options] <sequences>

 Options:
  -o                <dir>   : output to the specified directory, failing if the directory exists
  -oc               <dir>   : output to the specified directory, overwriting if the directory exists
  -index-name       <name>  : name of html index file; default: index.html
  -db               <path>  : target database for use by TOMTOM and AME, if not present then TOMTOM and AME are not run
  -bfile            <path>  : background file
  -nmeme            <num>   : limit of sequences to pass to MEME
  -ccut             <num>   : maximum size of a sequence before it is cut down 
                              to a centered section; a value of 0 indicates the
                              sequences should not be cut down; default: 100
  -desc             <text>  : description of the job
  -fdesc            <file>  : file containing plain text description of the job
  -run-mast                 : run MAST - motif alignment & search tool
  -run-ama                  : run AMA - Average motif affinity. 
  -noecho                   : don't echo the commands run
  -tar                      : create a tar.gz file of the outputs
  -help                     : display this help message

 MEME Specific Options:
  -meme-mod [oops|zoops|anr]: sites used in a single sequence
  -meme-minw        <num>   : minimum motif width
  -meme-maxw        <num>   : maximum motif width
  -meme-nmotifs     <num>   : maximum number of motifs to find
  -meme-minsites    <num>   : minimum number of sites per motif
  -meme-maxsites    <num>   : maximum number of sites per motif
  -meme-time        <secs>  : maximum time to run MEME in seconds
  -meme-p           <np>    : use parallel version with <np> processors
  -meme-norevcomp           : search given strand only
  -meme-pal                 : look for palindromes only

=cut

# Setup logging
my $logger = undef;
eval {
  require Log::Log4perl;
  Log::Log4perl->import();
  Log::Log4perl::init('/home/rmott/meme/etc/logging.conf');
  $logger = Log::Log4perl->get_logger('meme.service.memechip');
};

# Log commandline
if ($logger && $logger->is_debug()) {
  $logger->debug("Starting MEME-ChIP");
  $logger->debug(join(" ", 'meme-chip', @ARGV));
}

# Constants
my $bindir = '/home/rmott/meme/bin';
my $site_url = 'http://meme.nbcr.net/meme';
my $template_file = '/home/rmott/meme/etc/meme-chip.tmpl';
my $tmpdir = '';
# use the perl default if none is supplied or the replace fails
$tmpdir = &tmpdir() if ($tmpdir eq '' || $tmpdir =~ m/^\@TMP[_]DIR\@$/);

# Required Argument
my $sequences;

# General Options
my $help = 0; # FALSE
my $echo = 1; # TRUE
my $tar = 0; # FALSE
my $run_mast = 0; # FALSE
my $run_ama = 0; # FALSE
my $appverbosity = 1; #all the applications write to stderr currently so had to disable this
my $clobber = 1; # TRUE
my $outdir = 'memechip_out';
my $index_name = 'index.html';
my $desc = undef;
my $nmeme = $MAXMEMESEQS;
my $ccut = $CMAX;
my @dbs = ();
my $bfile = undef;


# MEME Specific Options
my $meme_minw = undef; # default is 6
my $meme_maxw = undef; # default is 30
my $meme_mod = 'zoops';
my $meme_nmotifs = 3;
my $meme_minsites = undef;
my $meme_maxsites = undef;
my $meme_time = undef;
my $meme_p = undef;
my $meme_norevcomp = 0; # FALSE
my $meme_palindromes = 0; # FALSE

# Derived Globals
my $outfile;
my $stderr_txt;
my $stdout_txt;
my @tomtom_dbnames = ();
my @desc_para = ();

# Report Globals
my @report_inputs = ();
my @report_outputs = ();
my @report_commands = ();
my $report_groups = [{group => "Results", items => \@report_outputs}, {group => "Data", items => \@report_inputs}];

# Parse Options
$logger->debug("Parsing options") if $logger;
my $options_success = 0; # FALSE
my $options_err = undef;
eval {
  # redirect stderr to a temp file as we want to log it
  my $olderr;
  my $tmperr = tempfile('GetOptions_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1);
  open($olderr, ">&STDERR") or die("Can't dup STDERR: $!");
  open(STDERR, '>&', $tmperr) or die("Can't redirect STDERR to temp file: $!");
  # parse options
  $options_success = GetOptions(
    'help|?'          => \$help,
    'noecho'          => sub {$echo = 0},
    'tar'             => \$tar,
    'run-mast'        => \$run_mast,
    'run-ama'         => \$run_ama,
    'o=s'             => sub {$clobber = 0; shift; $outdir = shift},
    'oc=s'            => \$outdir,
    'index-name=s'    => \$index_name,
    'desc=s'          => \$desc,
    'fdesc=s'         => # slurp the description into a scalar
    sub {
      my ($param, $file) = @_;
      open(my $fh, '<', $file) or die("Couldn't open description file: $!");
      $desc = do { local( $/ ); <$fh>};
      close($fh);
    },
    'nmeme=i'           => \$nmeme,
    'ccut=i'            => \$ccut,
    'db=s'              => \@dbs,
    'bfile=s'           => \$bfile,
    'meme-mod=s'        => \$meme_mod,
    'meme-minw=i'       => \$meme_minw, 
    'meme-maxw=i'       => \$meme_maxw, 
    'meme-nmotifs=i'    => \$meme_nmotifs, 
    'meme-minsites=i'   => \$meme_minsites,
    'meme-maxsites=i'   => \$meme_maxsites,
    'meme-time=i'       => \$meme_time,
    'meme-p=i'          => \$meme_p,
    'meme-norevcomp'    => \$meme_norevcomp,
    'meme-pal'          => \$meme_palindromes,
  );
  # reset STDERR
  open(STDERR, ">&", $olderr) or die("Can't reset STDERR: $!");
  # slurp errors
  seek($tmperr, SEEK_SET, 0);
  chomp($options_err = do {local ($/); <$tmperr>});
  close($tmperr);
};
# check for eval problems
if ($@) {
  $logger->fatal($@) if $logger;
  die($@);
}
# check for problems parsing options
if (!$options_success) {
  $logger->fatal($options_err) if $logger;
  pod2usage($options_err);
}

pod2usage(1) if $help;

# Validate Options and Arguments
$logger->debug("Validating options") if $logger;
eval {
  die("A sequences file must be specified.") unless @ARGV;
  $sequences = shift @ARGV;
  die("The sequences file (\"".$sequences."\") does not exist.") unless (-e $sequences);
  # validate background
  die("The background file (\"". $bfile ."\") specified by option bg does not exist.") if ($bfile && not -e $bfile);
  # validate nmeme
  die("Option nmeme must be at least 1. Got $nmeme.") if ($nmeme < 1);
  # validate ccut
  die("Option ccut must be larger or equal to $MINMEMESEQW. Got $ccut.") if ($ccut < $MINMEMESEQW && $ccut != 0);
  # validate meme-mod
  if ($meme_mod =~ m/^(oops|zoops|anr)$/) {
    $meme_mod = $1;
  } else {
    die("Option meme-mod must be either oops, zoops or anr. Got $meme_mod.");
  }
  # validate meme-minw and meme-maxw, setting defaults if required
  die("Option meme-minw must be between $MINW and $MAXW. Got $meme_minw.") if (defined($meme_minw) && ($meme_minw < $MINW || $meme_minw > $MAXW));
  if (defined($meme_maxw) && ($meme_maxw < $MINW || $meme_maxw > $MAXW || (defined($meme_minw) && $meme_maxw < $meme_minw))) {
    die("Option meme-maxw must be between $MINW and $MAXW and larger or equal to option meme-minw if it is supplied. Got $meme_maxw.");   
  }
  $meme_minw = (defined($meme_maxw) ? min(6, $meme_maxw) : 6) unless defined($meme_minw);
  $meme_maxw = max($meme_minw, 30) unless defined($meme_maxw);
  # validate meme-nmotifs
  die("Option meme-nmotifs must be at least 1. Got $meme_nmotifs.") if ($meme_nmotifs < 1);
  # validate meme-minsites and meme-maxsites
  if (defined($meme_minsites) && ($meme_minsites < $MINSITES || $meme_minsites > $MAXSITES)) {
    die("Option meme-minsites must be between $MINSITES and $MAXSITES. Got $meme_minsites.");
  }
  if (defined($meme_maxsites) && ($meme_maxsites < $MINSITES || $meme_maxsites > $MAXSITES || 
      (defined($meme_minsites) && $meme_maxsites < $meme_minsites))) {
    die("Option meme-maxsites must be between $MINSITES and $MAXSITES and ".
      "larger or equal to option meme-minsites if it is supplied. Got $meme_maxsites.");
  }
  # validate meme-time
  die("Option meme-time must be at least 1. Got $meme_time.") if (defined($meme_time) && $meme_time < 1);
  # validate meme-p
  die("Option meme-p must be at least 1. Got $meme_p.") if (defined($meme_p) && $meme_p < 1);
  # validate Tomtom (and AME) databases
  foreach my $db (@dbs) {
    die("Missing Tomtom database \"".$db."\"") unless (-e $db);
  }
};
if ($@) {
  $logger->fatal($@) if $logger;
  pod2usage($@);
}

# Do this outside of main because these bits are needed to make
# the html output which main relys on for error reporting
eval {
  # convert the tomtom dbs into a nicer readable form
  $logger->debug("Parsing db names") if $logger;
  foreach my $db (@dbs) {
    my $dbname = fileparse($db, '.meme');
    $dbname =~ s/_/ /g; #replace underscores
    push(@tomtom_dbnames, $dbname);
  }
  # parse the description into paragraphs and lines
  # a paragraph is seperated from other paragraphs by
  # multiple new line characters
  $logger->debug("Parsing description") if $logger;
  if ($desc) {
    my @paragraphs = split(/\n\n/, $desc);
    for my $paragraph (@paragraphs) {
      my @para_lines = split(/\n/, $paragraph);
      my $text = shift(@para_lines);
      my @lines = ();
      foreach my $line (@para_lines) {
        push(@lines, {text => $line});
      }
      push(@desc_para, {text => $text, lines => \@lines});
    }
  }
  # Create the output directory
  $logger->debug("Creating output directory") if $logger;
  if (-e $outdir) {
    die("Output directory \"$outdir\" already exists\n") if (!$clobber);
  } else {
    mkdir($outdir) or die("Failed to create output directory: $!");
    push(@report_commands, {command => 'mkdir ' . &quote_spaces($outdir)});
  }
  # calculate some file names
  $outfile = catfile($outdir, $index_name);
  $stdout_txt = catfile($outdir,'stdout.txt');
  $stderr_txt = catfile($outdir, 'stderr.txt');
};
if ($@) {
  $logger->fatal($@) if $logger;
  die($@);
}

# Run main which does most of the work and if it completes mark the html as done.
$logger->debug("Running main") if $logger;
eval {
  &main();
  &write_html("", 0); # html to go in tar
  if ($tar) {
    my $tarname = &tar_output();
    &write_html("", 0, undef, $tarname); # index which links to tar
  }
};
# If errors occured then write out the html meantioning the errors, log and die
if ($@) {
  my $msg = $@;
  eval {
    &write_html("Error!", 0, $msg);
  };
  $logger->fatal($msg) if ($logger);
  die($msg);
}

################################################################################
#
#       write_html
#       
#       Writes out the html to a file.
#
################################################################################
sub write_html {
  my $fh;
  my ($status_text, $refresh, $error_msg, $tar) = @_;
  # give defaults for unsupplied parameters
  $refresh = 60 unless defined($refresh);
  $status_text = "Running" unless defined($status_text);
  # open the file for writing
  sysopen($fh, $outfile, O_CREAT | O_WRONLY | O_TRUNC) or die("Failed to open \"$outfile\".");
  # create the template
  my $template = HTML::Template->new(filename => $template_file);
  my $data = {status => $status_text, desc => \@desc_para, error => $error_msg, 
      refresh => $refresh, groups => $report_groups, commands => \@report_commands, 
      command_rows => scalar(@report_commands) * 2, site_url => $site_url, tar => $tar};

  $template->param(%{$data});
  # write the template to file
  print $fh $template->output();
  # close the file
  close($fh) or die("Failed to close \"$outfile\".");
}

################################################################################
#
#       tar_output
#       
#       tars the output directory
#
################################################################################
sub tar_output {
  my $seqs = fileparse($sequences);
  my @outfiles = ($seqs, 'seqs-centered', 'seqs-shuffled', 'seqs-sampled', 
      'seqs-discarded', 'seqs-centered_w_bg', 'index.html');
  my @outdirs = ('meme_out', 'meme_tomtom_out', 'meme_mast_out', 
      'meme_ama_out', 'dreme_out', 'dreme_tomtom_out', 'dreme_mast_out', 
      'dreme_ama_out', 'ame_out');
  foreach my $dir (@outdirs) {
    push(@outfiles, $dir, $dir . '_stderr.txt');
  }
  # check all the files exist (otherwise tar will fail)
  my $i;
  for ($i = 0; $i < scalar(@outfiles); $i++) {
    unless (-e catfile($outdir, $outfiles[$i])) {
      # delete the missing file
      splice @outfiles, $i, 1;
      $i--;
    }
  }
  # we want to tar including the outdir to avoid a tarbomb
  # find the real name of the outdir (user could have passed '.')
  my $folder = (splitdir(abs_path($outdir)))[-1];
  my $folder_dir = abs_path(catdir($outdir, '..'));
  # append the folder to the outfiles
  for (my $i = 0; $i < scalar(@outfiles); $i++) {
    $outfiles[$i] = catfile($folder, $outfiles[$i]);
  }
  my $tarname = $folder . ".tar.gz";
  my $tarfile = catfile($outdir, $tarname);
  # run tar
  system('tar', '-czf', $tarfile, '-C', $folder_dir, @outfiles);
  my $errmsg = '';
  if ($? == -1) {
    $errmsg = "Failed to execute tar: $!";
  } elsif ($? & 127) {
    $errmsg = sprintf("tar died with signal %d, %s coredump.",
        ($? & 127), ($? & 128) ? 'with' : 'without');
  } elsif ($? != 0) {
    $errmsg = sprintf("tar exited with value %d indicating failure.", $? >> 8);
  }
  die($errmsg) if $errmsg;
  return $tarname;
}

################################################################################
#
#       main
#       
#       Run all the programs
#
################################################################################
sub main {
  my $seqs_centered = catfile($outdir, "seqs-centered");
  my $seqs_shuffled = catfile($outdir, "seqs-shuffled");
  my $seqs_sampled = catfile($outdir, "seqs-sampled");
  my $seqs_discarded = catfile($outdir, "seqs-discarded");
  my $seqs_final;
  my $num_ame_sequences;
  my $ame_input;

  ####################################################################
  # set up and record the inputs
  ####################################################################
  $logger->debug("Setting up the sequence inputs") if $logger;

  push(@report_inputs, &report_job ("Input Sequences:", [$sequences,"",""], "Your original untrimmed sequences."));
  &write_html();

  if ($ccut > 0) {
    push(@report_commands, &run_job 
      ($bindir, "fasta-center", ["-len", $ccut], $sequences, $seqs_centered));
    push(@report_inputs, &report_job ("fasta-center output:", [$seqs_centered,"",""], 
        "Your sequences centered and trimed to width $ccut."));
    &write_html();
  } else { # centering disabled
    $seqs_centered = $sequences;
  }

  my $num_centered_sequences = &getsize($seqs_centered);

  my $dinucargs = ["-f", $seqs_centered, "-t", "-dinuc"];
  push(@report_commands, &run_job ($bindir, "fasta-dinucleotide-shuffle", $dinucargs, undef, $seqs_shuffled));
  push(@report_inputs, &report_job ("fasta-dinucleotide-shuffle output:", [$seqs_shuffled,"",""],
    "The centered sequences randomly shuffled maintaining their dinucleotide frequency, used as a background for DREME and AME."));
  &write_html();

  ####################################################################
  # set up inputs for AME
  ####################################################################
  $logger->debug("Setting up the AME inputs") if $logger;


  $num_ame_sequences = $num_centered_sequences;
  $ame_input = $seqs_centered."_w_bg";
  push(@report_commands, &run_job (undef, "cat", [$seqs_centered, $seqs_shuffled], undef, $ame_input));
  push(@report_inputs, &report_job ("AME input:", [$ame_input,"",""], "The centered sequences followed by the same ".
    "sequences after dinucleotide shuffling."));
  &write_html();

  ##################################################################
  # set up inputs for MEME
  ##################################################################
  $logger->debug("Setting up the MEME inputs") if $logger;

  # subsample to maximum sequences we want for MEME if too many sequences
  $seqs_final = $seqs_centered;

  my $meme_sample_source = "";
  if ($num_centered_sequences > $nmeme) {
    $meme_sample_source = " $nmeme (randomly chosen)";
    push(@report_commands, &run_job($bindir, "fasta-subsample", [$seqs_centered, $nmeme, "-rest", $seqs_discarded], undef, $seqs_sampled));
    $seqs_final = $seqs_sampled;
    push(@report_inputs, &report_job ("fasta-subsample (used) output:", [$seqs_sampled,"",""], 
        "A random sample of $nmeme of the centered sequences, used by MEME."));
    push(@report_inputs, &report_job (
        "fasta-subsample (discarded) output:", [$seqs_discarded,"",""],
        "The centered sequences omitted from the sample used by MEME."));
    &write_html();
  }
  my $level = 0;
  ##################################################################
  # Run MEME
  ##################################################################
  my $meme_prog = "meme";
  my $meme_outdir = "meme_out";
  # TODO original file name is not recorded
  #'-sf', D:\\ERK2\\ERK221_BKG09\\ERK221_BKG09_MACS_top1000.fasta
  my @meme_args = ($seqs_final, '-oc', catdir($outdir, $meme_outdir), 
      '-dna', '-mod', $meme_mod, '-nmotifs', $meme_nmotifs, 
      '-minw', $meme_minw, '-maxw', $meme_maxw);
  push(@meme_args, '-bfile', $bfile) if defined($bfile);
  push(@meme_args, '-minsites', $meme_minsites) if defined($meme_minsites);
  push(@meme_args, '-maxsites', $meme_maxsites) if defined($meme_maxsites);
  push(@meme_args, '-time', $meme_time) if defined($meme_time);
  push(@meme_args, '-p', $meme_p) if defined($meme_p);
  push(@meme_args, '-revcomp') unless $meme_norevcomp;
  push(@meme_args,'-nostatus') unless $appverbosity >= 2;
  my $meme_outputs = ["meme.html", "meme.txt", "meme.xml"];
  my $meme_comment = 'Motifs discovered in'.$meme_sample_source.' trimmed (central '.$ccut.'bp) input sequences.';
  my $meme_status = &report_and_run(\@report_commands, \@report_outputs, 
      $level, $bindir, $meme_prog, \@meme_args, $meme_outputs, $meme_outdir, $meme_comment);
  my $meme_motifs = catfile($outdir, $meme_outdir, 'meme.txt');
  ##################################################################
  # Run programs dependant on MEME
  ##################################################################
  if ($meme_status == 0) {
    $level++;
    ##################################################################
    # Run TOMTOM on MEME motifs
    ##################################################################
    if (@dbs) {
      my $tomtom_prog = "tomtom";
      my $tomtom_outdir = "meme_tomtom_out";
      my @tomtom_args = ('-verbosity', $appverbosity, '-oc', catdir($outdir, $tomtom_outdir), '-min-overlap', 5, 
          '-dist', 'pearson', '-evalue', '-thresh', 0.1, '-no-ssc');
      push(@tomtom_args, '-bfile', $bfile) if defined($bfile);
      push(@tomtom_args, $meme_motifs, @dbs);
      my $tomtom_outputs = ["tomtom.html", "tomtom.txt", "tomtom.xml"];
      my $tomtom_comment = 'Motifs from '. join(", ", @tomtom_dbnames) .' that match motifs MEME discovers.';
      &report_and_run(\@report_commands, \@report_outputs,
        $level, $bindir, $tomtom_prog, \@tomtom_args, $tomtom_outputs, $tomtom_outdir, $tomtom_comment);
    }
    ##################################################################
    # Run MAST on MEME motifs
    ##################################################################
    if ($run_mast) {
      my $mast_prog = "mast";
      my $mast_outdir = "meme_mast_out";
      my @mast_args = ('-oc', catdir($outdir, $mast_outdir), $meme_motifs, $sequences, '-ev', $num_centered_sequences);
      push(@mast_args, '-bfile', $bfile) if defined($bfile);
      push(@mast_args,'-nostatus') unless $appverbosity >= 2;
      my $mast_outputs = ["mast.html", "mast.txt", "mast.xml"];
      my $mast_comment = 'Predicted locations of all MEME motifs (<i>p</i> &lt; 0.0001) in the input sequences.';
      &report_and_run(\@report_commands, \@report_outputs,
          $level, $bindir, $mast_prog, \@mast_args, $mast_outputs, $mast_outdir, $mast_comment);
    }
    ##################################################################
    # Run AMA on MEME motifs
    ##################################################################
    if ($run_ama) {
      my $ama_prog = "ama";
      my $ama_outdir = "meme_ama_out";
      my @ama_args = ('--verbosity', $appverbosity, '--oc', catdir($outdir, $ama_outdir));
      push(@ama_args, '--sdbg', 0) unless defined($bfile);
      push(@ama_args, $meme_motifs, $sequences);
      push(@ama_args, $bfile) if defined($bfile);
      my $ama_outputs = ["", "ama.txt", "ama.xml"];
      my $ama_comment = 'Estimated binding affinity of each MEME motif to each input sequence.';
      &report_and_run(\@report_commands, \@report_outputs,
          $level, $bindir, $ama_prog, \@ama_args, $ama_outputs, $ama_outdir, $ama_comment);
    }

    $level--;
  }
  ##################################################################
  # Run DREME
  ##################################################################
  my $dreme_prog = "dreme";
  my $dreme_outdir = "dreme_out";
  my @dreme_args = ('-v', $appverbosity, '-oc', $dreme_outdir, '-p', $seqs_centered, '-n', $seqs_shuffled, '-png');
  my $dreme_outputs = ["dreme.html", "dreme.txt", "dreme.xml"];
  my $dreme_comment = 'Motifs discovered in the trimmed (central '.$ccut.'bp) input sequences.';
  my $dreme_motifs = catfile($outdir, $dreme_outdir, 'dreme.txt');
  my $dreme_status = &report_and_run(\@report_commands, \@report_outputs, 
      $level, $bindir, $dreme_prog, \@dreme_args, $dreme_outputs, 
      $dreme_outdir, $dreme_comment);
  ##################################################################
  # Run programs dependant on DREME
  ##################################################################
  if ($dreme_status == 0) {
    $level++;
    ##################################################################
    # Run TOMTOM on DREME motifs
    ##################################################################
    if (@dbs) {
      my $tomtom_prog = "tomtom";
      my $tomtom_outdir = "dreme_tomtom_out";
      my @tomtom_args = ('-verbosity', $appverbosity, '-oc', catdir($outdir, $tomtom_outdir), '-min-overlap', 5, 
          '-dist', 'pearson', '-evalue', '-thresh', 0.1, '-no-ssc');
      push(@tomtom_args, '-bfile', $bfile) if defined($bfile);
      push(@tomtom_args, $dreme_motifs, @dbs);
      my $tomtom_outputs = ["tomtom.html", "tomtom.txt", "tomtom.xml"];
      my $tomtom_comment = 'Motifs from '. join(", ", @tomtom_dbnames) .' that match motifs DREME discovers.';
      &report_and_run(\@report_commands, \@report_outputs,
        $level, $bindir, $tomtom_prog, \@tomtom_args, $tomtom_outputs, $tomtom_outdir, $tomtom_comment);
    }
    ##################################################################
    # Run MAST on DREME motifs
    ##################################################################
    if ($run_mast) {
      my $mast_prog = "mast";
      my $mast_outdir = "dreme_mast_out";
      my @mast_args = ('-oc', catdir($outdir, $mast_outdir), $dreme_motifs, $sequences, '-ev', $num_centered_sequences);
      push(@mast_args, '-bfile', $bfile) if defined($bfile);
      push(@mast_args, '-nostatus') unless $appverbosity >= 2;
      my $mast_outputs = ["mast.html", "mast.txt", "mast.xml"];
      my $mast_comment = 'Predicted locations of all DREME motifs (<i>p</i> &lt; 0.0001) in the input sequences.';
      &report_and_run(\@report_commands, \@report_outputs,
          $level, $bindir, $mast_prog, \@mast_args, $mast_outputs, $mast_outdir, $mast_comment);
    }
    ##################################################################
    # Run AMA on DREME motifs
    ##################################################################
    if ($run_ama) {
      my $ama_prog = "ama";
      my $ama_outdir = "dreme_ama_out";
      my @ama_args = ('--verbosity', $appverbosity, '--oc', catdir($outdir, $ama_outdir));
      push(@ama_args, '--sdbg', 0) unless defined($bfile);
      push(@ama_args, $dreme_motifs, $sequences);
      push(@ama_args, $bfile) if defined($bfile);
      my $ama_outputs = ["", "ama.txt", "ama.xml"];
      my $ama_comment = 'Estimated binding affinity of each DREME motif to each input sequence.';
      &report_and_run(\@report_commands, \@report_outputs,
          $level, $bindir, $ama_prog, \@ama_args, $ama_outputs, $ama_outdir, $ama_comment);
    }

    $level--;
  }
  ##################################################################
  # Run AME
  ##################################################################
  if (@dbs) {
    my $ame_prog = "ame";
    my $ame_outdir = "ame_out";
    my @ame_args = ('--verbose', $appverbosity, '--oc', catdir($outdir, $ame_outdir), '--fix-partition', $num_ame_sequences);
    push(@ame_args, '--bgformat', 0) unless defined($bfile);
    push(@ame_args, '--bgformat', 2, '--bgfile', $bfile) if defined($bfile);
    push(@ame_args, $ame_input, @dbs);
    my $ame_outputs = ["ame.html", "ame.txt", ""];
    my $ame_comment = 'Motifs from '.join(", ", @tomtom_dbnames).' enriched in the trimmed (central '.$ccut.'bp) input sequences.';
    &report_and_run(\@report_commands, \@report_outputs,
        $level, $bindir, $ame_prog, \@ame_args, $ame_outputs, $ame_outdir, $ame_comment);
  }
}

################################################################################
#
#       getsize
#
#       return the size of a FASTA file (assumes getsize installed)
#
#       argument:
#         name of file (string)
#
################################################################################

sub getsize {
  my ($filename) = @_;
  # redirect stdout to a temp file
  my $oldout;
  my $tmpout = tempfile('getsize_stdout_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1);
  open($oldout, ">&STDOUT")  or die("Can't dup STDOUT: $!");
  open(STDOUT, '>&', $tmpout) or die("Can't redirect STDOUT to temp file: $!");
  # redirect stderr to a temp file
  my $olderr;
  my $tmperr = tempfile('getsize_stderr_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1);
  open($olderr, ">&STDERR") or die("Can't dup STDERR: $!");
  open(STDERR, '>&', $tmperr) or die("Can't dredirect STDERR to temp file: $!");
  
  # run command
  system($bindir.'/getsize', $filename);
  # copy status
  my $status = $?;
  # reset stderr
  open(STDERR, ">&", $olderr) or die("Can't reset STDERR: $!");
  # reset stdout
  open(STDOUT, ">&", $oldout) or die("Can't reset STDOUT: $!");

  # slurp output and errors
  seek($tmpout, 0, SEEK_SET);
  my $output = do {local $/ = undef; <$tmpout>};
  close($tmpout);
  seek($tmperr, 0, SEEK_SET);
  my $errors = do {local $/ = undef; <$tmperr>};
  close($tmperr);
  
  #log errors
  if ($errors) {
    my $msg = "'getsize $filename': " . $errors;
    $logger->error($msg) if ($logger);
    print(STDERR $msg, "\n");
  }

  # check status
  if ($status == -1) {
    die("Failed to execute command 'getsize $filename': $!");
  } elsif ($status & 127) {
    die(sprintf("Process executing command 'getsize $filename' died with signal %d, %s coredump.",
        ($status & 127), ($status & 128) ? 'with' : 'without'));
  } elsif ($status != 0) {
    die(sprintf("Process executing command 'getsize $filename' exited with value %d indicating failure.", $? >> 8));
  }
  # parse output
  if ($output) {
    my @parts = split(/\s/, $output);
    my $N = $parts[0];
    unless ($N =~ m/^\d+$/ && $N > 0) {
      die("Command 'getsize $filename' didn't return a number");
    }
    return 0 + $N;
  } else {
    die("Command 'getsize $filename' didn't return a number");
  }
}


################################################################################
#
#       report_and_run
#
#       run a job as specified un an entry in %arguments and return the
#        output and command line reports
#    arguments (all string unless otherwise specified):
#         $level:      if defined, the number of indents for 1st table element
#        $bindir:     directory containing executables
#        $args:       command line arguments
#        $program:    program name (executable's name)
#        $outputs:    output file names for HTML report (reference to array)
#        $outputdir:  destination for outputs
#        $comment:    text to display next to outputs
#        $newstdin:   if defined, redirect STDIN there
#        $newstdout:  if defined redirect STDOUT to there
#    returns
#        (command string, output struct)
#
################################################################################
sub report_and_run {
  my ($commands_list, $outputs_list, $level, $bindir, $program, $args, $outputs, $outputdir, $comment, $newstdin, $newstdout) = @_;
  my $errorfile = catfile($outdir, $outputdir . "_stderr.txt");
  my $status = 0;
  my @full_path_outputs = ();
  foreach my $file (@{$outputs}) {
    push(@full_path_outputs, ($file ? catfile($outdir, $outputdir, $file) : ''));
  }
  push(@{$commands_list}, &run_job ($bindir, $program, $args, $newstdin, $newstdout, $errorfile, \$status));
  push(@{$outputs_list}, &report_job (uc($program) . " output:", \@full_path_outputs, $comment, $level, $errorfile, $status));
  write_html();
  return $status;
}

################################################################################
#
#       run_job
#
#       run a step as if on the command-line, with alternatives for
#       STDIN and STDOUT; to be safe revert to previous STDIN and
#       STDOUT before completion; if a path should be appeneded to
#       the command or files, do that before calling run_job
#    arguments:
#         $bindir: directory containing the binary; if empty or undef add no path
#        $commandline: command to execute with arguments
#        $stdin: alternative to standard input
#        $stdout: alternative to standard output
#    both $stdin, $stdout if empty string or undefined default to no
#    change to standard input or output
#    returns:
#        the command line including if defined STDIN and STDOUT redirects
#
################################################################################
sub run_job {
  my($bindir, $program,  $args, $stdin, $stdout, $stderr, $status_ref) = @_;
  my $exe = ($bindir ? catfile($bindir, $program) : $program);
  # make a command line for documentation purposes
  my $commandline = $program;
  foreach my $arg (@{$args}) {
    $commandline .= ' ' . quote_spaces($arg);
  }
  $logger->debug("START: ", $commandline) if $logger;
  # hints from http://perldoc.perl.org/functions/open.html
  # open STDIN and STDOUT with new names, or record as "-" for error
  # messages if not defined
  my ($oldin, $oldout, $olderr);
  if ($stdin) {
    $logger->debug("    from: ", $stdin) if $logger;
    open $oldin, "<&STDIN"     or die("Can't dup STDIN: $!");
    open STDIN, '<', $stdin or die("Can't redirect STDIN: $!");
    $commandline .= ' < ' . quote_spaces($stdin) if ($stdin);
  }
  if ($stdout) {
    $logger->debug("    to: ", $stdout) if $logger;
    open $oldout, ">&STDOUT"     or die("Can't dup STDOUT: $!");
    open STDOUT, '>', $stdout or die("Can't redirect STDOUT: $!");
    $commandline .= ' > ' . quote_spaces($stdout) if ($stdout);
  }
  if ($stderr) {
    $logger->debug("    err: ", $stderr) if $logger;
    open $olderr, ">&STDERR"     or die("Can't dup STDERR: $!");
    open STDERR, '>', $stderr or die("Can't redirect STDERR: $!");
  }
  # hints from http://perldoc.perl.org/functions/system.html and
  # http://stackoverflow.com/questions/1348639/how-can-i-reinitialize-perls-stdin-stdout-stderr
  write_html("Running ". uc($program));
  if ($echo) { # echo command
    if ($stdout) {
      print $oldout $commandline, "\n"; 
    } else {
      print STDOUT $commandline, "\n";
    }
  }
  system($exe, @{$args});
  if (defined $status_ref) {
    $$status_ref = $?;
  }
  my $errmsg = '';
  if ($? == -1) {
    $errmsg = "Failed to execute command '$commandline': $!";
  } elsif ($? & 127) {
    $errmsg = sprintf("Process executing command '$commandline' died with signal %d, %s coredump.",
        ($? & 127), ($? & 128) ? 'with' : 'without');
  } elsif ($? != 0) {
    $errmsg = sprintf("Process executing command '$commandline' exited with value %d indicating failure.", $? >> 8);
  }
  if ($errmsg) {
    $logger->error($errmsg) if ($logger);
    print STDERR $errmsg, "\n";
  }

  open STDIN, "<&", $oldin or die("Can't reset STDIN: $!") if ($stdin);
  open STDOUT, ">&", $oldout or die("Can't reset STDOUT: $!") if ($stdout);
  open STDERR, ">&", $olderr or die("Can't reset STDERR: $!") if ($stderr);

  # get rid of clutter
  unlink $stderr if ($stderr && -e $stderr && -s $stderr == 0);

  $logger->debug("FINISH") if ($logger);
  return {command => $commandline};
}

################################################################################
#
#       quote_spaces
#
#       returns the passed string with quotes around it if it has spaces.
#
################################################################################
sub quote_spaces {
  my ($string) = @_;
  if ($string =~ /\s/) {
    return "\"".$string."\"";
  }
  return $string;
}

################################################################################
#
#       report_job
#
#       returns a structure that is one item of a group in the html template.
#
################################################################################
sub report_job {
      my(
      $command,
      $outputs, # paths to files from cwd
      $comment,
      $level,
      $errorfile,
      $status
      ) = @_;

  my @files = ();
  my ($errors, $warnings);
  
  # calculate the padding
  my $padding = ($level ? $level * 4 : 0);

  # set the error/warning file
  if ($errorfile && -s $errorfile) {
    my $htmlrelpath = abs2rel(abs_path($errorfile), abs_path($outdir));
    if ($status) {
      $errors = $htmlrelpath;
    } else {
      $warnings = $htmlrelpath;
    }
  }

  # loop through all the outputs
  foreach my $file (@$outputs) {
    if ($file && -s $file) {
      # found file, construct relative path to it from index.html
      my $htmlrelpath = abs2rel(abs_path($file), abs_path($outdir));
      # get the display name
      my $disp = fileparse($htmlrelpath);
      if ($disp =~ m/[^\.]+\.html$/i) {
        $disp = "HTML";
      } elsif($disp =~ m/[^\.]+\.xml$/i) {
        $disp = "XML";
      } elsif($disp =~ m/[^\.]+\.txt$/i) {
        $disp = "plain text";
      }
      push(@files, {link => $htmlrelpath, disp => $disp});
    } else {
      # file doesn't exist or is empty
      push(@files, {link => undef, disp => undef});
    }
  }

  return {padding => $padding, item => $command, errors => $errors, warnings => $warnings, files => \@files, desc => $comment};
}