#!/usr/local/bin/perl use strict; use warnings; use lib qw(/home/rmott/meme/lib/perl); use Cwd qw(abs_path); use Fcntl qw(O_CREAT O_WRONLY O_TRUNC SEEK_SET); use File::Basename qw(fileparse); use File::Temp qw(tempfile); use File::Spec::Functions qw(abs2rel catfile catdir splitdir tmpdir); use Getopt::Long; use HTML::Template; use List::Util qw(min max); use Pod::Usage; use Globals; =head1 NAME meme-chip - Does an automated analysis of a ChIPseq DNA sequence dataset with the MEME toolchain. =head1 SYNOPSIS meme-chip [options] Options: -o : output to the specified directory, failing if the directory exists -oc : output to the specified directory, overwriting if the directory exists -index-name : name of html index file; default: index.html -db : target database for use by TOMTOM and AME, if not present then TOMTOM and AME are not run -bfile : background file -nmeme : limit of sequences to pass to MEME -ccut : maximum size of a sequence before it is cut down to a centered section; a value of 0 indicates the sequences should not be cut down; default: 100 -desc : description of the job -fdesc : file containing plain text description of the job -run-mast : run MAST - motif alignment & search tool -run-ama : run AMA - Average motif affinity. -noecho : don't echo the commands run -tar : create a tar.gz file of the outputs -help : display this help message MEME Specific Options: -meme-mod [oops|zoops|anr]: sites used in a single sequence -meme-minw : minimum motif width -meme-maxw : maximum motif width -meme-nmotifs : maximum number of motifs to find -meme-minsites : minimum number of sites per motif -meme-maxsites : maximum number of sites per motif -meme-time : maximum time to run MEME in seconds -meme-p : use parallel version with processors -meme-norevcomp : search given strand only -meme-pal : look for palindromes only =cut # Setup logging my $logger = undef; eval { require Log::Log4perl; Log::Log4perl->import(); Log::Log4perl::init('/home/rmott/meme/etc/logging.conf'); $logger = Log::Log4perl->get_logger('meme.service.memechip'); }; # Log commandline if ($logger && $logger->is_debug()) { $logger->debug("Starting MEME-ChIP"); $logger->debug(join(" ", 'meme-chip', @ARGV)); } # Constants my $bindir = '/home/rmott/meme/bin'; my $site_url = 'http://meme.nbcr.net/meme'; my $template_file = '/home/rmott/meme/etc/meme-chip.tmpl'; my $tmpdir = ''; # use the perl default if none is supplied or the replace fails $tmpdir = &tmpdir() if ($tmpdir eq '' || $tmpdir =~ m/^\@TMP[_]DIR\@$/); # Required Argument my $sequences; # General Options my $help = 0; # FALSE my $echo = 1; # TRUE my $tar = 0; # FALSE my $run_mast = 0; # FALSE my $run_ama = 0; # FALSE my $appverbosity = 1; #all the applications write to stderr currently so had to disable this my $clobber = 1; # TRUE my $outdir = 'memechip_out'; my $index_name = 'index.html'; my $desc = undef; my $nmeme = $MAXMEMESEQS; my $ccut = $CMAX; my @dbs = (); my $bfile = undef; # MEME Specific Options my $meme_minw = undef; # default is 6 my $meme_maxw = undef; # default is 30 my $meme_mod = 'zoops'; my $meme_nmotifs = 3; my $meme_minsites = undef; my $meme_maxsites = undef; my $meme_time = undef; my $meme_p = undef; my $meme_norevcomp = 0; # FALSE my $meme_palindromes = 0; # FALSE # Derived Globals my $outfile; my $stderr_txt; my $stdout_txt; my @tomtom_dbnames = (); my @desc_para = (); # Report Globals my @report_inputs = (); my @report_outputs = (); my @report_commands = (); my $report_groups = [{group => "Results", items => \@report_outputs}, {group => "Data", items => \@report_inputs}]; # Parse Options $logger->debug("Parsing options") if $logger; my $options_success = 0; # FALSE my $options_err = undef; eval { # redirect stderr to a temp file as we want to log it my $olderr; my $tmperr = tempfile('GetOptions_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); open($olderr, ">&STDERR") or die("Can't dup STDERR: $!"); open(STDERR, '>&', $tmperr) or die("Can't redirect STDERR to temp file: $!"); # parse options $options_success = GetOptions( 'help|?' => \$help, 'noecho' => sub {$echo = 0}, 'tar' => \$tar, 'run-mast' => \$run_mast, 'run-ama' => \$run_ama, 'o=s' => sub {$clobber = 0; shift; $outdir = shift}, 'oc=s' => \$outdir, 'index-name=s' => \$index_name, 'desc=s' => \$desc, 'fdesc=s' => # slurp the description into a scalar sub { my ($param, $file) = @_; open(my $fh, '<', $file) or die("Couldn't open description file: $!"); $desc = do { local( $/ ); <$fh>}; close($fh); }, 'nmeme=i' => \$nmeme, 'ccut=i' => \$ccut, 'db=s' => \@dbs, 'bfile=s' => \$bfile, 'meme-mod=s' => \$meme_mod, 'meme-minw=i' => \$meme_minw, 'meme-maxw=i' => \$meme_maxw, 'meme-nmotifs=i' => \$meme_nmotifs, 'meme-minsites=i' => \$meme_minsites, 'meme-maxsites=i' => \$meme_maxsites, 'meme-time=i' => \$meme_time, 'meme-p=i' => \$meme_p, 'meme-norevcomp' => \$meme_norevcomp, 'meme-pal' => \$meme_palindromes, ); # reset STDERR open(STDERR, ">&", $olderr) or die("Can't reset STDERR: $!"); # slurp errors seek($tmperr, SEEK_SET, 0); chomp($options_err = do {local ($/); <$tmperr>}); close($tmperr); }; # check for eval problems if ($@) { $logger->fatal($@) if $logger; die($@); } # check for problems parsing options if (!$options_success) { $logger->fatal($options_err) if $logger; pod2usage($options_err); } pod2usage(1) if $help; # Validate Options and Arguments $logger->debug("Validating options") if $logger; eval { die("A sequences file must be specified.") unless @ARGV; $sequences = shift @ARGV; die("The sequences file (\"".$sequences."\") does not exist.") unless (-e $sequences); # validate background die("The background file (\"". $bfile ."\") specified by option bg does not exist.") if ($bfile && not -e $bfile); # validate nmeme die("Option nmeme must be at least 1. Got $nmeme.") if ($nmeme < 1); # validate ccut die("Option ccut must be larger or equal to $MINMEMESEQW. Got $ccut.") if ($ccut < $MINMEMESEQW && $ccut != 0); # validate meme-mod if ($meme_mod =~ m/^(oops|zoops|anr)$/) { $meme_mod = $1; } else { die("Option meme-mod must be either oops, zoops or anr. Got $meme_mod."); } # validate meme-minw and meme-maxw, setting defaults if required die("Option meme-minw must be between $MINW and $MAXW. Got $meme_minw.") if (defined($meme_minw) && ($meme_minw < $MINW || $meme_minw > $MAXW)); if (defined($meme_maxw) && ($meme_maxw < $MINW || $meme_maxw > $MAXW || (defined($meme_minw) && $meme_maxw < $meme_minw))) { die("Option meme-maxw must be between $MINW and $MAXW and larger or equal to option meme-minw if it is supplied. Got $meme_maxw."); } $meme_minw = (defined($meme_maxw) ? min(6, $meme_maxw) : 6) unless defined($meme_minw); $meme_maxw = max($meme_minw, 30) unless defined($meme_maxw); # validate meme-nmotifs die("Option meme-nmotifs must be at least 1. Got $meme_nmotifs.") if ($meme_nmotifs < 1); # validate meme-minsites and meme-maxsites if (defined($meme_minsites) && ($meme_minsites < $MINSITES || $meme_minsites > $MAXSITES)) { die("Option meme-minsites must be between $MINSITES and $MAXSITES. Got $meme_minsites."); } if (defined($meme_maxsites) && ($meme_maxsites < $MINSITES || $meme_maxsites > $MAXSITES || (defined($meme_minsites) && $meme_maxsites < $meme_minsites))) { die("Option meme-maxsites must be between $MINSITES and $MAXSITES and ". "larger or equal to option meme-minsites if it is supplied. Got $meme_maxsites."); } # validate meme-time die("Option meme-time must be at least 1. Got $meme_time.") if (defined($meme_time) && $meme_time < 1); # validate meme-p die("Option meme-p must be at least 1. Got $meme_p.") if (defined($meme_p) && $meme_p < 1); # validate Tomtom (and AME) databases foreach my $db (@dbs) { die("Missing Tomtom database \"".$db."\"") unless (-e $db); } }; if ($@) { $logger->fatal($@) if $logger; pod2usage($@); } # Do this outside of main because these bits are needed to make # the html output which main relys on for error reporting eval { # convert the tomtom dbs into a nicer readable form $logger->debug("Parsing db names") if $logger; foreach my $db (@dbs) { my $dbname = fileparse($db, '.meme'); $dbname =~ s/_/ /g; #replace underscores push(@tomtom_dbnames, $dbname); } # parse the description into paragraphs and lines # a paragraph is seperated from other paragraphs by # multiple new line characters $logger->debug("Parsing description") if $logger; if ($desc) { my @paragraphs = split(/\n\n/, $desc); for my $paragraph (@paragraphs) { my @para_lines = split(/\n/, $paragraph); my $text = shift(@para_lines); my @lines = (); foreach my $line (@para_lines) { push(@lines, {text => $line}); } push(@desc_para, {text => $text, lines => \@lines}); } } # Create the output directory $logger->debug("Creating output directory") if $logger; if (-e $outdir) { die("Output directory \"$outdir\" already exists\n") if (!$clobber); } else { mkdir($outdir) or die("Failed to create output directory: $!"); push(@report_commands, {command => 'mkdir ' . "e_spaces($outdir)}); } # calculate some file names $outfile = catfile($outdir, $index_name); $stdout_txt = catfile($outdir,'stdout.txt'); $stderr_txt = catfile($outdir, 'stderr.txt'); }; if ($@) { $logger->fatal($@) if $logger; die($@); } # Run main which does most of the work and if it completes mark the html as done. $logger->debug("Running main") if $logger; eval { &main(); &write_html("", 0); # html to go in tar if ($tar) { my $tarname = &tar_output(); &write_html("", 0, undef, $tarname); # index which links to tar } }; # If errors occured then write out the html meantioning the errors, log and die if ($@) { my $msg = $@; eval { &write_html("Error!", 0, $msg); }; $logger->fatal($msg) if ($logger); die($msg); } ################################################################################ # # write_html # # Writes out the html to a file. # ################################################################################ sub write_html { my $fh; my ($status_text, $refresh, $error_msg, $tar) = @_; # give defaults for unsupplied parameters $refresh = 60 unless defined($refresh); $status_text = "Running" unless defined($status_text); # open the file for writing sysopen($fh, $outfile, O_CREAT | O_WRONLY | O_TRUNC) or die("Failed to open \"$outfile\"."); # create the template my $template = HTML::Template->new(filename => $template_file); my $data = {status => $status_text, desc => \@desc_para, error => $error_msg, refresh => $refresh, groups => $report_groups, commands => \@report_commands, command_rows => scalar(@report_commands) * 2, site_url => $site_url, tar => $tar}; $template->param(%{$data}); # write the template to file print $fh $template->output(); # close the file close($fh) or die("Failed to close \"$outfile\"."); } ################################################################################ # # tar_output # # tars the output directory # ################################################################################ sub tar_output { my $seqs = fileparse($sequences); my @outfiles = ($seqs, 'seqs-centered', 'seqs-shuffled', 'seqs-sampled', 'seqs-discarded', 'seqs-centered_w_bg', 'index.html'); my @outdirs = ('meme_out', 'meme_tomtom_out', 'meme_mast_out', 'meme_ama_out', 'dreme_out', 'dreme_tomtom_out', 'dreme_mast_out', 'dreme_ama_out', 'ame_out'); foreach my $dir (@outdirs) { push(@outfiles, $dir, $dir . '_stderr.txt'); } # check all the files exist (otherwise tar will fail) my $i; for ($i = 0; $i < scalar(@outfiles); $i++) { unless (-e catfile($outdir, $outfiles[$i])) { # delete the missing file splice @outfiles, $i, 1; $i--; } } # we want to tar including the outdir to avoid a tarbomb # find the real name of the outdir (user could have passed '.') my $folder = (splitdir(abs_path($outdir)))[-1]; my $folder_dir = abs_path(catdir($outdir, '..')); # append the folder to the outfiles for (my $i = 0; $i < scalar(@outfiles); $i++) { $outfiles[$i] = catfile($folder, $outfiles[$i]); } my $tarname = $folder . ".tar.gz"; my $tarfile = catfile($outdir, $tarname); # run tar system('tar', '-czf', $tarfile, '-C', $folder_dir, @outfiles); my $errmsg = ''; if ($? == -1) { $errmsg = "Failed to execute tar: $!"; } elsif ($? & 127) { $errmsg = sprintf("tar died with signal %d, %s coredump.", ($? & 127), ($? & 128) ? 'with' : 'without'); } elsif ($? != 0) { $errmsg = sprintf("tar exited with value %d indicating failure.", $? >> 8); } die($errmsg) if $errmsg; return $tarname; } ################################################################################ # # main # # Run all the programs # ################################################################################ sub main { my $seqs_centered = catfile($outdir, "seqs-centered"); my $seqs_shuffled = catfile($outdir, "seqs-shuffled"); my $seqs_sampled = catfile($outdir, "seqs-sampled"); my $seqs_discarded = catfile($outdir, "seqs-discarded"); my $seqs_final; my $num_ame_sequences; my $ame_input; #################################################################### # set up and record the inputs #################################################################### $logger->debug("Setting up the sequence inputs") if $logger; push(@report_inputs, &report_job ("Input Sequences:", [$sequences,"",""], "Your original untrimmed sequences.")); &write_html(); if ($ccut > 0) { push(@report_commands, &run_job ($bindir, "fasta-center", ["-len", $ccut], $sequences, $seqs_centered)); push(@report_inputs, &report_job ("fasta-center output:", [$seqs_centered,"",""], "Your sequences centered and trimed to width $ccut.")); &write_html(); } else { # centering disabled $seqs_centered = $sequences; } my $num_centered_sequences = &getsize($seqs_centered); my $dinucargs = ["-f", $seqs_centered, "-t", "-dinuc"]; push(@report_commands, &run_job ($bindir, "fasta-dinucleotide-shuffle", $dinucargs, undef, $seqs_shuffled)); push(@report_inputs, &report_job ("fasta-dinucleotide-shuffle output:", [$seqs_shuffled,"",""], "The centered sequences randomly shuffled maintaining their dinucleotide frequency, used as a background for DREME and AME.")); &write_html(); #################################################################### # set up inputs for AME #################################################################### $logger->debug("Setting up the AME inputs") if $logger; $num_ame_sequences = $num_centered_sequences; $ame_input = $seqs_centered."_w_bg"; push(@report_commands, &run_job (undef, "cat", [$seqs_centered, $seqs_shuffled], undef, $ame_input)); push(@report_inputs, &report_job ("AME input:", [$ame_input,"",""], "The centered sequences followed by the same ". "sequences after dinucleotide shuffling.")); &write_html(); ################################################################## # set up inputs for MEME ################################################################## $logger->debug("Setting up the MEME inputs") if $logger; # subsample to maximum sequences we want for MEME if too many sequences $seqs_final = $seqs_centered; my $meme_sample_source = ""; if ($num_centered_sequences > $nmeme) { $meme_sample_source = " $nmeme (randomly chosen)"; push(@report_commands, &run_job($bindir, "fasta-subsample", [$seqs_centered, $nmeme, "-rest", $seqs_discarded], undef, $seqs_sampled)); $seqs_final = $seqs_sampled; push(@report_inputs, &report_job ("fasta-subsample (used) output:", [$seqs_sampled,"",""], "A random sample of $nmeme of the centered sequences, used by MEME.")); push(@report_inputs, &report_job ( "fasta-subsample (discarded) output:", [$seqs_discarded,"",""], "The centered sequences omitted from the sample used by MEME.")); &write_html(); } my $level = 0; ################################################################## # Run MEME ################################################################## my $meme_prog = "meme"; my $meme_outdir = "meme_out"; # TODO original file name is not recorded #'-sf', D:\\ERK2\\ERK221_BKG09\\ERK221_BKG09_MACS_top1000.fasta my @meme_args = ($seqs_final, '-oc', catdir($outdir, $meme_outdir), '-dna', '-mod', $meme_mod, '-nmotifs', $meme_nmotifs, '-minw', $meme_minw, '-maxw', $meme_maxw); push(@meme_args, '-bfile', $bfile) if defined($bfile); push(@meme_args, '-minsites', $meme_minsites) if defined($meme_minsites); push(@meme_args, '-maxsites', $meme_maxsites) if defined($meme_maxsites); push(@meme_args, '-time', $meme_time) if defined($meme_time); push(@meme_args, '-p', $meme_p) if defined($meme_p); push(@meme_args, '-revcomp') unless $meme_norevcomp; push(@meme_args,'-nostatus') unless $appverbosity >= 2; my $meme_outputs = ["meme.html", "meme.txt", "meme.xml"]; my $meme_comment = 'Motifs discovered in'.$meme_sample_source.' trimmed (central '.$ccut.'bp) input sequences.'; my $meme_status = &report_and_run(\@report_commands, \@report_outputs, $level, $bindir, $meme_prog, \@meme_args, $meme_outputs, $meme_outdir, $meme_comment); my $meme_motifs = catfile($outdir, $meme_outdir, 'meme.txt'); ################################################################## # Run programs dependant on MEME ################################################################## if ($meme_status == 0) { $level++; ################################################################## # Run TOMTOM on MEME motifs ################################################################## if (@dbs) { my $tomtom_prog = "tomtom"; my $tomtom_outdir = "meme_tomtom_out"; my @tomtom_args = ('-verbosity', $appverbosity, '-oc', catdir($outdir, $tomtom_outdir), '-min-overlap', 5, '-dist', 'pearson', '-evalue', '-thresh', 0.1, '-no-ssc'); push(@tomtom_args, '-bfile', $bfile) if defined($bfile); push(@tomtom_args, $meme_motifs, @dbs); my $tomtom_outputs = ["tomtom.html", "tomtom.txt", "tomtom.xml"]; my $tomtom_comment = 'Motifs from '. join(", ", @tomtom_dbnames) .' that match motifs MEME discovers.'; &report_and_run(\@report_commands, \@report_outputs, $level, $bindir, $tomtom_prog, \@tomtom_args, $tomtom_outputs, $tomtom_outdir, $tomtom_comment); } ################################################################## # Run MAST on MEME motifs ################################################################## if ($run_mast) { my $mast_prog = "mast"; my $mast_outdir = "meme_mast_out"; my @mast_args = ('-oc', catdir($outdir, $mast_outdir), $meme_motifs, $sequences, '-ev', $num_centered_sequences); push(@mast_args, '-bfile', $bfile) if defined($bfile); push(@mast_args,'-nostatus') unless $appverbosity >= 2; my $mast_outputs = ["mast.html", "mast.txt", "mast.xml"]; my $mast_comment = 'Predicted locations of all MEME motifs (p < 0.0001) in the input sequences.'; &report_and_run(\@report_commands, \@report_outputs, $level, $bindir, $mast_prog, \@mast_args, $mast_outputs, $mast_outdir, $mast_comment); } ################################################################## # Run AMA on MEME motifs ################################################################## if ($run_ama) { my $ama_prog = "ama"; my $ama_outdir = "meme_ama_out"; my @ama_args = ('--verbosity', $appverbosity, '--oc', catdir($outdir, $ama_outdir)); push(@ama_args, '--sdbg', 0) unless defined($bfile); push(@ama_args, $meme_motifs, $sequences); push(@ama_args, $bfile) if defined($bfile); my $ama_outputs = ["", "ama.txt", "ama.xml"]; my $ama_comment = 'Estimated binding affinity of each MEME motif to each input sequence.'; &report_and_run(\@report_commands, \@report_outputs, $level, $bindir, $ama_prog, \@ama_args, $ama_outputs, $ama_outdir, $ama_comment); } $level--; } ################################################################## # Run DREME ################################################################## my $dreme_prog = "dreme"; my $dreme_outdir = "dreme_out"; my @dreme_args = ('-v', $appverbosity, '-oc', $dreme_outdir, '-p', $seqs_centered, '-n', $seqs_shuffled, '-png'); my $dreme_outputs = ["dreme.html", "dreme.txt", "dreme.xml"]; my $dreme_comment = 'Motifs discovered in the trimmed (central '.$ccut.'bp) input sequences.'; my $dreme_motifs = catfile($outdir, $dreme_outdir, 'dreme.txt'); my $dreme_status = &report_and_run(\@report_commands, \@report_outputs, $level, $bindir, $dreme_prog, \@dreme_args, $dreme_outputs, $dreme_outdir, $dreme_comment); ################################################################## # Run programs dependant on DREME ################################################################## if ($dreme_status == 0) { $level++; ################################################################## # Run TOMTOM on DREME motifs ################################################################## if (@dbs) { my $tomtom_prog = "tomtom"; my $tomtom_outdir = "dreme_tomtom_out"; my @tomtom_args = ('-verbosity', $appverbosity, '-oc', catdir($outdir, $tomtom_outdir), '-min-overlap', 5, '-dist', 'pearson', '-evalue', '-thresh', 0.1, '-no-ssc'); push(@tomtom_args, '-bfile', $bfile) if defined($bfile); push(@tomtom_args, $dreme_motifs, @dbs); my $tomtom_outputs = ["tomtom.html", "tomtom.txt", "tomtom.xml"]; my $tomtom_comment = 'Motifs from '. join(", ", @tomtom_dbnames) .' that match motifs DREME discovers.'; &report_and_run(\@report_commands, \@report_outputs, $level, $bindir, $tomtom_prog, \@tomtom_args, $tomtom_outputs, $tomtom_outdir, $tomtom_comment); } ################################################################## # Run MAST on DREME motifs ################################################################## if ($run_mast) { my $mast_prog = "mast"; my $mast_outdir = "dreme_mast_out"; my @mast_args = ('-oc', catdir($outdir, $mast_outdir), $dreme_motifs, $sequences, '-ev', $num_centered_sequences); push(@mast_args, '-bfile', $bfile) if defined($bfile); push(@mast_args, '-nostatus') unless $appverbosity >= 2; my $mast_outputs = ["mast.html", "mast.txt", "mast.xml"]; my $mast_comment = 'Predicted locations of all DREME motifs (p < 0.0001) in the input sequences.'; &report_and_run(\@report_commands, \@report_outputs, $level, $bindir, $mast_prog, \@mast_args, $mast_outputs, $mast_outdir, $mast_comment); } ################################################################## # Run AMA on DREME motifs ################################################################## if ($run_ama) { my $ama_prog = "ama"; my $ama_outdir = "dreme_ama_out"; my @ama_args = ('--verbosity', $appverbosity, '--oc', catdir($outdir, $ama_outdir)); push(@ama_args, '--sdbg', 0) unless defined($bfile); push(@ama_args, $dreme_motifs, $sequences); push(@ama_args, $bfile) if defined($bfile); my $ama_outputs = ["", "ama.txt", "ama.xml"]; my $ama_comment = 'Estimated binding affinity of each DREME motif to each input sequence.'; &report_and_run(\@report_commands, \@report_outputs, $level, $bindir, $ama_prog, \@ama_args, $ama_outputs, $ama_outdir, $ama_comment); } $level--; } ################################################################## # Run AME ################################################################## if (@dbs) { my $ame_prog = "ame"; my $ame_outdir = "ame_out"; my @ame_args = ('--verbose', $appverbosity, '--oc', catdir($outdir, $ame_outdir), '--fix-partition', $num_ame_sequences); push(@ame_args, '--bgformat', 0) unless defined($bfile); push(@ame_args, '--bgformat', 2, '--bgfile', $bfile) if defined($bfile); push(@ame_args, $ame_input, @dbs); my $ame_outputs = ["ame.html", "ame.txt", ""]; my $ame_comment = 'Motifs from '.join(", ", @tomtom_dbnames).' enriched in the trimmed (central '.$ccut.'bp) input sequences.'; &report_and_run(\@report_commands, \@report_outputs, $level, $bindir, $ame_prog, \@ame_args, $ame_outputs, $ame_outdir, $ame_comment); } } ################################################################################ # # getsize # # return the size of a FASTA file (assumes getsize installed) # # argument: # name of file (string) # ################################################################################ sub getsize { my ($filename) = @_; # redirect stdout to a temp file my $oldout; my $tmpout = tempfile('getsize_stdout_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); open($oldout, ">&STDOUT") or die("Can't dup STDOUT: $!"); open(STDOUT, '>&', $tmpout) or die("Can't redirect STDOUT to temp file: $!"); # redirect stderr to a temp file my $olderr; my $tmperr = tempfile('getsize_stderr_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); open($olderr, ">&STDERR") or die("Can't dup STDERR: $!"); open(STDERR, '>&', $tmperr) or die("Can't dredirect STDERR to temp file: $!"); # run command system($bindir.'/getsize', $filename); # copy status my $status = $?; # reset stderr open(STDERR, ">&", $olderr) or die("Can't reset STDERR: $!"); # reset stdout open(STDOUT, ">&", $oldout) or die("Can't reset STDOUT: $!"); # slurp output and errors seek($tmpout, 0, SEEK_SET); my $output = do {local $/ = undef; <$tmpout>}; close($tmpout); seek($tmperr, 0, SEEK_SET); my $errors = do {local $/ = undef; <$tmperr>}; close($tmperr); #log errors if ($errors) { my $msg = "'getsize $filename': " . $errors; $logger->error($msg) if ($logger); print(STDERR $msg, "\n"); } # check status if ($status == -1) { die("Failed to execute command 'getsize $filename': $!"); } elsif ($status & 127) { die(sprintf("Process executing command 'getsize $filename' died with signal %d, %s coredump.", ($status & 127), ($status & 128) ? 'with' : 'without')); } elsif ($status != 0) { die(sprintf("Process executing command 'getsize $filename' exited with value %d indicating failure.", $? >> 8)); } # parse output if ($output) { my @parts = split(/\s/, $output); my $N = $parts[0]; unless ($N =~ m/^\d+$/ && $N > 0) { die("Command 'getsize $filename' didn't return a number"); } return 0 + $N; } else { die("Command 'getsize $filename' didn't return a number"); } } ################################################################################ # # report_and_run # # run a job as specified un an entry in %arguments and return the # output and command line reports # arguments (all string unless otherwise specified): # $level: if defined, the number of indents for 1st table element # $bindir: directory containing executables # $args: command line arguments # $program: program name (executable's name) # $outputs: output file names for HTML report (reference to array) # $outputdir: destination for outputs # $comment: text to display next to outputs # $newstdin: if defined, redirect STDIN there # $newstdout: if defined redirect STDOUT to there # returns # (command string, output struct) # ################################################################################ sub report_and_run { my ($commands_list, $outputs_list, $level, $bindir, $program, $args, $outputs, $outputdir, $comment, $newstdin, $newstdout) = @_; my $errorfile = catfile($outdir, $outputdir . "_stderr.txt"); my $status = 0; my @full_path_outputs = (); foreach my $file (@{$outputs}) { push(@full_path_outputs, ($file ? catfile($outdir, $outputdir, $file) : '')); } push(@{$commands_list}, &run_job ($bindir, $program, $args, $newstdin, $newstdout, $errorfile, \$status)); push(@{$outputs_list}, &report_job (uc($program) . " output:", \@full_path_outputs, $comment, $level, $errorfile, $status)); write_html(); return $status; } ################################################################################ # # run_job # # run a step as if on the command-line, with alternatives for # STDIN and STDOUT; to be safe revert to previous STDIN and # STDOUT before completion; if a path should be appeneded to # the command or files, do that before calling run_job # arguments: # $bindir: directory containing the binary; if empty or undef add no path # $commandline: command to execute with arguments # $stdin: alternative to standard input # $stdout: alternative to standard output # both $stdin, $stdout if empty string or undefined default to no # change to standard input or output # returns: # the command line including if defined STDIN and STDOUT redirects # ################################################################################ sub run_job { my($bindir, $program, $args, $stdin, $stdout, $stderr, $status_ref) = @_; my $exe = ($bindir ? catfile($bindir, $program) : $program); # make a command line for documentation purposes my $commandline = $program; foreach my $arg (@{$args}) { $commandline .= ' ' . quote_spaces($arg); } $logger->debug("START: ", $commandline) if $logger; # hints from http://perldoc.perl.org/functions/open.html # open STDIN and STDOUT with new names, or record as "-" for error # messages if not defined my ($oldin, $oldout, $olderr); if ($stdin) { $logger->debug(" from: ", $stdin) if $logger; open $oldin, "<&STDIN" or die("Can't dup STDIN: $!"); open STDIN, '<', $stdin or die("Can't redirect STDIN: $!"); $commandline .= ' < ' . quote_spaces($stdin) if ($stdin); } if ($stdout) { $logger->debug(" to: ", $stdout) if $logger; open $oldout, ">&STDOUT" or die("Can't dup STDOUT: $!"); open STDOUT, '>', $stdout or die("Can't redirect STDOUT: $!"); $commandline .= ' > ' . quote_spaces($stdout) if ($stdout); } if ($stderr) { $logger->debug(" err: ", $stderr) if $logger; open $olderr, ">&STDERR" or die("Can't dup STDERR: $!"); open STDERR, '>', $stderr or die("Can't redirect STDERR: $!"); } # hints from http://perldoc.perl.org/functions/system.html and # http://stackoverflow.com/questions/1348639/how-can-i-reinitialize-perls-stdin-stdout-stderr write_html("Running ". uc($program)); if ($echo) { # echo command if ($stdout) { print $oldout $commandline, "\n"; } else { print STDOUT $commandline, "\n"; } } system($exe, @{$args}); if (defined $status_ref) { $$status_ref = $?; } my $errmsg = ''; if ($? == -1) { $errmsg = "Failed to execute command '$commandline': $!"; } elsif ($? & 127) { $errmsg = sprintf("Process executing command '$commandline' died with signal %d, %s coredump.", ($? & 127), ($? & 128) ? 'with' : 'without'); } elsif ($? != 0) { $errmsg = sprintf("Process executing command '$commandline' exited with value %d indicating failure.", $? >> 8); } if ($errmsg) { $logger->error($errmsg) if ($logger); print STDERR $errmsg, "\n"; } open STDIN, "<&", $oldin or die("Can't reset STDIN: $!") if ($stdin); open STDOUT, ">&", $oldout or die("Can't reset STDOUT: $!") if ($stdout); open STDERR, ">&", $olderr or die("Can't reset STDERR: $!") if ($stderr); # get rid of clutter unlink $stderr if ($stderr && -e $stderr && -s $stderr == 0); $logger->debug("FINISH") if ($logger); return {command => $commandline}; } ################################################################################ # # quote_spaces # # returns the passed string with quotes around it if it has spaces. # ################################################################################ sub quote_spaces { my ($string) = @_; if ($string =~ /\s/) { return "\"".$string."\""; } return $string; } ################################################################################ # # report_job # # returns a structure that is one item of a group in the html template. # ################################################################################ sub report_job { my( $command, $outputs, # paths to files from cwd $comment, $level, $errorfile, $status ) = @_; my @files = (); my ($errors, $warnings); # calculate the padding my $padding = ($level ? $level * 4 : 0); # set the error/warning file if ($errorfile && -s $errorfile) { my $htmlrelpath = abs2rel(abs_path($errorfile), abs_path($outdir)); if ($status) { $errors = $htmlrelpath; } else { $warnings = $htmlrelpath; } } # loop through all the outputs foreach my $file (@$outputs) { if ($file && -s $file) { # found file, construct relative path to it from index.html my $htmlrelpath = abs2rel(abs_path($file), abs_path($outdir)); # get the display name my $disp = fileparse($htmlrelpath); if ($disp =~ m/[^\.]+\.html$/i) { $disp = "HTML"; } elsif($disp =~ m/[^\.]+\.xml$/i) { $disp = "XML"; } elsif($disp =~ m/[^\.]+\.txt$/i) { $disp = "plain text"; } push(@files, {link => $htmlrelpath, disp => $disp}); } else { # file doesn't exist or is empty push(@files, {link => undef, disp => undef}); } } return {padding => $padding, item => $command, errors => $errors, warnings => $warnings, files => \@files, desc => $comment}; }