#!/usr/local/bin/perl -w # FILE: metatest # AUTHOR: William Stafford Noble # CREATE DATE: 7-13-97 # PROJECT: MHMM # COPYRIGHT: 2008, University of Washington # DESCRIPTION: Run a complete set of tests on the Meta-MEME software. use strict; use warnings; use Cwd qw(abs_path); use Fcntl qw(SEEK_SET O_RDONLY); use File::Basename qw(fileparse); use File::Copy qw(copy); use File::Path qw(rmtree); use File::Spec::Functions qw(catfile catdir tmpdir); use Getopt::Long; use Pod::Usage; =head1 NAME metatest - Tests some MEME suite programs. =head1 SYNOPSIS metatest [options] Options: -update update expected program output -complete test additional programs -halt stop after any failed tests =head1 OPTIONS =over 8 =item B<-update> Turns on update mode, replaces expected output files with observed output files when a difference occurs. Use in conjunction with -halt to avoid automatically updating all files. =item B<-complete> Test additional scaffolding programs: clustalw2fasta, fasta-io, log-hmm, meme-io, mhmm-io =back =head1 DESCRIPTION This script tests the programs ama, dreme, fasta-dinucleotide-shuffle, fasta-center, fimo, gomo, mhmms, motiph, psp-gen, qvalue, rha, shadow, tomtom. =cut # Set defaults for global variables. my $complete = 0; my $continue = 1; my $update = 0; my $tmpdir = ''; $tmpdir = &tmpdir() if ($tmpdir eq '' || $tmpdir =~ m/^\@TMP[_]DIR\@$/); GetOptions( "update" => \$update, "complete" => \$complete, "halt" => sub {$continue = 0} ) or pod2usage(2); # find the location of the script my ($script_name, $script_dir) = fileparse(__FILE__); $script_dir = abs_path($script_dir); # add script location to search path unshift(@INC, $script_dir); require ExecUtils; # invoke, stringify_args require DiffXML; # diff_xml # now assuming the script is in the scripts subdirectory # the distribution directory should be just above it my $dist_dir = abs_path(catdir($script_dir, '..')); # change the current directory to the tests directory chdir(catdir($dist_dir, 'tests')); my $etc_dir = catdir($dist_dir, 'etc'); my $src_dir = catdir($dist_dir, 'src'); my $test_results = 0; if ($complete) { &test_scaffold("crp0", 1); &test_scaffold("lipo", 0); } #&test_core("crp0", 1); #&test_core("lipo", 0); # Set the location of the stylesheets $ENV{'MEME_ETC_DIR'} = $etc_dir; $ENV{'MEME_BIN_DIR'} = $src_dir; # Test SpaMo $test_results += &test_spamo(); # Test DREME $test_results += &test_dreme(); # Test CentriMo $test_results += &test_centrimo(); # Test mcast (basic) $test_results += &test_program($update, $continue, "mcast", "--text -verbosity 1 --quiet" . " meme/meme.lex0.zoops" . " common/lex0.s", "mcast/lex0.zoops.mcast"); # Test mhmms with --maxseqs option (r1635) $test_results += &test_program($update, $continue, "mhmms", "--maxseqs 3 --quiet" . " mhmm/crp0.linear.mhmm" . " common/crp0.fasta", "mhmms/r1635.maxseqs.mhmms"); # Test mhmms with --global option (r1635) $test_results += &test_program($update, $continue, "mhmms", "--global --quiet" . " mhmm/lipo.linear.mhmm" . " common/lipo.fasta", "mhmms/r1635.global.mhmms"); # Test mhmms for bug where runs of # spacer states are scored as matches (r1635). # With theses input files mhmms should find no matches. $test_results += &test_program($update, $continue, "mhmms", "--quiet --motif-scoring" . " mhmm/lipo.linear.mhmm" . " mhmms/r1635.fasta", "mhmms/r1635.mhmms"); # Test mhmms for segmentation fault when --motif-scoring option # is used. (r1584) $test_results += &test_program($update, $continue, "mhmms", "--motif-scoring --quiet" . " mhmms/r1584.hmm" . " mhmms/r1584.fasta", "mhmms/r1584.mhmms"); # Test psp-gen on DNA $test_results += &test_program($update, $continue, "psp-gen", "-revcomp " . " -pos psp-gen/one-peak-dna.fasta" . " -neg psp-gen/all-A.fasta", "psp-gen/one-peak-dna-revcomp.psp", $script_dir); # Test psp-gen on protein $test_results += &test_program($update, $continue, "psp-gen", "-alpha prot -maxrange -triples " . " -pos psp-gen/one-peak-protein.fasta" . " -neg psp-gen/all-A.fasta", "psp-gen/one-peak-protein.psp", $script_dir); # Test fimo # The MEME_ETC_DIR contains the needed style sheets. $test_results += &test_program($update, $continue, "fimo", "--text --motif-pseudo 0.01" . " fimo/MCM1.meme.html" . " fimo/spiked.fasta", "fimo/fimo.txt"); # Test fimo with --motif option # The MEME_ETC_DIR contains the needed style sheets. $test_results += &test_program($update, $continue, "fimo", "--text --motif 2 --motif 3" . " --output-pthresh 0.01" . " common/crp0.meme.html" . " motiph/spiked.fasta", "fimo/fimo-motif23.txt"); # Test fimo with --psp and --prior-dist options # The MEME_ETC_DIR contains the needed style sheets. $test_results += &test_program($update, $continue, "fimo", "--text " . " --psp fimo/GCN4_YPD.psp " . " --prior-dist fimo/prior.dist.txt " . " fimo/GCN4.meme " . " fimo/GCN4_YPD.fasta", "fimo/fimo-priors.txt"); # Test fimo with --psp and --prior-dist options # with genomic coordinates provied in PSP and fasta files. # The MEME_ETC_DIR contains the needed style sheets. $test_results += &test_program($update, $continue, "fimo", "--text " . " --psp fimo/GCN4_YPD-genomic.psp " . " --prior-dist fimo/prior.dist.txt " . " fimo/GCN4.meme " . " fimo/GCN4_YPD-genomic.fasta", "fimo/fimo-priors-genomic.txt"); # Test ama with maxodds scoring $test_results += &test_program($update, $continue, "ama", " --verbosity 1 --scoring max-odds" . " gomo/motif.meme" . " gomo/seqs.fasta" . " gomo/seqs.bg", "gomo/ama.withMaxodds.xml"); # Test ama with redundant sequences $test_results += &test_program($update, $continue, "ama", " --pvalues --verbosity 1 --cs" . " gomo/motif.meme" . " gomo/seqs_red.fasta" . " gomo/seqs.norc.bg", "gomo/ama.redundant.xml"); # Test gomo on single species $test_results += &test_program($update, $continue, "gomo", " --nostatus --verbosity 1 --text" . " gomo/GO2Gene.map.csv" . " gomo/ama.nozscoring.xml", "gomo/gomo.smallthreshold.txt"); # Test gomo on multiple species $test_results += &test_program($update, $continue, "gomo", " --nostatus --verbosity 1 --text" . " gomo/GO2Gene.map.csv" . " gomo/ama.nozscoring.xml" . " gomo/ama.nozscoring.xml", "gomo/gomo.multipeSpecies.txt"); # Test motiph $test_results += &test_program($update, $continue, "motiph", "--seed 0 --bg 2.0 --pseudocount 0.01 --text " . " motiph/spiked.aln" . " motiph/yeast.tree" . " motiph/MCM1.meme.html", "motiph/motiph.gff"); # Test motiph with --motif option $test_results += &test_program($update, $continue, "motiph", "--seed 0 --bg 2.0 --pseudocount 0.01 --text " . " --output-pthresh 1.0" . " --motif 2 --motif 3" . " motiph/spiked.aln" . " motiph/yeast.tree" . " common/crp0.meme.html", "motiph/motiph-motif23.gff"); # Test shadow $test_results += &test_program($update, $continue, "shadow", "--output-pthresh 0.1 --bg 2.0 --text" . " motiph/spiked.aln" . " motiph/yeast.tree", "motiph/shadow.gff"); # tomtom test is failing on some platforms. Issue with platfrom variation # in random number generators? FIXME # Test tomtom (7 distance measures) foreach my $score ( "allr", "ed", "kullback", "pearson", "sandelin", "blic1", "blic5" ) { $test_results += &test_program($update, $continue, "tomtom", "-verbosity 1 -seed 1 -dist " . $score . " -text -incomplete-scores" . " common/sample.meme common/sample.meme", "tomtom/tomtom.out." . $score); } # Test Tomtom using complete scoring # (7 distance measures) foreach my $score ( "allr", "ed", "kullback", "pearson", "sandelin", "blic1", "blic5" ) { $test_results += &test_program($update, $continue, "tomtom", "-verbosity 1 -seed 1 -dist " . $score . " -text " . " common/sample.meme common/sample.meme", "tomtom/tomtom.complete.out." . $score); } # qvalue test is failing on some platforms. Issue with platfrom variation # in random number generators? FIXME $test_results += &test_program($update, $continue, "qvalue", "--header 1 --append --column 2 --seed 7718 qvalue/uniform.txt", "qvalue/uniform.out"); if (0) { # TLB; broken test $test_results += &test_program($update, $continue, "qvalue", "--null qvalue/null.txt qvalue/observed.txt", "qvalue/observed.out"); } # Test fasta-center $test_results += &test_program($update, $continue, "fasta-center", "-len 100 < ../doc/examples/sample-dna-Klf1-200.fa", "../doc/examples/sample-dna-Klf1-200-100.fa", "../scripts"); # Test dinuc shuffle $test_results += &test_program($update, $continue, "fasta-dinucleotide-shuffle", "-f ../doc/examples/sample-dna-Klf1-200-100.fa -t -dinuc", "../doc/examples/sample-dna-Klf1-200-100-shuffled.fa", "../scripts"); # combine dinuc output with sequences to make a background for AME: # we should really slurp and dump to be non-UNIX-dependent `cat ../doc/examples/sample-dna-Klf1-200-100.fa ../doc/examples/sample-dna-Klf1-200-100-shuffled.fa > /var/tmp/sample-dna-Klf1-$$`; # Test AME $test_results += &test_program($update, $continue, "ame", "--silent --verbose 1 --fix-partition 169 --bgformat 0 /var/tmp/sample-dna-Klf1-$$ ". "../doc/examples/Jaspar-subset.meme", "../doc/examples/sample-dna-Klf1-200-100-ame.txt", "../src", # default binary directory undef, # no named output file "/var/tmp/ame.$$.tmp"); # output to a directory exit($test_results); ########################################################################### sub test_scaffold { my($fileroot, $is_dna) = @_; # Create input filenames. my $train_file = "common/$fileroot.fasta"; my $meme_file = "common/$fileroot.meme.html"; my $test_file = "common/$fileroot-test.fasta"; my $linear_model = "mhmm/$fileroot.linear.mhmm"; my $complete_model = "mhmm/$fileroot.complete.mhmm"; my $test_aln = "common/test.aln"; my $test_aln_out = "clustalw2fasta/test.fasta"; my $test_aln_nogap_out = "clustalw2fasta/test.nogap.fasta"; my $test_aln_consensus_out = "clustalw2fasta/test.consensus.fasta"; # All files are in the scaffold directory. my $scaffold_root = "scaffold/$fileroot"; my $dna_switch; if ($is_dna == 1) { $dna_switch = "--dna"; } else { $dna_switch = ""; } # Run a test on each program in succession. &test_program($update, $continue, "meme-io", " --verbosity 1 $meme_file ", "$scaffold_root.meme-io"); &test_program($update, $continue, "fasta-io", " --verbosity 1 $dna_switch $train_file ", $train_file); &test_program($update, $continue, "fasta-io", " --verbosity 1 --many $dna_switch $train_file", $train_file); &test_program($update, $continue, "fasta-io", " --verbosity 1 --blocksize 100 $train_file", "$scaffold_root.fasta"); # Test model I/O. &test_program($update, $continue, "mhmm-io", " --verbosity 1 $linear_model", "$linear_model"); &test_program($update, $continue, "mhmm-io", " --verbosity 1 $complete_model", "$complete_model"); # Test model log conversion. &test_program($update, $continue, "log-hmm", " --verbosity 1 $linear_model", "$linear_model"); &test_program($update, $continue, "log-hmm", " --verbosity 1 $complete_model", "$complete_model"); # Test clustalw2fasta &test_program($update, $continue, "clustalw2fasta", " $test_aln", $test_aln_out); &test_program($update, $continue, "clustalw2fasta", " -nogap $test_aln", $test_aln_nogap_out); &test_program($update, $continue, "clustalw2fasta", " -consensus 100 $test_aln", $test_aln_consensus_out); } ########################################################################### sub test_core { my($fileroot, $is_dna) = @_; # Create input filenames. my $train_file = "common/$fileroot.fasta"; my $meme_file = "common/$fileroot.meme.html"; my $test_file = "common/$fileroot-test.fasta"; # Check different topologies. foreach my $topology ("linear", "complete", "star") { # Create various types of spacer models. foreach my $spacer ("1", "3", "fim") { # FIXME: There is a bug with star topology plus fims. if (($spacer eq "fim") && ($topology eq "star")) { next; } my $model = "$fileroot.$topology"; my $mhmm_params = " --noheader --noparams --type $topology "; if ($spacer eq "fim") { $model .= ".fim"; $mhmm_params .= "--fim "; } elsif ($spacer eq "3") { $model .= ".spacer"; $mhmm_params .= "--nspacer 3"; } $mhmm_params .= " --verbosity 1 $meme_file"; # Create the model. &test_program($update, $continue, "mhmm", $mhmm_params, "mhmm/$model.mhmm"); # Draw the model. &test_program($update, $continue, "draw-mhmm", " --verbosity 1 --consensus mhmm/$model.mhmm ", "draw-mhmm/$model.gvz"); # Set parameters for search routines. my $search_params = "--ethresh 99999 --quiet --width 79 "; # Use different scoring schemes. foreach my $paths ("all", "single") { # Search with the model. my $search_file = "$model.$paths"; if ($paths ne "all") { $search_params .= "--fancy "; } &test_program($update, $continue, "mhmms", " --verbosity 1 --paths $paths $search_params mhmm/$model.mhmm $test_file", "mhmms/$search_file.mhmms"); } } } } ############################################################################ # # programs that are run from src/ need not specify $bindir # and $out_dir if undefined default respectively to # the src folder and no output directory # if $output is defined, this name is used to redirect stdout # ############################################################################ sub test_program { my($update, $continue, $program, $arguments, $good_file, $bindir, $output, $out_dir, $dir_param) = @_; my $result = 1; $output = "/var/tmp/$program.$$.tmp" unless defined($output) || defined ($out_dir); $out_dir = "" unless defined($out_dir); # exact same semantics as before changing to argument $dir_param = '--oc' unless defined($dir_param); $bindir = $src_dir unless $bindir; # Tell the user what's happening. printf('*' x 79 . "\n"); printf("Testing $program . . . \n"); # Some programs only write to a file in a directory. if ($program eq "mcast") { $out_dir = "$output"; system("mkdir $out_dir"); $output .= "/$program.txt"; } elsif ($out_dir) { $output = "$program.txt" unless $output; $output = $out_dir."/".$output; } #if (defined($stdin)) { # $arguments .= " < $stdin"; #} # Run the program and collect the error level. if ($out_dir) { printf("$bindir/$program $dir_param $out_dir $arguments \n"); system("$bindir/$program $dir_param $out_dir $arguments"); $result = $? >> 8; } else { printf("$bindir/$program $out_dir $arguments \n"); system("$bindir/$program $arguments > $output"); $result = $? >> 8; } # Eliminate any lines that might differ, but are irrelevant system("sed -e '/xml-stylesheet/d' $output > $output.sed"); # Diff the results with the desired results. my $diff = `diff $good_file $output.sed`; # Check for problems and die if any happened. if (($diff ne "") || ($result != 0) || ($? >> 8 != 0)) { printf("FAIL\n"); if ($result != 0) { printf("$program had a non-zero exit status ($result)\n"); } else { print("$good_file\n"); print($diff); if ($update) { print(STDERR "Updating $good_file.\n"); `cp $output.sed $good_file`; } } if (!$continue) { exit(1); } $result = 1; } else { # Return success. printf("SUCCESS\n"); $result = 0; } # Delete temporary file(s). if ($out_dir) { system("rm -rf $out_dir"); } else { unlink($output); } unlink("$output.sed"); return $result; } # # Test a program's stdout output. # # # sub test_stdout { my ( $update, $continue, $program, $arguments, $reference_file, $filter, $clean_dir, $program_dir ) = @_; my $failed = 1; my $sep = '-' x 79 . "\n"; # $program_dir = $src_dir unless $program_dir; # Tell the user what's happening. printf('*' x 79 . "\n"); printf("Testing $program . . . \n"); # Print out the command my $cmd = &ExecUtils::stringify_args(catfile($program_dir, $program), @{$arguments}) . ' > output'; print $cmd, "\n"; # Create a temporary file to store the output my $test_fh = tempfile('test_output_XXXXXXXXXX', DIR => $tmpdir, UNLINK => 1); # run the program, storing the stdout to file # and the stderr to a variable my ($status, $error_messages); $status = &ExecUtils::invoke(PROG => $program, BIN => $program_dir, ARGS => $arguments, OUT_FILE => $test_fh, ERR_VAR => \$error_messages); # check status print $error_messages, $sep if $status; # print the output in case it has clues if ($status == -1) { printf("Failed to run %s: %s", $program, $!); } elsif ($status & 127) { printf( "%s died with signal %d, %s coredump.", $program, ($status & 127), ($status & 128) ? 'with' : 'without' ); } elsif ($status != 0) { printf("%s exited with value %d indicating failure.", $program, $? >> 8); } goto CLEANUP if $status; # rewind output seek($test_fh, 0, SEEK_SET); # test for existance of the reference file unless(-e $reference_file) { printf("Could not find the reference file \"%s\"\n", $reference_file); goto UPDATE; } # open reference file my $reference_fh; sysopen($reference_fh, $reference_file, O_RDONLY) or die("Can't open the reference file"); # read both files line by line, apply a filter and compare my ($rtext, $rline, $ttext, $tline); $rline = 0; $tline = 0; while (!(eof($test_fh) && eof($reference_fh))) { $ttext = undef; while (!eof($test_fh) && !defined($ttext)) { chomp($ttext = <$test_fh>); $ttext = $filter->($ttext) if defined($filter); $tline++; } $rtext = undef; while (!eof($reference_fh) && !defined($rtext)) { chomp($rtext = <$reference_fh>); $rtext = $filter->($rtext) if defined($filter); $rline++; } if (defined($ttext) && defined($rtext)) { if ($ttext ne $rtext) { # mismatch! print $error_messages, $sep; printf("Line %d of the reference file does not match line %d ". "of the test output.\nReference File:\n%s\nTest Output:\n%s\n", $rline, $tline, $rtext, $ttext); goto UPDATE; } } elsif (defined($ttext) || defined($rtext)) { # one file is longer print $error_messages, $sep; if (defined($ttext)) { print("The test output is longer than the reference file.\n"); } else { print("The reference file is longer than the test output.\n"); } goto UPDATE; } } $failed = 0; UPDATE: if ($failed && $update) { print("Updating reference file.\n"); binmode($test_fh); seek($test_fh, 0, SEEK_SET); copy($test_fh, $reference_file); } CLEANUP: close($test_fh); # remove the result folder if (defined($clean_dir) && -e $clean_dir) { rmtree($clean_dir); } # print the test result if ($failed) { printf("FAIL\n"); exit(1) unless $continue; } else { printf("SUCCESS\n"); } return $failed; } # # Test a program's xml output. # This assumes that the xml will # be written to a file in a directory # that can be deleted to cleanup. # # sub test_xml { my ( $update, $continue, $program, $arguments, $reference_xml, $test_xml, $ignore_keys, $clean_dir, $program_dir ) = @_; my $failed = 1; my $sep = '-' x 79 . "\n"; $program_dir = $src_dir unless $program_dir; # Tell the user what's happening. printf('*' x 79 . "\n"); printf("Testing $program . . . \n"); # Print out the command my $cmd = &ExecUtils::stringify_args(catfile($program_dir, $program), @{$arguments}); print $cmd, "\n"; # run the program, storing the output my ($status, $output); $status = &ExecUtils::invoke(PROG => $program, BIN => $program_dir, ARGS => $arguments, ALL_VAR => \$output); # check status print $output, $sep if $status; # print the output in case it has clues if ($status == -1) { printf("Failed to run %s: %s\n", $program, $!); } elsif ($status & 127) { printf( "%s died with signal %d, %s coredump.\n", $program, ($status & 127), ($status & 128) ? 'with' : 'without' ); } elsif ($status != 0) { printf("%s exited with value %d indicating failure.\n", $program, $? >> 8); } goto CLEANUP if $status; # check that the output we're expecting actually exists unless (-e $test_xml) { print $output, $sep; # print the output in case it has clues printf("%s did not create the xml output \"%s\"\n", $program, $test_xml); goto CLEANUP; } # check that the reference xml exists unless (-e $reference_xml) { printf("Could not find the reference xml \"%s\"\n", $reference_xml); goto UPDATE; } # diff the xml files my $diff = &DiffXML::diff_xml($reference_xml, $test_xml, @{$ignore_keys}); if ($diff) { print $output, $sep; # print the output in case it has clues print $diff; } else { $failed = 0; } UPDATE: if ($failed && $update) { print("Updating reference xml.\n"); copy($test_xml, $reference_xml); } CLEANUP: # remove the result folder if (-e $clean_dir) { rmtree($clean_dir); } # print the test result if ($failed) { printf("FAIL\n"); exit(1) unless $continue; } else { printf("SUCCESS\n"); } return $failed; } ############################################################################## # TEST PROGRAMS ############################################################################## sub test_dreme { my $test_results = 0; # xml parts to ignore in DREME output my $dreme_xml_ignore = [ '^dreme@(release|version)$', '^dreme:model:command_line#value$', '^dreme:model:(positives|negatives)@(file|last_mod_date)$', '^dreme:model:host#value$', '^dreme:model:when#value$', '^dreme:run_time@.*$' ]; $test_results += &test_xml($update, $continue, 'dreme', ['-oc', 'results/dreme', '-v', 1, '-noxslt', '-p', '../doc/examples/sample-dna-Klf1-200-100.fa'], 'dreme/basic.xml', 'results/dreme/dreme.xml', $dreme_xml_ignore, 'results/dreme', '../scripts'); return $test_results; } sub test_spamo { my $test_results = 0; # xml parts to ignore in SpaMo output my $spamo_xml_ignore = [ '^spamo@(release|version)$', '^spamo:model:command_line#value$', '^spamo:model:host#value$', '^spamo:model:when#value$', '^spamo:files:(sequence_db|motif_db)@(source|last_modified)$', '^spamo:run_time@.*$' ]; $test_results += &test_xml($update, $continue, 'spamo', ['-oc', 'results/spamo', '-v', 1, '-znoxslt', '-margin', 20, '-shared', 1, 'spamo/limits.fasta', 'spamo/primary.meme', 'spamo/secondary.meme'], 'spamo/limits.xml', 'results/spamo/spamo.xml', $spamo_xml_ignore, 'results/spamo'); $test_results += &test_xml($update, $continue, 'spamo', ['-oc', 'results/spamo', '-v', 1, '-znoxslt', '-margin', 20, '-shared', 1, 'spamo/random.fasta', 'spamo/primary.meme', 'spamo/secondary.meme'], 'spamo/random.xml', 'results/spamo/spamo.xml', $spamo_xml_ignore, 'results/spamo'); return $test_results; } sub test_centrimo { my $test_results = 0; $test_results += &test_program($update, $continue, "centrimo", "--noinvoke --verbosity 1" . " ../doc/examples/sample-dna-Klf1-200-100-dreme.txt" . " ../doc/examples/sample-dna-Klf1.fa", "centrimo/site_counts.txt", $src_dir, "site_counts.txt", "results/centrimo"); $test_results += &test_program($update, $continue, "centrimo-plots", "-max 5 -title 'score>=5.00 (bits)' < centrimo/site_counts.txt", "centrimo/centrimo.txt", $script_dir, "centrimo.txt", "results/centrimo", "-oc"); return $test_results; }