#
# HgAutomate: common cluster, postprocessing and database loading operations.
#
# DO NOT EDIT the /cluster/bin/scripts copy of this file --
# edit ~/kent/src/hg/utils/automation/HgAutomate.pm instead.

# $Id: HgAutomate.pm,v 1.29 2010/05/20 23:13:33 hiram Exp $
package HgAutomate;

use warnings;
use strict;
use Carp;
use vars qw(@ISA @EXPORT_OK);
use Exporter;
use File::Basename;

@ISA = qw(Exporter);

# This is a listing of the public methods and variables (which should be
# treated as constants) exported by this module:
@EXPORT_OK = (
    # Support for common command line options:
    qw( getCommonOptionHelp processCommonOptions
	@commonOptionVars @commonOptionSpec
      ),
    # Some basic smarts about our compute infrastructure:
    qw( choosePermanentStorage
	chooseWorkhorse chooseFileServer
	chooseClusterByBandwidth chooseSmallClusterByBandwidth
	chooseFilesystemsForCluster checkClusterPath
      ),
    # General-purpose utility routines:
    qw( checkCleanSlate checkExistsUnlessDebug closeStdin
	getAssemblyInfo getSpecies machineHasFile
	makeGsub mustMkdir mustOpen nfsNoodge run verbose
      ),
    # Hardcoded paths/commands/constants:
    qw( $gensub2 $para $paraRun $centralDbSql $git
	$clusterData $trackBuild $goldenPath $images $gbdb
	$splitThreshold $runSSH $setMachtype
      ),
);

#########################################################################
# A simple model of our local compute environment with some subroutines
# for checking the validity of path+machine combos and for suggesting
# appropriate storage and machines.

use vars qw( %cluster %clusterFilesystem $defaultDbHost );

%cluster = 
    ( 'swarm' => ,
        { 'enabled' => 1, 'gigaHz' => 2.33, 'ram' => 8,
	  'hostCount' => 1024, },
#      'pk' =>
#        { 'enabled' => 1, 'gigaHz' => 2.0, 'ram' => 4,
#	  'hostCount' => 394, },
      'memk' =>
        { 'enabled' => 1, 'gigaHz' => 1.0, 'ram' => 32,
	  'hostCount' => 32, },
      'encodek' =>
        { 'enabled' => 1, 'gigaHz' => 2.0, 'ram' => 16,
	  'hostCount' => 48, },
#      'kk9' => # Guessing here since the machines are down:
#        { 'enabled' => 0, 'gigaHz' => 1.5, 'ram' => 2,
#	  'hostCount' => 100, },
    );

my @allClusters = (keys %cluster);

%clusterFilesystem =
    ( 'scratch' =>
        { root => '/scratch/data', clusterLocality => 1.0,
	  distrHost => [], distrCommand => '',
	  inputFor => \@allClusters, outputFor => [], },
      'hive' =>
        { root => '/hive/data/genomes', clusterLocality => 0.3,
	  distrHost => ['swarm'], distrCommand => '',
	  inputFor => ['memk', 'encodek', 'swarm'],
	  outputFor => ['memk', 'encodek', 'swarm'], },
#        { root => '/hive/data/genomes', clusterLocality => 0.3,
#	  distrHost => ['pk', 'swarm'], distrCommand => '',
#	  inputFor => ['pk', 'memk', 'encodek', 'swarm'],
#	  outputFor => ['pk', 'memk', 'encodek', 'swarm'], },
#      'san' =>
#        { root => '/san/sanvol1/scratch', clusterLocality => 0.5,
#	  distrHost => ['pk'], distrCommand => '',
#	  inputFor => ['pk', 'memk'], outputFor => ['pk', 'memk'], },
    );

$defaultDbHost = 'hgwdev';

sub choosePermanentStorage {
  # Return the disk drive with the most available space.
  #*** would be good to parameterize instead of hardcoding this!
  confess "Too many arguments" if (scalar(@_) != 0);
  my $maxAvail;
  my $bestRaid;
  for (my $i=1;  $i < 20;  $i++) {
    my $raid = "/cluster/store$i";
    my $df = `df $raid/ 2>&1 | grep -v "No such" | egrep -v '^[A-Za-z]'`;
    if ($df =~ s/.*\s+(\d+)\s+\d+\%.*/$1/) {
      if (! defined $maxAvail || $df > $maxAvail) {
	$maxAvail = $df;
	$bestRaid = $raid;
      }
    }
  }
  if (! defined $bestRaid) {
    $bestRaid = "/hive/data/genomes";
  }
  confess "Could not df any /cluster/store's" if (! defined $bestRaid);
  return $bestRaid;
}

sub getMountPoint {
  # Extract the mount point for a given path from df.
  # This can hang if filesystem is unhappy -- c'est la vie.
  my ($path) = @_;
  my $df = `df $path`;
  if ($df =~ m@\d+\s+\d+\%\s+([/\w]+)$@) {
    return $1;
  } else {
    return undef;
  }
}

sub getClusterFsInfo {
  # Get clusterFilesystem record for the given path, if there is one.
  # Unless path starts with /scratch or /iscratch which may not be the
  # same on localhost as on the cluster nodes,
  #*** would be good to parameterize instead of hardcoding this!
  # use df to determine real location of path.
  my ($path) = @_;
  confess "must have complete, not relative, path" if ($path !~ m@^/@);
  if ($path =~ m@^/(scratch|iscratch)/@) {
    return $clusterFilesystem{$1};
  } else {
    my $mountPoint = &getMountPoint($path);
    foreach my $fs (keys %clusterFilesystem) {
      my $info = $clusterFilesystem{$fs};
      return $info if ($info->{'root'} =~ /^$mountPoint/);
    }
  }
  return undef;
}

sub getOkClusters {
  # Return a list of clusters that are known to be OK for the given path.
  my ($path, $isInput) = @_;
  my $fsInfo = &getClusterFsInfo($path);
  my @okClusters = ();
  if ($fsInfo) {
    @okClusters = $isInput ?
                       @{$fsInfo->{'inputFor'}} : @{$fsInfo->{'outputFor'}};
  }
  return @okClusters;
}

sub getWarnClusters {
  # If path is not on a clusterFilesytem, and it is used as input to a big
  # cluster or output from small cluster, warn but don't die.
  # Would be nice to use cluster parameters here instead of hardcoding.
  my ($path, $isInput) = @_;
  my $fsInfo = &getClusterFsInfo($path);
  if (! $fsInfo) {
    if ($isInput) {
      return @allClusters;
    } else {
      return ('encodek');
    }
  }
}

sub checkClusterPath {
  # Make sure that the list of paths is OK for the given cluster and in/out.
  my ($cluster, $inOrOut, @pathList) = @_;
  confess "Must have at least 3 arguments" if (scalar(@_) < 3);
  my $clusterInfo = $cluster{$cluster};
  if (! defined $clusterInfo) {
    confess "Unrecognized cluster \"$cluster\"";
  }
  if ($inOrOut ne "in" and $inOrOut ne "out") {
    confess "\$inOrOut must be either \"in\" or \"out\"";
  }
  foreach my $p (@pathList) {
    my $isInput = ($inOrOut eq 'in');
    my @okClusters = &getOkClusters($p, $isInput);
    my @warnClusters = &getWarnClusters($p, $isInput);
    my $do = $isInput ? 'take input from' : 'send output to';
    if (scalar(grep /^$cluster$/, @warnClusters)) {
      warn "Warning: Cluster $cluster probably should not $do $p .\n";
    } elsif (! scalar(grep /^$cluster$/, @okClusters)) {
      die "Error: Cluster $cluster cannot $do $p .\n";
    }
  }
}

sub getLoadFactor {
  # Return the load factor (most-recent) for the given machine.
  # If it doesn't produce a recognizable uptime result, return a
  # very high load.
  my ($mach) = @_;
  confess "Must have exactly 1 argument" if (scalar(@_) != 1);
  my $cmd = "$HgAutomate::runSSH $mach uptime 2>&1 | grep load";
  verbose(4, "about to run '$cmd'\n");
  my $load = `$cmd`;
  if ($load =~ s/.*load average: (\d+\.\d+).*/$1/) {
    return $load;
  }
  return 1000;
}

sub getWorkhorseLoads {
  #*** Would be nice to parameterize instead of hardcoding hostnames...
  # Return a hash of workhorses (kolossus and all idle small cluster machines),
  # associated with their load factors.
  # swarm and hgwdev are now valid workhorses since they have access to hive.
  confess "Too many arguments" if (scalar(@_) != 0);
  my %horses = ();
  foreach my $machLine ('swarm', 'kolossus', 'hgwdev',
	`$HgAutomate::runSSH encodek parasol list machines | grep idle`,
	`$HgAutomate::runSSH memk parasol list machines | grep idle`) {
    my $mach = $machLine;
    $mach =~ s/[\. ].*//;
    chomp $mach;
    $horses{$mach} = &getLoadFactor($mach) if (! exists $horses{$mach});
  }
  return %horses;
}

sub chooseWorkhorse {
  # Choose a suitable "workhorse" machine.  If -workhorse was given, use that.
  # Otherwise, randomly pick a fast machine with low load factor, or wait if
  # none are available.  This can wait indefinitely, so if it's broken or if
  # all workhorses are down, it's up to the engineer to halt the script.
  confess "Too many arguments" if (shift);
  if ($main::opt_workhorse) {
    return $main::opt_workhorse;
  }
  &verbose(2, "chooseWorkhorse: polling load factors of kolossus and " .
	   "idle small cluster machines.  This may take a minute...\n");
  while (1) {
    my %horses = &getWorkhorseLoads();
    foreach my $maxLoad (0.1, 0.5, 1.0, 2.0) {
      my @fastHorses = ();
      foreach my $horse (keys %horses) {
	push @fastHorses, $horse if ($horses{$horse} <= $maxLoad);
      }
      if (scalar(@fastHorses) > 0) {
	my $randomFastEnough = $fastHorses[int(rand(scalar(@fastHorses)))];
	&verbose(2, "chooseWorkhorse: $randomFastEnough meets load " .
		 "threshold of $maxLoad.\n");
	return $randomFastEnough;
      }
    }
    my $delay = 120;
    &HgAutomate::verbose(1, "chooseWorkhorse: all machines have high load." .
			 "  waiting $delay seconds...\n");
    sleep($delay);
  }
}

sub getFileServer {
  # Use df to determine the fileserver for $path.
  my ($path) = @_;
  confess "Must have exactly 1 argument" if (scalar(@_) != 1);
  my $host = `df $path 2>&1 | grep -v Filesystem`;
  if ($host =~ /(\S+):\/.*/) {
    return $1;
  } elsif ($host =~ /^\/\w/) {
    my $localhost = `uname -n`;	# HOST not always present
    if ($localhost =~ s/^(\w+)(\..*)?$/$1/) {
      return $localhost;
    }
  }
  confess "Could not extract server from output of \"df $path\":\n$host\n";
}

sub canLogin {
  # Return true if logins are enabled on the given fileserver.
  #*** hardcoded
  my ($mach) = @_;
  return ($mach =~ /^kkstore/ || $mach eq 'eieio');
  confess "Must have exactly 1 argument" if (scalar(@_) != 1);
}

sub chooseFileServer {
  # Choose a suitable machine for an I/O-intensive task.
  # If -fileServer was given, use that.
  # Otherwise, determine the fileserver for $path, and if we can log in
  # on the fileserver, and its load is not too high, return it.
  # Otherwise, use a workhorse machine.
  my ($path) = @_;
  confess "Must have exactly 1 argument" if (scalar(@_) != 1);
  if ($main::opt_fileServer) {
    return $main::opt_fileServer;
  }
  my $server = &getFileServer($path);
  verbose(4, "Fileserver from df is '$server'\n");
  $server =~ s/-10$//;
  if ($server && &canLogin($server) && (&getLoadFactor($server) < 2.0)) {
    return $server;
  }
#*** SMALL CLUSTER MACHINES CANNOT WGET OUTSIDE, SO NOT ALWAYS A GOOD CHOICE HERE
  return &chooseWorkhorse();
}

sub chooseClusterByBandwidth {
  # Choose cluster by apparent available bandwidth.
  # Note: this does not take I/O into account, so it's best to call this
  # before distributing inputs instead of after (unless they have been
  # distributed somewhere that is fast for all clusters like /scratch).
  my $onlySmallFast = shift;
  confess "Too many arguments" if (shift);
  my $maxOomph;
  my $bestCluster;
  foreach my $paraHub (keys %cluster) {
    my $clusterInfo = $cluster{$paraHub};
    next if (! $clusterInfo->{'enabled'});
    next if ($onlySmallFast && $clusterInfo->{'gigaHz'} < 2.0);
    my @machInfo = `$HgAutomate::runSSH $paraHub parasol list machines | grep -v dead`;
    my $idleCount = 0;
    my $busyCount = 0;
    foreach my $info (@machInfo) {
      if ($info =~ /idle$/) {
	$idleCount++;
      } else {
	$busyCount++;
      }
    }
    my $batchCount =
      `$HgAutomate::runSSH $paraHub parasol list batches | grep -v ^# | wc -l`;
    my $expectedPortion = 1 / (1 + $batchCount);
    my $oomph = (($idleCount + ($busyCount * $expectedPortion)) *
		 $clusterInfo->{'gigaHz'});
    &verbose(3, "chooseClusterByBandwidth: " .
	     "$paraHub: ((idle=$idleCount + " .
	     "(busy=$busyCount * portion=$expectedPortion)) " .
	     "* speed=$clusterInfo->{gigaHz}) = $oomph\n");
    if (! defined $maxOomph || ($oomph > $maxOomph)) {
      $maxOomph = $oomph;
      $bestCluster = $paraHub;
    }
  }
  if (! defined $bestCluster) {
    confess "Failed to find a live cluster";
  }
  &verbose(2, "chooseClusterByBandwidth: $bestCluster " .
	   "($maxOomph Gop/s est)\n");
  return $bestCluster;
}

sub chooseSmallClusterByBandwidth {
  # Choose small cluster (fast nodes) by apparent available bandwidth.
  # Note: this does not take I/O into account, so it's best to call this
  # before distributing inputs instead of after (unless they have been
  # distributed somewhere that is fast for all clusters like /scratch).
  return chooseClusterByBandwidth(1);
}

sub chooseFilesystemsForCluster {
  # Return a list of suitable filesystems for given cluster and direction.
  my ($cluster, $inOrOut) = @_;
  confess "Must have exactly 2 arguments" if (scalar(@_) != 2);
  my $clusterInfo = $cluster{$cluster};
  confess "Unrecognized cluster $cluster" if (! $clusterInfo);
  confess "Second arg must be either \"in\" or \"out\""
    if ($inOrOut ne 'in' && $inOrOut ne 'out');
  my @filesystems;
  foreach my $fs (keys %clusterFilesystem) {
    my $fsInfo = $clusterFilesystem{$fs};
    my @okClusters = ($inOrOut eq 'in') ?
       @{$fsInfo->{'inputFor'}} :  @{$fsInfo->{'outputFor'}};
    if (scalar(grep /^$cluster$/, @okClusters)) {
      push @filesystems, $fsInfo->{'root'} if (-d $fsInfo->{'root'});
    }
  }
  push @filesystems, '/hive/data/genomes' if (scalar(@filesystems)<1);
  return @filesystems;
}


#########################################################################
# Support for command line options expected to be common to many
# automation scripts:

use vars qw( @commonOptionVars @commonOptionSpec );

# Common option defaults:
my $defaultVerbose = 1;

@commonOptionVars = qw(
    $opt_workhorse
    $opt_fileServer
    $opt_dbHost
    $opt_bigClusterHub
    $opt_smallClusterHub
    $opt_debug
    $opt_verbose
    $opt_help
    );

@commonOptionSpec = ("workhorse=s",
		     "fileServer=s",
		     "dbHost=s",
		     "bigClusterHub=s",
		     "smallClusterHub=s",
		     "verbose=n",
		     "debug",
		     "help",
		    );

my %optionHelpText = ( 'workhorse' =>
'    -workhorse machine    Use machine (default: %s) for compute or
                          memory-intensive steps.
',
		       'fileServer' =>
'    -fileServer mach      Use mach (default: fileServer of the build directory)
                          for I/O-intensive steps.
',
		       'dbHost' =>
'    -dbHost mach          Use mach (default: %s) as database server.
',
		       'bigClusterHub' =>
'    -bigClusterHub mach   Use mach (default: %s) as parasol hub
                          for cluster runs with very large job counts.
',
		       'smallClusterHub' =>
'    -smallClusterHub mach Use mach (default: %s) as parasol hub
                          for cluster runs with smallish job counts.
',
		       'debug' =>
'    -debug                Don\'t actually run commands, just display them.
',
		       'verbose' =>
'    -verbose num          Set verbose level to num (default %d).
',
		       'help' =>
'    -help                 Show detailed help and exit.
',
		     );

my %optionDefaultDefaults = ( 'workhorse' => 'least loaded',
			      'dbHost' => $defaultDbHost,
			      'bigClusterHub' => 'most available',
			      'smallClusterHub' => 'most available',
			      'verbose' => $defaultVerbose,
			    );


sub getCommonOptionHelp {
  # Return description of common options, given defaults, for usage message.
  # Input is a hash of applicable options and default values (which can be
  # empty, in which case %optionDefaultDefaults will be used).
  # debug, verbose and help will be added if not specified.
  my %optionSpec = @_;
  my $help = "";
  foreach my $opName (sort keys %optionSpec) {
    if (exists $optionHelpText{$opName}) {
      $help .= sprintf $optionHelpText{$opName},
	($optionSpec{$opName} || $optionDefaultDefaults{$opName});
    } else {
      die "HgAutomate::getCommonOptionHelp: unrecognized option '$opName'.\n" .
	"Supported values: " . join(", ", sort keys %optionHelpText) . ".\n";
    }
  }
  $help .= $optionHelpText{'debug'} if (! exists $optionSpec{'debug'});
  if (! exists $optionSpec{'verbose'}) {
    $help .= sprintf $optionHelpText{'verbose'},
      $optionDefaultDefaults{'verbose'};
  }
  $help .= $optionHelpText{'help'} if (! exists $optionSpec{'help'});
  return $help;
}

sub processCommonOptions {
  # Process common command line options as specified above
  # (except -help is up to caller):
  $main::opt_verbose = $defaultVerbose if (! defined $main::opt_verbose);
}


#########################################################################
#	These items should come from a configuration file so this
#	business can be easily set up in other environments.
# Hardcoded paths/command sequences:
use vars qw( 	$gensub2 $para $paraRun $centralDbSql $git
		$clusterData $trackBuild $goldenPath $images $gbdb
		$splitThreshold $runSSH $setMachtype
	   );
use vars qw( $gensub2 $para $paraRun $clusterData $trackBuild
	     $goldenPath $gbdb $centralDbSql $splitThreshold $runSSH );
$gensub2 = '/parasol/bin/gensub2';
$para = '/parasol/bin/para';
$paraRun = ("$para make jobList\n" .
	    "$para check\n" .
	    "$para time > run.time\n" .
	    'cat run.time');
$centralDbSql = "hgsql -h genome-testdb -A -N hgcentraltest";
$git = "/usr/bin/git";

$clusterData = '/hive/data/genomes';
$trackBuild = 'bed';
my $apacheRoot = '/usr/local/apache';
$goldenPath = "$apacheRoot/htdocs-hgdownload/goldenPath";
$images = "$apacheRoot/htdocs/images";
$gbdb = '/gbdb';

# This is the max number of sequences in an assembly that we will consider
# "chrom-based" (allow split tables; per-seq files can fit in one directory)
# as opposed to "scaffold-based" (no split tables; multi-level directory for
# per-seq files, or use set of multi-seq files).
$splitThreshold = 100;

# ssh command and its options, the extra -o options prevent asking
# questions about adding machines to known hosts
$runSSH = "ssh -x -o 'StrictHostKeyChecking = no' -o 'BatchMode = yes'";

$setMachtype = "setenv MACHTYPE `uname -m | sed -e 's/i686/i386/;'`";

#########################################################################
# General utility subroutines:

sub checkCleanSlate {
  # Exit with an error message if it looks like this step has already been run
  # based on the existence of the given file(s) or directory(ies).
  my ($step, $nextStep, @files) = @_;
  confess "Must have at least 3 arguments" if (scalar(@_) < 3);
  confess "undef input" if (! defined $step || ! defined $nextStep);
  my $problem = 0;
  foreach my $f (@files) {
    confess "undef input" if (! defined $f);
    if (-e $f) {
      warn "$step: looks like this was run successfully already " .
	"($f exists).  Either run with -continue $nextStep or some later " .
	"step, or move aside/remove $f and run again.\n";
      $problem = 1;
    }
  }
  exit 1 if ($problem);
}

sub checkExistsUnlessDebug {
  # Exit with an error message if required files don't exist,
  # unless $opt_debug.
  my ($prevStep, $step, @files) = @_;
  confess "Must have at least 3 arguments" if (scalar(@_) < 3);
  confess "undef input" if (! defined $prevStep || ! defined $step);
  return if ($main::opt_debug);
  my $problem = 0;
  foreach my $f (@files) {
    confess "undef input" if (! defined $f);
    if (! -e $f) {
      warn "$step: output of previous step $prevStep, $f , is required " .
	"but does not appear to exist.\n" .
	"If it actually does exist, then this error is probably due to " .
	"network/filesystem delays -- wait a minute and restart with " .
	"-continue $step.\n" .
	"If it really doesn't exist, either fix things manually or " .
	"try -continue $prevStep\n";
      $problem = 1;
    }
  }
  exit 1 if ($problem);
}

sub closeStdin {
  # If we don't do this, the script can hang ("Suspended (tty input)")
  # when it is run backgrounded (&) and then something is typed into the
  # terminal... or something like that.  Anyway, doesn't hurt.  It does not
  # prevent hanging on ssh prompts, however.
  close(STDIN);
  open(STDIN, '/dev/null');
}

sub getAssemblyInfo {
  # Do a quick dbDb lookup to get assembly descriptive info for README.txt.
  my ($dbHost, $db) = @_;
  confess "Must have exactly 2 arguments" if (scalar(@_) != 2);
  my $query = "select genome,description,sourceName from dbDb " .
              "where name = \"$db\";";
  my $line = `echo '$query' | $HgAutomate::runSSH $dbHost $centralDbSql`;
  chomp $line;
  my ($genome, $date, $source) = split("\t", $line);
  return ($genome, $date, $source);
}

sub getSpecies {
  # fetch scientificName from dbDb
  my ($dbHost, $db) = @_;
  confess "Must have exactly 2 arguments" if (scalar(@_) != 2);
  my $query = "select scientificName from dbDb " .
              "where name = \"$db\";";
  my $line = `echo '$query' | $HgAutomate::runSSH $dbHost $centralDbSql`;
  chomp $line;
  my ($scientificName) = split("\t", $line);
  return ($scientificName);
} # getSpecies

sub machineHasFile {
  # Return a positive integer if $mach appears to have $file or 0 if it
  # does not.
  my ($mach, $file) = @_;
  confess "Must have exactly 2 arguments" if (scalar(@_) != 2);
  confess "undef input" if (! defined $mach || ! defined $file);
  my $count = `$HgAutomate::runSSH $mach ls -1 $file 2>>/dev/null | wc -l`;
  chomp $count;
  return $count + 0;
}

sub makeGsub {
  # Create a gsub file in the given dir with the given contents.
  my ($runDir, $templateCmd) = @_;
  confess "Must have exactly 2 arguments" if (scalar(@_) != 2);
  confess "undef input" if (! defined $runDir || ! defined $templateCmd);
  chomp $templateCmd;
  my $fh = mustOpen(">$runDir/gsub");
  print $fh  <<_EOF_
#LOOP
$templateCmd
#ENDLOOP
_EOF_
    ;
  close($fh);
}

sub mustMkdir {
  # mkdir || die.  Immune to -debug -- we need to create the dir structure 
  # and dump out the scripts even if we don't actually execute the scripts.
  my ($dir) = @_;
  confess "Must have exactly 1 argument" if (scalar(@_) != 1);
  confess "undef input" if (! defined $dir);
  system("mkdir -p $dir") == 0 || die "Couldn't mkdir $dir\n";
}

sub mustOpen {
  # Open a file or else die with informative error message.
  my ($fileSpec) = @_;
  confess "Must have exactly 1 argument" if (scalar(@_) != 1);
  confess "undef input" if (! defined $fileSpec);
  open(my $handle, $fileSpec)
    || die "Couldn't open \"$fileSpec\": $!\n";
  return $handle;
}

sub nfsNoodge {
  # the touch of the directory causes NFS to refresh its directory
  # information and thus pick up status change to the file.
  # sometimes localhost can't see the newly created file immediately,
  # so insert some artificial delay in order to prevent the next step
  # from dieing on lack of file:
  my ($file) = @_;
  confess "Must have exactly 1 argument" if (scalar(@_) != 1);
  confess "undef input" if (! defined $file);
  return if ($main::opt_debug);
  my $dir = dirname($file);
  for (my $i=0;  $i < 5;  $i++) {
    `touch $dir`;
    sleep(4);
    last if ( -s $file );
  }
}

sub run {
  # Run a command in sh (unless -debug).
  my ($cmd) = @_;
  confess "Must have exactly 1 argument" if (scalar(@_) != 1);
  confess "undef input" if (! defined $cmd);
  if ($main::opt_debug) {
    print "#DEBUG# $cmd\n";
  } else {
    verbose(1, "# $cmd\n");
    system($cmd) == 0 || die "Command failed:\n$cmd\n";
  }
}

sub verbose {
  my ($level, $message) = @_;
  confess "Must have exactly 2 arguments" if (scalar(@_) != 2);
  confess "undef input" if (! defined $level || ! defined $message);
  print STDERR $message if ($main::opt_verbose >= $level);
}


# perl packages need to end by returning a positive value:
1;