#! /usr/bin/perl -w

use strict;
use Getopt::Std qw{getopts};
use POSIX qw{ceil};

# Reference time for file ages.  When -w is given, the main loop refreshes
# this at the start of every pass.
my $now=time();

# Parse the command line.  Switches followed by ":" take an argument.
# NOTE(review): "C:" makes -C consume the following argument, although the
# usage text below documents -C as a plain flag -- confirm which is intended
# before changing either one, since existing callers may rely on it.
my %opts;
getopts('C:o:s:S:w:vpd1',\%opts);

# getopts() has removed the switches from @ARGV; whatever is left are the
# globs.  With no glob there is nothing to check, and continuing would
# report success (zero of zero globs are "ready"), so print the usage
# message and exit with a failure status instead.
if(!@ARGV) {
    print STDERR <<EOT;
This script determines if at least one file that maches a glob is
"ready."  A glob is an expression such as "*.txt" or "wrfout_d01*"
that identifies zero or more files.  A file is said to be "ready"
if it:
   1. has not been modified in a certain amount of time, and
   2. has a certain minimum size.
This script can check several globs at once, in which case the
script will return a failure condition unless each glob refers to at
least one file that is "ready".

Format:

   hwrf_file_ready.pl [-v] [-p] [-d] [-w maxwait] [-S sleeptime] \
            [-s minsize] [-o minage] glob [glob [...]]

glob [glob [...]] = a list of globs to check (ie.: wrfout_d01*).
    when sending these globs from a shell, make sure they are in
    quotes or the shell might expand them.

-d = wait for directories instead of files (NOTE: THE DEFAULS BELOW
    ARE DIFFERENT FOR FILES THAN FOR DIRECTORIES)

-v = be verbose (messages sent to stderr)
-p = print matching filenames to stdout
-1 = only print the first match of each glob
-o minage = specify minimum age in seconds (default: 30 seconds for
     files, 0 seconds for directories)
-s minsize = specify the minimum file size.  Three suffixes are
    recognized: kb (or k), mb (or m) and gb (or g) for kilobytes,
    megabytes and gigabytes, respectively (case-insensitive).  If no
    suffix is specified, then the number is in bytes.  Hence, 10.3gb
    is 10.3 gigabytes (rounded up to the next nearest byte), 2kb is
    2048 bytes, 1mb is 1048576 bytes, and "44" is 44 bytes.
    NOTE: MINSIZE IS DISABLED FOR DIRECTORIES
-w maxwait = loop over and over, waiting until at least one file
    matches the requirements (if this is unspecified, the check will
    only be done once). 
-S sleeptime = number of seconds to sleep between checks (only used
    if -w is specified).
-C = check the simulation signature file after every file check
EOT
    exit 2;
}

# Simple switches:
my $verbose=defined($opts{v});          # -v: send progress messages to stderr
my $print_matches=defined($opts{p});    # -p: print ready files to stdout
my $first_match=defined($opts{1});      # -1: restrict what -p prints
my $want_dir=defined($opts{d});         # -d: look for directories, not files

# Looping controls: $maxwait stays undefined unless -w was given, in which
# case the check is done only once; $sleeptime defaults to one second.
my $maxwait=$opts{w};
my $sleeptime=defined($opts{S}) ? $opts{S} : 1;

# message: print all arguments to stderr, but only in verbose mode (-v).
# Nothing is printed, and nothing is appended to the text, otherwise.
sub message {
    if($verbose) {
        print STDERR @_;
    }
}

# to_age: turn the text of an age option into a number of seconds, using
# Perl's ordinary string-to-number conversion.
sub to_age( $ ) {
    my ($age_text)=@_;
    my $seconds=$age_text+0;
    return $seconds;
}

# to_signed_byte_size: helper for to_byte_size.  Converts a size
# specification such as 100MB, 10.3gb, 30k or 800 to a number of bytes,
# rounding towards +infinity (via ceil).  The result may be negative;
# to_byte_size is responsible for clamping it.  Text that does not consist
# of a number followed by a recognized suffix is converted with Perl's
# ordinary string-to-number rules.
#
# This is a top-level sub on purpose: named subs are package-global in Perl
# even when written inside another sub, so nesting it gave no privacy.  It
# works on a lexical copy of its argument so the caller's $_ is untouched.
sub to_signed_byte_size( $ ) {
    my ($spec)=@_;

    # Bytes per unit for each suffix letter; a trailing "b" is optional and
    # the suffix is case-insensitive (k, kb, K, Kb, ...).
    my %bytes_per_unit=(k=>1024, m=>1048576, g=>1073741824);

    if($spec =~ /^\s*([+-]?(?:\d+|\.\d+|\d+\.\d+))\s*([kmg])b?\s*$/i) {
        return ceil($bytes_per_unit{lc($2)}*$1);
    }
    return ceil(0+$spec);
}

# to_byte_size: convert a string that specifies a byte size, to a byte size.
# Takes 100MB, 10.3gb, 30k, 800, etc. and converts to the relevant byte size.
# Always rounds towards +infinity (via ceil).  Negative sizes are returned
# as 0.
sub to_byte_size( $ ) {
    my $out=0+to_signed_byte_size($_[0]);
    return 0 if($out<0);
    return $out;
}

# $minsize: min file size in bytes.  Default: 1
my $minsize=defined($opts{'s'}) ? to_byte_size($opts{'s'}) : 1;

# $minage: minimum age in seconds.  Default: 30 for files, 0 for
# directories.
my $minage;
if(defined($opts{o})) {
  $minage=to_age($opts{o});
} else {
  $minage=$want_dir ? 0 : 30;
}

# The size test is never applied to directories, so a -s given together
# with -d is discarded.
if(defined($opts{'s'}) && $want_dir) {
  undef $minsize;
  message("You asked for directories but specified a minimum file size.  I don't know how to search for non-empty directories, so I'm ignoring your minimum file size.\n");
}

# NOTE(review): for directories, a -o value is overridden with 0 here, so
# the warning below always reports "0 SECONDS" and describes a wait that
# can no longer happen.  Kept exactly as found; confirm whether the
# override or the warning reflects the intended behavior.
if(defined($opts{'o'}) && $want_dir) {
  $minage=0;
  message("WARNING: YOU ASKED FOR DIRECTORIES IN hwrf_file_complete.pl BUT YOU SPECIFIED A MINIMUM AGE.  THAT IS NEARLY ALWAYS NOT WHAT YOU WANT TO DO, SINCE IT CAUSES ME TO WAIT UNTIL NO FILE HAS BEEN CREATED (AND POSSIBLY NOT MODIFIED) IN THE SPECIFIED DIRECTORY (OR DIRECTORIES) IN $minage SECONDS.  I SUGGEST YOU ADD -o0 TO YOUR hwrf_file_complete.pl CALL.\n");
}

# Report the effective configuration (only shown with -v).
message("config: ".($print_matches ? "" : "don't ")."print matches\n");
if(defined($minsize)) {
    message("config: min file size = $minsize bytes\n");
} else {
    # $minsize was discarded above.  The string is built before message()
    # decides whether to print it, so interpolating the undefined value
    # would raise an "uninitialized value" warning even without -v.
    message("config: min file size = ignored (checking directories)\n");
}
message("config: min age = $minage seconds\n");
if(defined($opts{w})) {
    message("config: loop max = $maxwait seconds\n");
    message("config: sleep = $sleeptime seconds\n");
}

my $start=$now;   # the -w time limit is measured from here
my $first=1;      # true only during the first pass of the loop below

# Main loop.  Each pass checks every glob once.  Without -w the pass always
# ends in an exit; with -w the passes repeat, sleeping in between, until
# all globs are ready or more than $maxwait seconds have elapsed.
do {

# Bradley Mabe said that on CCS, the file attributes (including mtime)
# may not be up to date due to the nature of gpfs, and that calling
# sync on the file will refresh the mtime.  Also, Jet has severe file
# caching issues, so we run sync on Jet too for safety.  

# Sync is in different places on different machines; run the first one
# that is found.
    foreach my $sync_path ('/usr/sbin/sync', '/usr/bin/sync', '/bin/sync') {
        next unless -x $sync_path;
        `$sync_path`;
        last;
    }

    # -C: check the simulation signature after every file check, that is,
    # at the start of every pass except the first.  $first is cleared
    # unconditionally; it used to be cleared only inside this branch, which
    # itself required $first to be already cleared, so the check never ran.
    if(!$first && defined($opts{C}) && defined($ENV{PARAFLAG}) && $ENV{PARAFLAG} eq 'YES') {
        system("$ENV{USHhwrf}/hwrf_port_checksig.sh");
    }
    $first=0;

    # When looping, ages must be measured from the current pass, not from
    # the time the script started.
    if(defined($opts{w})) {
        $now=time();
    }

    my @matches=();         # every ready file, over all globs
    my @first_matches=();   # first ready file of each glob that has one
    my $globs_matching=0;   # number of globs with at least one ready file

    # Loop over all arguments.  The arguments are not filenames --
    # they are globs (ie.: wrfout_d01*)
    foreach my $glob (@ARGV) {
        message("$glob: expanding this glob.\n");

        # Expand the glob to a list of matching filenames:
        my @files=glob($glob);
        my $nfiles=scalar(@files);

        message("$glob: expanded to $nfiles files.\n");

        # Loop over all matching file names:
        my $files_matching=0;
        foreach my $file (@files) {
            message("$glob: checking file \"$file\"\n");

            # Does the file exist?
            unless(-e $file) {
                message("$file: does not exist\n");
                next;
            }

            # Is it the kind of entry (file or directory) that was asked for?
            if(-d $file && !$want_dir) {
              message("$file: is a directory but I want non-directories\n");
              next;
            }
            if(!-d $file && $want_dir) {
              message("$file: I want directories, but this is not a directory\n");
              next;
            }

            # Get detailed information about the file:
            my @stat=stat($file);
            if(!@stat) {
                # File exists, but stat failed.  Might be due to filesystem issues:
                warn "hwrf_file_ready.pl: $file: Stat failed on existing file \"$file\": $!.  This indicates either permissions problems, network errors, or filesystem errors.\n";
                next;
            }

            # Is the file large enough?  ($stat[7] is the size in bytes;
            # the size test is not applied to directories.)
            if(!$want_dir) {
              unless($stat[7]>=$minsize) {
                message("$file: too small: size=$stat[7]<$minsize\n");
                next;
              }
            }

            # Is the file old enough?  ($stat[9] is the modification time.)
            my $age=$now-$stat[9];
            unless($age>=$minage) {
                message("$file: not old enough; age=$age sec<$minage\n");
                next;
            }

            # The file exists, is large enough and is old enough.
            message("$file: matches glob \"$glob\", is old enough, is large enough\n");
            push @first_matches,$file if $files_matching==0;
            $files_matching++;
            push @matches,$file;
        }
        $globs_matching++ if $files_matching>0;
        message("$glob: this glob has $files_matching matches\n");
    }

    # Determine how many globs there were:
    my $nglobs=scalar(@ARGV);
    message("hwrf_file_ready.pl: $globs_matching of $nglobs globs matched ready files.\n");

    if($print_matches) {
        # Display the matching files, one per line.
        # NOTE(review): with -w this runs on every pass, so files of globs
        # that became ready early are printed again on each later pass.
        if(!$first_match) {
            print("$_\n") foreach(@matches);
        } elsif(@first_matches) {
            # -1: only the first ready file of each glob.
            print("$_\n") foreach(@first_matches);
        } elsif(!defined($opts{w})) {
            # Nothing to print and no further passes will be made.  When
            # looping (-w), "nothing ready yet" is the normal reason to
            # keep waiting, so it must not abort the script.
            die "hwrf_file_ready.pl: Cannot print first match for each glob.  Some globs did not match.  Aborting";
        }
    }

    # Determine return value: 0 if all globs matched, 1 otherwise:
    if($globs_matching >= $nglobs) {
        message("hwrf_file_ready.pl: All globs matched files.  Files are ready.\n");
        exit 0;
    } else {
        message("hwrf_file_ready.pl: Some globs did not match files.  Files are not ready yet.\n");
        if(defined($opts{w})) {
            message("hwrf_file_ready.pl: Sleep $sleeptime seconds.\n");
            sleep($sleeptime);
        } else {
            exit 1;
        }
    }
} while(time()-$start <= $maxwait);

# Only reached with -w: the time limit passed before all globs were ready.
exit 1;
