#! /usr/bin/perl -W -w
#------------------------------------------------------------------------------
#
# Program Name: get_sfcmarobs.pl
#
# Abstract: This script is used to concatenate all of the NCEP surface marine
#           observation ascii text files from a time span specified by the user.
#           It is a part of COMF (formerly NGOFS), and it gets obs files from
#           ODAAS.
#
# Location: $OQCSDIR/scripts/
#
# Technical Contacts:	Zack Bronder		 Org: NOAA/NOS/CO-OPS
#			Phone: 301-713-2890x152	 E-mail: Zachary.Bronder@noaa.gov
#                       Mark Vincent		 Org: NOAA/NOS/CO-OPS
#			Phone: 301-713-2890x151  E-mail: Mark.Vincent@noaa.gov
#
# Author: Zachary Bronder       Creation Date: November 29, 2004
#
# Language: Perl
#
# Usage: 	Interactively: get_sfcmarobs.pl start_time end_time
#		      example: get_sfcmarobs.pl '2004123118' '2005010106'
# 		Automatically: get_sfcmarobs.pl can be called by model scripts,
#                              such as MAIN_LEOFS.sh, which are launched via cron.
#
# Input Parameters: start_time is the start of the time span of the input obs files.
#                   It is one unbroken string of digits for year, month, day, and hour
#                   (YYYYMMDDHH, for example '2005123123'), with no spaces.
#                   In model scripts it will usually be $time_hotstart.
#                   end_time is the end of the time span and has the same format.
#                   In model scripts it will usually be $time_nowcastend.
#
# Target computer: Runs on COMF computers, such as glofs.nos.noaa.gov.
#                  Gets input from ODAAS computers, such as odaas1.nos.noaa.gov.
#
# Estimated execution time:
#
# Error Conditions:
#
# Scripts/Programs Called:
# 	Name			Directory Location		Description
#
# Input Files:
#       Name                    Directory  Location             Description
#  YYYYMMDDHHMM_sfcmarobs.txt  $MAROBSDIR/*hourly*YYYYMMDD  obs text file
#
# Output Files:
#       Name                    Directory Location              Description
#       get_sfcmarobs.txt	.		    text file of several concatenated input files
#       get_sfcmarobs.lst       .                   list of input files that are concatenated
# Revisions:
#      	Date	Author                          Description
#  2005-01-14	Z Bronder  Added ability to subset by lat/lon's or water body
#  2005-09-20   Z Bronder  Changed the way the number of files in the list file 
#                          is counted due to variations in versions of the wc command
#  2006-07-24   G Mott     Changed the input times from '2006 07 23 00 00' to '200607230000' 
#                           for porting to ncep to utilize their date utilities.
#                           Changed the directory paths to be set from the launch script only (eliminate hardwire).
#                           Commented out all mention of corms flags.
#                           The data directories had a "200607" name, changed to hourly.20060723, changed
#                           the directory loop to work with new directory name. 
#
# Remarks: For debugging use #! /usr/bin/perl -W -w
#
# ------------------------------------------------------------------------------

# Remember the directory the script was started in (output of `pwd`, without
# its trailing newline); the output files and the fallback log live there.
chomp ($workdir = `pwd`);

# Choose the CORMS log file: the FCSTSYSLOG environment variable when it is
# set to a true value, otherwise get_sfcmarobs.err in the working directory.
$cormslog = $ENV{'FCSTSYSLOG'};
if (!$cormslog)
{
  $cormslog = "$workdir/get_sfcmarobs.err";
  print "\nWarning: starting get_sfcmarobs.pl with no FCSTSYSLOG environment variable set!\n";
}
else
{
  chomp ($cormslog);
  print "\nStarting get_sfcmarobs with cormslog $cormslog.\n";
}

# Read the command line arguments.
#   ARGV[0], ARGV[1]  start and end of the time span
#   ARGV[2]           optional selector: -b (body of water) or -l (lat/lon box)
#   ARGV[3..]         selector values
$start_time=$ARGV[0];
$end_time=$ARGV[1];
$option=$ARGV[2];

# Validate the argument list and set the selection variables used later:
#   $option  "none", "-b" or "-l"
#   $body    water body name for -b
#   $minlat, $minlon, $maxlat, $maxlon  box corners for -l
# $usage_error stays empty while the arguments are acceptable; any problem is
# reported once, after the checks, together with the usage text.
my $usage_error="";

if ( @ARGV == 2 )
{
  print "This job will get all surface marine observations between the dates $start_time and $end_time.\n";
  $option="none";
}
elsif ( @ARGV == 4 && $option eq "-b" )
{
  # water body handled by each model
  my %body_of_model = (
    "leofs" => "ERIE",
    "lmofs" => "MICHIGAN",
    "lhofs" => "HURON",
    "lsofs" => "SUPERIOR",
    "loofs" => "ONTARIO",
  );
  $model=$ARGV[3];
  if ( exists $body_of_model{lc($model)} )
  {
    $body=$body_of_model{lc($model)};
  }
  elsif ( grep { $_ eq uc($model) } values %body_of_model )
  {
    # the body of water itself was given, as in the usage example "-b ERIE"
    $body=uc($model);
  }
  else
  {
    # An unknown name used to leave $body undefined, which made the record
    # filter accept every line instead of a subset.  Refuse it instead.
    $usage_error="ERROR: get_sfcmarobs.pl does not recognize '$model' as a model or body of water.\n";
  }
  if ( $usage_error eq "" )
  {
    print "This job will get surface marine observations from the body of water $body,\n";
    print "and between the dates $start_time and $end_time.\n";
  }
}
elsif ( @ARGV == 7 && $option eq "-l" )
{
  $minlat=$ARGV[3];
  $minlon=$ARGV[4];
  $maxlat=$ARGV[5];
  $maxlon=$ARGV[6];
  print "This job will get surface marine observations within the coordinates ($minlat, $minlon) and ($maxlat, $maxlon),\n";
  print "and between the dates $start_time and $end_time.\n";
}
else
{
  $usage_error="ERROR: get_sfcmarobs.pl needs two arguments for start_time, end_time.\n";
}

# Report bad arguments, write a zero CORMS flag and stop with exit status 1.
if ( $usage_error ne "" )
{
  print $usage_error;
  print "Example: get_sfcmarobs.pl '2005123123' '2006010112'\n";
  print "Optional arguments: You can select records from a specific area instead of all records.\n";
  print "- minimum latitude and longitude, maximum latitude and longitude\n";
  print "  Example: get_sfcmarobs.pl '2005123123' '2006010112' -l 39.5 -83.045 42 -79.35\n";
  print "- body of water\n";
  print "  Example: get_sfcmarobs.pl '2005123123' '2006010112' -b ERIE\n";
  print "  (or the model name: leofs, lmofs, lhofs, lsofs, loofs)\n";
  if (open (my $corms_fh, ">>", $cormslog))
  {
    print {$corms_fh} "GET SFCMAROBS 0\n";
    close ($corms_fh);
  }
  else
  {
    print "Warning: cannot append to $cormslog: $!\n";
  }
  exit 1;
}

# $MAROBSDIR environment variable must be set; it names the directory that
# holds the surface marine observation archives.
if ($ENV{'MAROBSDIR'})
{
  $marobsdir=$ENV{'MAROBSDIR'};
  # Tidy the end of the path.  The previous unconditional chop() removed the
  # last character whatever it was, which corrupts a value that has no
  # trailing slash.  Only a trailing newline or trailing slashes are removed
  # here, and a lone "/" is left untouched.
  chomp ($marobsdir);
  chop ($marobsdir) while (length($marobsdir) > 1 && substr($marobsdir,-1) eq "/");
}
else
{
  # Without the archive directory nothing can be done: report it, write a
  # zero CORMS flag and stop with exit status 2.
  print "\nERROR: get_sfcmarobs.pl needs \$MAROBSDIR environment variable to be defined before execution.\n";
  if (open (my $corms_fh, ">>", $cormslog))
  {
    print {$corms_fh} "GET SFCMAROBS 0\n";
    close ($corms_fh);
  }
  else
  {
    print "Warning: cannot append to $cormslog: $!\n";
  }
  exit 2;
}

print "Variables: start_time=$start_time, end_time=$end_time,marobsdir=$marobsdir.\n";

# Bounds used by the scan below:
#   $startdir / $enddir  first 8 characters of the times, compared with the
#                        date at the end of each archive directory name
#   $startpre / $endpre  up to the first 12 characters of the times, compared
#                        with the date prefix of each observation file name
$startdir=substr($start_time,0,8);
$enddir=substr($end_time,0,8);
$startpre=substr($start_time,0,12);
$endpre=substr($end_time,0,12);

# Output files are written in the directory the script was started from.
# Leftovers from an earlier run are removed because both files are appended to.
$listfile=$workdir . "/get_sfcmarobs.lst";
$outfile=$workdir . "/get_sfcmarobs.txt";
$outcount=0;
unlink ($listfile, $outfile);
print "startdir=$startdir,enddir=$enddir,startpre=$startpre,endpre=$endpre,workdir=$workdir,listfile=$listfile,outfile=$outfile\n";
# change directory to the marobs archives
system ("pwd");
unless (chdir ($marobsdir))
{
  # If this chdir fails the directory scan would silently run in whatever
  # directory the script happens to be in.  Report it, write a zero CORMS
  # flag and stop with exit status 3.
  print "\nERROR: get_sfcmarobs.pl cannot change directory to $marobsdir: $!\n";
  if (open (my $corms_fh, ">>", $cormslog))
  {
    print {$corms_fh} "GET SFCMAROBS 0\n";
    close ($corms_fh);
  }
  else
  {
    print "Warning: cannot append to $cormslog: $!\n";
  }
  exit 3;
}

# Build the list file: one line per observation file whose directory date and
# file date prefix both fall inside the requested time span.
#
# The current directory is expected to be the marobs archive directory.
# Directory names end in an 8 digit date (for example hourly.20060723) and
# file names start with a 12 digit date/time followed by _sfcmarobs.txt.
# Perl's glob is used instead of running ls through a shell; the names are
# sorted so the files are listed in date order.
foreach my $loopdir (sort glob("*hourly*[0-9][0-9][0-9][0-9][01][0-9][0-3][0-9]"))
{
  print "\n loopdir is $loopdir\n";

  # The date is the last 8 characters of the name.  Taking it from the end
  # works for any prefix the pattern above allows, not only "hourly.".
  next unless ($loopdir =~ /(\d{8})\z/);
  my $datedir=$1;

  # test if directory date is within the specified limits
  next unless (($datedir >= $startdir) && ($datedir <= $enddir));

  my $absdatedir = $marobsdir . "/" . $loopdir;
  unless (chdir ($absdatedir))
  {
    # Skip the directory rather than scanning the wrong one.
    print "  Warning: cannot change directory to $absdatedir: $!\n";
    next;
  }
  my $pwdout=`pwd`;
  print "  Change directory to $pwdout";
  # The same directory is the path prefix of every file listed below, so it
  # is determined once per directory instead of once per file.
  chomp ($pwdout);

  # loop over observation files
  my @selected=();
  foreach my $obsfile (sort glob("[0-9]???[01][0-9][0-3][0-9][0-2][0-9][0-5][0-9]_sfcmarobs.txt"))
  {
    # Compare as many leading digits of the file name as each bound has.
    # A fixed 10 digit prefix can never fall between 12 digit bounds, so
    # times given down to the minute used to select no files at all.
    my $filestart=substr($obsfile,0,length($startpre));
    my $fileend=substr($obsfile,0,length($endpre));
    # test if file date is within limits
    if (($filestart >= $startpre) && ($fileend <= $endpre))
    {
      push (@selected, $pwdout . "/" . $obsfile . "\n");
    }
  }
  next unless (@selected);

  # write the selected file names to the list file
  if (open (my $list_fh, ">>", $listfile))
  {
    print {$list_fh} @selected;
    close ($list_fh);
  }
  else
  {
    print "  Warning: cannot append to $listfile: $!\n";
  }
}

# Copy the selected records of every file named in the list file to the
# output file, counting the copied records in $outcount.
#
# Which records are copied depends on $option:
#   "none"  every record
#   "-b"    records whose water body field (8 characters from column 58)
#           contains $body
#   "-l"    records whose latitude (6 characters from column 43) and
#           longitude (7 characters from column 50) lie inside the box
#           $minlat..$maxlat, $minlon..$maxlon
# Records too short to hold the field being tested are not copied when a
# -b or -l selection is active.
if (open (my $list_fh, "<", $listfile))
{
  # The output file is opened once for the whole run instead of once per
  # input file.
  if (open (my $out_fh, ">>", $outfile))
  {
    # loop over each line in the list file
    while (my $infile = <$list_fh>)
    {
      chomp ($infile);
      next if ($infile eq "");
      my $in_fh;
      unless (open ($in_fh, "<", $infile))
      {
        print "Warning: cannot read $infile: $!\n";
        next;
      }
      # loop over each line in the input sfc marobs file
      while (my $inline = <$in_fh>)
      {
        my $keep=0;
        if ($option eq "none")
        {
          $keep=1;
        }
        elsif ($option eq "-b")
        {
          if (length($inline) > 58)
          {
            my $inbody=substr($inline,58,8);
            # Plain substring test.  An undefined $body is treated as empty
            # and accepts every record, as the pattern match did before.
            my $wanted=defined($body) ? $body : "";
            $keep=1 if (index($inbody,$wanted) >= 0);
          }
        }
        elsif ($option eq "-l")
        {
          if (length($inline) > 50)
          {
            my $inlat=substr($inline,43,6);
            my $inlon=substr($inline,50,7);
            # blank coordinate fields are not compared as if they were zero
            if (($inlat =~ /\d/) && ($inlon =~ /\d/))
            {
              $keep=1 if (($inlat >= $minlat) && ($inlat <= $maxlat) && ($inlon >= $minlon) && ($inlon <= $maxlon));
            }
          }
        }
        if ($keep)
        {
          print {$out_fh} $inline;
          $outcount++;
        }
      }
      close ($in_fh);
    }
    close ($out_fh) or print "Warning: problem closing $outfile: $!\n";
  }
  else
  {
    # nothing is counted when nothing can be written
    print "ERROR: cannot append to $outfile: $!\n";
  }
  close ($list_fh);
}
else
{
  # The list file only exists when at least one observation file matched.
  print "No list file $listfile to read; no observation files were selected.\n";
}

# Report how many observation files were listed and how many records were
# written, on standard output and as flags in the CORMS log.
#
# The listed files are counted by reading the list file directly.  This
# replaces two runs of "wc -l" through the shell, which depended on the
# output layout of wc and on the path being safe to paste into a command.
# A list file that is missing or unreadable counts as zero files.
$cormscount=0;
if (open (my $count_fh, "<", $listfile))
{
  while (my $listed = <$count_fh>)
  {
    $cormscount++;
  }
  close ($count_fh);
}

print "INSFCMAROBS $cormscount\n";
print "OUTSFCMAROBS $outcount\n";
if (open (my $corms_fh, ">>", $cormslog))
{
  print {$corms_fh} "GET SFCMAROBS $cormscount\n";
  print {$corms_fh} "OUT SFCMAROBS $outcount\n";
  close ($corms_fh);
}
else
{
  print "Warning: cannot append to $cormslog: $!\n";
}

print "End get_sfcmarobs.pl\n";
exit;

