#!python3
#-----------------------------------------------------------------------------
# cron.tcl --
#    This program generates a graph which combines the extra-tropical storm
#    surge with a computed tide, and observations (where available) to create
#    an estimate of the combined water level.
#    It then attempts to ftp the resulting graph, associated text file,
#    and color coded map to www.nws.noaa.gov/mdl/etsurge.
#
# 10/2000 Arthur Taylor (RSIS/MDL) Created
#  5/2002 Arthur Taylor (RSIS/MDL) Updated
# 10/2023 Soroosh Mani (NOS/OCS/CSDL/CMMB) To Python
#
# Notes:
#-----------------------------------------------------------------------------
# Global variables:
#   RAY1(...)
#     tl          : Toplevel to use when using cronshow.tcl
#     statusList  : The stations for which I have calculated the status.
#                   Usually all stations...
#     Backup_list : The files for which we updated the Waterlevel...
#                   hence we need to "backup" these files on the web server.
#     got_surge   : Flag to say if we downloaded the surge files from the IBM
#
#     Wid         : The Width of the main gd image
#     Hei         : The Height of the main gd image
#     Beg_Time    : Start time of data
#     End_Time    : Stop time of data
#     Num_Hours   : Total hours of data ((end+beg)*24 +1)
#     <hr>        : is hours after Beg_Time.
#     (<hr>,cwl)  : 99.9 or cwl
#     (<hr>,obs)  : observation
#     (<hr>,anom) : anom (obs-(cwl))
#     (<hr>,pred) : cwl+anom
#
#   cwl      : List of (time cwl) pairs
#   obs      : List of (time obs) pairs
#   anom     : List of (time (obs-(cwl))) pairs
#   pred     : List of (time (cwl+anom)) pairs
#   now      : Time the program was sourced.
#   min_time : Minimum Time to plot
#   max_time : Maximum Time to plot
#   src_dir  : Path of the source directory.
#   CronShow : 0 if no cronshow.tcl, 1 if cronshow.tcl
#-----------------------------------------------------------------------------
import argparse
import logging
import os
import sys
import shutil
import time
import urllib.request
from urllib.error import HTTPError
from datetime import datetime, timedelta
from pathlib import Path

import pytz
import numpy as np
import pandas as pd
import xarray as xr

sys.path.append(os.environ.get('LIBstofs'))  # OR set PYTHONPATH
import archive  # replaces 'archive.tcl'

INTERNET = bool(int(os.environ.get('INTERNET', 0)))
DATA = Path(os.environ.get('DATA'))

# NOTE: Assuming UTC = GMT, although they are not technically the same
DT64REF = np.datetime64('1970-01-01T00:00:00')
GMT = pytz.timezone('gmt')
ZONES = {
    'CST': pytz.timezone('US/Central'),
    'EST': pytz.timezone('US/Eastern'),
    'HST': pytz.timezone('US/Hawaii'),
    'PST': pytz.timezone('US/Pacific'),
    'YST': pytz.timezone('US/Alaska'),
}

logging.basicConfig(filename=f'{DATA}/log/database.log', filemode='a')
_logger = logging.getLogger(__name__)

# NOTE: Numpy datetime is always meant to be UTC time; a tz-aware
# datetime set on xarray becomes a UTC tz-naive datetime in numpy!
# Sometimes xarray converts a tz-aware datetime to a pandas Timestamp
# object array which is still tz-aware, but strangely a tz-aware
# Timestamp that is set on xarray is stored as numpy UTC tz-naive type!!
# ds = xr.Dataset()
# ds['time1'] = datetime.now()  # as numpy
# ds['time2'] = datetime.now(pytz.timezone('est'))  # as Timestamp
# ds['time3'] = ds.time2.item()  # converted to UTC and stored as numpy!!
#
# Let's go with storing datetime64 on xarray and assuming xarray datetimes
# are always stored in UTC


def _to_datetime(xr_dt64):
    ts = (np.datetime64(xr_dt64, 'ns') - DT64REF) / np.timedelta64(1, 's')
    return datetime.utcfromtimestamp(ts)


def parse_cli():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--date', '-d',
        type=pd.to_datetime,
        help=(
            'Choice for date in ISO 8601-like format. '
            + 'If offset is not provided it\'s assumed to be UTC'
        )
    )
    args = parser.parse_args()
    return args


def process_cli(args):
    user = {}
    user['date'] = datetime.now(GMT)
    if args.date is not None:
        user['date'] = args.date
        print(f"Date chosen as {user['date'].strftime('%D %T')}")

    # Use tz-naive datetimes in GMT time
    for nm in ['date']:
        if user[nm].tzinfo is None:
            # If it's tz-naive assume it's in GMT
            continue
        # Else convert to GMT and make it naive
        user[nm] = user[nm].astimezone(GMT).replace(tzinfo=None)

    return user


#########################################################################
def Init_Time(ds, days_before, days_after):
    # All times are tz-naive and assumed to be in GMT
    nHrs = (days_after + days_before) * 24 + 1
    hrs = np.arange(nHrs)
    ret_ds = xr.Dataset(
        data_vars=dict(
            **ds.data_vars.variables,
            begTime=ds['cur'] - np.timedelta64(days_before, 'D'),
            endTime=ds['cur'] + np.timedelta64(days_after, 'D'),
            numHours=nHrs,
            # +1 to account for daylight saving adjustment cases
            cwl=('hr', np.ones(hrs.shape) * 99.9),
            obs=('hr', np.ones(hrs.shape) * 99.9),
        ),
        coords={
            **ds.coords.variables,
            'hr': hrs,
        },
        attrs=ds.attrs,
    )
    return ret_ds


#*******************************************************************************
# Procedure Read_WaterLevel
#
# Purpose:
#     Read a combined water level file looking for a particular station.
#
# Variables:(I=input)(O=output)
#   ray_name   (I) Global array to store the data in.
#   file       (I) The combined waterlevel file in question.
#   name       (I) Name of the station.
#   month      (I) The month the combined water level file is valid for
#   year       (I) The year the combined water level file is valid for
#
# Returns: -1 if it didn't find the station.
#
# History:
#    10/2000 Arthur Taylor created
#
# Notes:
#   Adjusts ray(cwl)
#*******************************************************************************
def Read_WaterLevel(mut_ds, arch_file, file, month, year, name, temp_file):
    last_time = archive.Read_ArchCWL(mut_ds, arch_file)

    with open(file, 'r') as fp:
        line = fp.readline().split()
        day = int(line[2][0:2])
        hour = int(line[2][2:4])
        # tz-naive time created from file datetimes, assumed to be GMT
        time = datetime(year, month, day, hour, 0, 0)
        begTime = _to_datetime(mut_ds['begTime'].item())

        # If the following is NOT true, then we need to update the
        # archive file; else we are done.
        if not (time > last_time or pd.isna(last_time)):
            return 0

        backupList = mut_ds.attrs.setdefault('Backup_list', [])
        backupList.append(Path(arch_file).stem)

        # Remove ',' before the state code in crontab file
        name = name.lower()
        name2 = name.strip()
        nm_len = len(name2)
        name2 = f"{name[:nm_len - 4]}{name[nm_len - 3:]}"

        # Read the rest
        # cwl is a ref into the xarray dataset's underlying array
        cwl = mut_ds.cwl.values
        hrs = mut_ds.hr
        for line in fp:
            # Name from the model file
            lower = line.lower()
            if name2 not in lower:
                continue

            wl = line[92:96]
            if wl == "****":
                wl = 99.9
            else:
                wl = float(wl) / 10

            # TODO: Check issue with ceil/floor
            tm_hr = int(
                (time - begTime).total_seconds() / 3600
            )
            if tm_hr not in hrs:
                continue
            # Assumption: tm_hr is the same as positional index
            cwl[tm_hr] = wl

            # For storage in New ArchCWL
            # tz-naive time assumed to be in GMT
            date = time.strftime('%m/%d/%Y %H')  # 13 char
            nm_tag = f'{name.upper():<75}'
            if len(name) > 74:
                nm_parts = name.upper().split(',')
                nm_stn = ''.join(nm_parts[:-1]).strip()
                nm_state = nm_parts[-1].strip()
                nm_tag = f'{nm_stn[:67]}...,{nm_state:>3} '  # 75 char
            line1 = (nm_tag + f'{date}{line[88:]}').strip('\n')

            line = fp.readline()
            # For storage in New ArchCWL
            line2 = line.strip('\n')
            for i in range(1, 25):
                wl = line[-4 + 4 * i:4 * i]
                if wl == "****":
                    wl = 99.9
                else:
                    wl = float(wl) / 10
                tm = time + timedelta(hours=i)
                # TODO: Check issue with ceil/floor
                tm_hr = int(
                    (tm - begTime).total_seconds() / 3600
                )
                if tm_hr not in hrs:
                    continue
                # Assumption: tm_hr is the same as positional index
                cwl[tm_hr] = wl

            line = fp.readline()
            # For storage in New ArchCWL
            line3 = line.strip('\n')
            for i in range(1, 25):
                wl = line[-4 + 4 * i:4 * i]
                if wl == "****":
                    wl = 99.9
                else:
                    wl = float(wl) / 10
                tm = time + timedelta(days=1, hours=i)
                # TODO: Check issue with ceil/floor
                tm_hr = int(
                    (tm - begTime).total_seconds() / 3600
                )
                if tm_hr not in hrs:
                    continue
                # Assumption: tm_hr is the same as positional index
                cwl[tm_hr] = wl

            line = fp.readline()
            # For storage in New ArchCWL
            line4 = line.strip('\n')
            for i in range(1, 25):
                wl = line[-4 + 4 * i:4 * i]
                if wl == "****":
                    wl = 99.9
                else:
                    wl = float(wl) / 10
                tm = time + timedelta(days=2, hours=i)
                # TODO: Check issue with ceil/floor
                tm_hr = int(
                    (tm - begTime).total_seconds() / 3600
                )
                if tm_hr not in hrs:
                    continue
                # Assumption: tm_hr is the same as positional index
                cwl[tm_hr] = wl

            line = fp.readline()
            # For storage in New ArchCWL
            line5 = line.strip('\n')
            for i in range(1, 25):
                wl = line[-4 + 4 * i:4 * i]
                if wl == "****":
                    wl = 99.9
                else:
                    wl = float(wl) / 10
                tm = time + timedelta(days=3, hours=i)
                # TODO: Check issue with ceil/floor
                tm_hr = int(
                    (tm - begTime).total_seconds() / 3600
                )
                if tm_hr not in hrs:
                    continue
                # Assumption: tm_hr is the same as positional index
                cwl[tm_hr] = wl

            Path(arch_file).parent.mkdir(exist_ok=True, parents=True)
            archive.Write_ArchCWL(
                mut_ds, arch_file, line1, line2, line3, line4, line5,
                temp_file
            )
            return 0

        print(f"Didn't find {name}")
        return -1


#*******************************************************************************
# Procedure Read_ObsFile
#
# Purpose:
#     Read a file that contains a station's observations, for tide values,
#     and observation values. (File was obtained by downloading it as an html
#     document, and stripping out stuff outside of <pre> blocks.)
#
# Variables:(I=input)(O=output)
#   ray_name   (I) Global array to store the data in.
#   obsFile    (I) The file to read the observations from.
#
# Returns: NULL
#
# History:
#    10/2000 Arthur Taylor created
#
# Notes:
#   Adjusts ray(obs)
#*******************************************************************************
def Read_ObsFile(mut_ds, obsFile, archFile, temp_file, days_before, mllw, msl):

    with open(temp_file, 'w') as ap:
        ap.write("Header line must be in .obs file\n")

        # The False is so we leave the observations in MLLW.
        last_time = archive.Read_ArchObs(mut_ds, archFile, ap, False)
        first_time = pd.NaT

        # Read in the observations.
        if obsFile is None:
            return

        unknown_hours = []
        unknown_replacement = {}
        with open(obsFile, 'r') as fp:
            line = fp.readline()
            begTime = _to_datetime(mut_ds['begTime'].item())
            hrs = mut_ds.hr
            # obs_ary is a ref into xarray dataset's underlying array
            obs_ary = mut_ds.obs.values
            for line in fp:
                dateTime = line.split(',')[0]
                date = dateTime.split()[0].split('-')
#                date = '/'.join([date[1], date[2], date[0]])
                try:
                    # Datetimes from the obs file are assumed to be in GMT
#                    time = pd.to_datetime(f'{date} {dateTime.split()[1]}:00')
                    time = datetime(
                        year=int(date[0]),
                        month=int(date[1]),
                        day=int(date[2]),
                        hour=int(dateTime.split()[1].split(':')[0]),
                        minute=int(dateTime.split()[1].split(':')[1]),
                    )
                except (ValueError, IndexError):
                    continue

                cur_MinSec = time.strftime("%M%S")
                # tz-naive, assumed to be in GMT
                if cur_MinSec == "0000":
                    try:
                        obs = float(line.split(',')[1])
                    except ValueError:
                        obs = 99.9

                    # TODO: Check issue with ceil/floor
                    tm_hr = int(
                        (time - begTime).total_seconds() / 3600.
                    )
                    if tm_hr not in hrs:
                        continue

                    if pd.isna(first_time):
                        first_time = time   

                    if (obs < 99) and (obs > -99):
                        # Assumption: tm_hr is the same as positional index
                        obs_ary[tm_hr] = obs
                    else:
                        # Assumption: tm_hr is the same as positional index
                        obs_ary[tm_hr] = 99.9
                        unknown_hours.append(tm_hr)
                    
                elif cur_MinSec == "0600":
                    time = time - timedelta(minutes=6)
                    # TODO: Check issue with ceil/floor
                    tm_hr = int(
                        (time - begTime).total_seconds() / 3600.
                    )
                    if tm_hr not in hrs:
                        continue
                    try:
                        obs = float(line.split(',')[1])
                    except ValueError:
                        obs = 99.9
                    if (obs < 99) and (obs > -99):
                        unknown_replacement[(tm_hr, 2)] = obs
                    else:
                        unknown_replacement[(tm_hr, 2)] = 99.9

                elif cur_MinSec == "5400":
                    time = time + timedelta(minutes=6)
                    # TODO: Check issue with ceil/floor
                    tm_hr = int(
                        (time - begTime).total_seconds() / 3600.
                    )
                    if tm_hr not in hrs:
                        continue
                    try:
                        obs = float(line.split(',')[1])
                    except ValueError:
                        obs = 99.9
                    if (obs < 99) and (obs > -99):
                        unknown_replacement[(tm_hr, 3)] = obs
                    else:
                        unknown_replacement[(tm_hr, 3)] = 99.9


            for tm_hr in unknown_hours:
                left_value = unknown_replacement.get((tm_hr, 2), 99.9)
                right_value = unknown_replacement.get((tm_hr, 3), 99.9)
                has_replacement = (
                    left_value != 99.9 and right_value != 99.9
                )
                if not has_replacement:
                    continue


                # Assumption: tm_hr is the same as positional index
                obs_ary[tm_hr] = (left_value + right_value) / 2.0

            # TODO: No need to clear?
#            unknown_replacement.clear()

        # Store the new observations.
        if pd.isna(last_time) and not pd.isna(first_time):
            # First time might not be on the 00 hour
            first_time = first_time.replace(
                hour=0, minute=0, second=0, microsecond=0
            )
            last_time = first_time - timedelta(hours=1)

        if not pd.isna(last_time):
            archive.Write_ArchObs(mut_ds, ap, last_time)

    Path(archFile).parent.mkdir(exist_ok=True, parents=True)
    shutil.copy(temp_file, archFile)
    Path(temp_file).unlink()
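
# NOTE: The "ceil/floor" TODOs above flag that int() truncates toward zero,
# which differs from floor for times earlier than begTime, e.g.
# (illustrative only; `math` is not imported by this module):
# int(-1800 / 3600)        -> 0
# math.floor(-1800 / 3600) -> -1
# Also, when a top-of-hour reading is bad, the :54 reading before and the
# :06 reading after that hour are averaged in as a replacement (see the
# unknown_hours/unknown_replacement loop above).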



def Refresh(
    mut_ds,
    surge_stn,
    obs_stn,
    tide_stn,  # unused
    cwl_file,
    surge_flag,
    title,  # unused
    arch_cwl,
    temp_file,
    temp2_file,
    days_before,
    arch_obs,
    display_days,  # unused
    mllw,  # unused
    msl,  # unused
    mhhw,  # unused
    zone,  # unused
    mat,  # unused
    filt_anom,  # unused
):

    if surge_flag:
        if not mut_ds['got_surge']:
            print("Getting Anonymous Surge2")
            # NOTE: We don't need getdata in STOFS
            print(
                "This is already done by the code that massaged the"
                + " ESTOFS data into ETSS format."
                + "\nDon't grab more data into the /model folder."
            )
#            if not getdata.Get_Anonymous_Surge2(mut_ds):
#                _logger.error("had problems getting EtSurge data")
#                print("had problems getting EtSurge data")
#            else:
#                mut_ds['got_surge'] = True
#                _logger.info("Got data EtSurge")

        if 'cur' not in mut_ds:
            print("reading waterlevel file")
            with open(f'{DATA}/model/{cwl_file}', 'r') as fp:
                line = fp.readline()

            # NOTE: File times are assumed to be in GMT
            day = int(line.split()[2][:2])
            hour = int(line.split()[2][2:4])
            print(f"Hour: {hour} :Day :{day}")

            # Start at current time, and go backward in 24 hour steps
            # until day/hour match day/hour from water level file.
            # 
            # xarray stores in tz-naive datetime64 array
            # so `cur` is tz-naive and assumed to be in GMT
            cur = _to_datetime(mut_ds['now'].item())
            cur_day = cur.day
            while cur_day != day:
                cur -= timedelta(days=1)
                cur_day = cur.day

            # xarray stores in tz-naive datetime64 array
            mut_ds['cur'] = cur.replace(
                hour=hour, minute=0, second=0, microsecond=0
            )

    # Init Beg_time/End_time, and init ray elements to 99.9
    mut_ds = Init_Time(mut_ds, days_before, 4)
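    # NOTE: Init_Time returns a new Dataset, so this rebinding is local to
    # Refresh; the caller's dataset keeps only the variables set before this
    # call (e.g. 'cur', 'got_surge').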

    mut_ds['min_time'] = pd.NaT
    mut_ds['max_time'] = pd.NaT

    # Need month and year; note `mut_ds['cur']` is tz-naive, assumed GMT
    month = _to_datetime(mut_ds['cur'].item()).month
    year = _to_datetime(mut_ds['cur'].item()).year

    Read_WaterLevel(
        mut_ds,
        arch_cwl,
        DATA / 'model' / cwl_file,
        month,
        year,
        surge_stn,
        temp_file
    )

    _logger.info("Have Read water levels.. Get obs?")

    if (obs_stn != "") and (obs_stn[0] != "?"):
        obs_stn = ""
    if obs_stn != "":
        stn = obs_stn.split('=')[1].split('+')[0]
        if INTERNET:
            beg = _to_datetime(mut_ds['begTime'].item())
            end = _to_datetime(mut_ds['endTime'].item())
            url = ('https://tidesandcurrents.noaa.gov/api/datagetter?'
                + 'product=water_level&application=NOS.COOPS.TAC.WL'
                + f'&begin_date={beg.strftime("%Y%m%d %H:%M")}'
                + f'&end_date={end.strftime("%Y%m%d %H:%M")}&time_zone=gmt'
                + f'&station={stn}&datum=MLLW&units=english&interval='
                + '&format=csv'
            ).replace(' ', '%20')
            try:
                fn, hdr = urllib.request.urlretrieve(url, temp_file)
            except HTTPError:
                print(f'Error retrieving station {stn}')
                obs_stn = ""

        else:
            # Copy obs from dcom
            shutil.copy(f'{DATA}/database/{stn}.csv', temp_file)

    _logger.info("Got obs")
    if (obs_stn != "") and (Path(temp_file).stat().st_size > 10000):
        #
        # Problem1: HTML code has 2 <pre> but only one </pre>
        # Problem2: The number of characters on a line can increase without
        #           warning.
        #
        Read_ObsFile(
            mut_ds, temp_file, arch_obs, temp2_file, days_before, mllw, msl
        )
    else:
        Read_ObsFile(
            mut_ds, None, arch_obs, temp_file, days_before, mllw, msl
        )
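
# NOTE: For reference, the datagetter request assembled in Refresh expands to
# something like (station id and dates illustrative only):
# https://tidesandcurrents.noaa.gov/api/datagetter?product=water_level&application=NOS.COOPS.TAC.WL&begin_date=20231001%2000:00&end_date=20231010%2000:00&time_zone=gmt&station=8638610&datum=MLLW&units=english&interval=&format=csv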


def Main_Init(mut_ds):
    mut_ds['txt_file'] = DATA / 'default.txt'
    mut_ds.attrs['Backup_list'] = []
    mut_ds['got_surge'] = False


def Main(mut_ds, user):
    # NOTE: In xarray tz-aware datetime is stored as pandas object array,
    # but we want to store as tz-naive numpy.datetime64 (at GMT/UTC value)
    mut_ds['now'] = user['date']
    mut_ds.attrs['timezone'] = GMT

    _logger.info(f"Starting {time.time_ns()}")
    Main_Init(mut_ds)

    with open(DATA / 'data' / 'cron.bnt', 'r') as fp:
        line = fp.readline()
        txtlist = []
        for line in fp:
            print(f"Starting line {line}")
            line = line.split(':')
            if line[0].strip() and line[0].strip()[0] == '#':
                continue

            _logger.info(f"Working with {line}")
            mut_ds['txt_file'] = DATA / 'model' / f'{line[1]}.txt'
            mut_ds['cur_abrev'] = line[1]

            zone = line[16]
            # NOTE: e.g. EST is a fixed-offset timezone, while US/Eastern
            # may or may not be in daylight saving depending on the date
            #
            # mut_ds['now'] is tz-naive with its value in GMT: first we
            # `localize` it, then convert to the specified timezone to
            # check whether the value there carries a daylight saving
            # offset
            tzinfo = ZONES[zone]
#            is_dst = bool(
#                GMT.localize(
#                    _to_datetime(mut_ds['now'].item())
#                ).astimezone(tzinfo).dst()
#            )
#            if zone != 'HST' and is_dst:
#                zone = f"{zone[0]}DT"
            mllw = float(line[12])
            msl = float(line[14])
            mhhw = float(line[14])
            mat = line[17].split('#')[0]
            filt_anom = line[18]
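            # The ':'-separated cron.bnt fields consumed here: [1] station
            # abbrev, [2] surge station, [3] obs station query, [4] tide
            # station, [5] cwl file, [6] surge flag, [7] title, [12] mllw,
            # [14] msl (also read for mhhw), [16] time zone code, [17] mat,
            # [18] filt_anom.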
            Refresh(
                mut_ds=mut_ds,
                surge_stn=line[2],
                obs_stn=line[3],
                tide_stn=line[4],
                cwl_file=Path(line[5]),
                surge_flag=bool(int(line[6])),
                title=line[7],
                arch_cwl=DATA / 'database' / f'{line[1]}.cwl',
                temp_file=DATA / 'temp.txt',
                temp2_file=DATA / 'temp2.txt',
                days_before=5,
                arch_obs=DATA / 'database' / f'{line[1]}.obs',
                display_days=1.5,
                mllw=mllw,
                msl=msl,
                mhhw=mhhw,
                zone=tzinfo,
                mat=mat,
                filt_anom=filt_anom,
            )

            txtlist.append(f'{line[1]}.txt')

    print("")


#-----------------------------------------------------------------------------
# Done with Procs, start program.
#-----------------------------------------------------------------------------


if __name__ == '__main__':
    os.chdir(DATA)

    args = parse_cli()
    user = process_cli(args)
    dataset = xr.Dataset()
    Main(dataset, user)
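
# NOTE: A typical invocation might look like the following (paths and the
# script name are illustrative only; DATA, LIBstofs and INTERNET come from
# the environment reads at the top of this module):
#   DATA=/path/to/data LIBstofs=/path/to/lib INTERNET=1 \
#       python3 cron.py --date 2023-10-01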