#! /usr/bin/env python ############################################################################### # File name: make_synthetic_time_series.py # # # # Author : Zhengtao Cui (Zhengtao.Cui@noaa.gov) # # # # Initial version date: # # # # Last modification date: 9/11/2019 # # # # Description: The driver to create NetCDF time series files from sample RFC # # PI XML files # # # ############################################################################### import os, sys, time, urllib, getopt, re import logging import random import copy from string import * import xml.etree.ElementTree as etree from datetime import datetime, timedelta from collections import OrderedDict from PI_XML import PI_XML from RFC_Forecast import RFC_Forecast from RFCTimeSeries import RFCTimeSeries from RFCHelper import RFCHelper from RFC_Sites import RFC_Sites #import Tracer """ The driver to parse downloaded ACE XML observations and create time series and write to NetCDF files Author: Zhengtao Cui (Zhengtao.Cui@noaa.gov) Date: May 30, 2019 """ def main(argv): """ function to get input arguments """ inputdir = '' try: opts, args = getopt.getopt(argv,"hi:o:s:",["idir=", "odir=", \ "sites="]) except getopt.GetoptError: print('make_synthetic_time_series.py -i -o -s ') sys.exit(2) for opt, arg in opts: if opt == '-h': print( \ 'make_synthetic_time_series.py -i -o -s ') sys.exit() elif opt in ('-i', "--idir"): inputdir = arg if not os.path.exists( inputdir ): raise RuntimeError( 'FATAL Error: inputdir ' + \ inputdir + ' does not exist!' ) elif opt in ('-o', "--odir" ): outputdir = arg if not os.path.exists( outputdir ): raise RuntimeError( 'FATAL Error: outputdir ' + \ outputdir + ' does not exist!' ) elif opt in ('-s', "--rfcsitefile" ): sitefile = arg if not os.path.exists( sitefile ): raise RuntimeError( 'FATAL Error: sitefile ' + \ sitefile + ' does not exist!' ) return (inputdir, outputdir, sitefile) #divtdi = timedelta.__div__ def divtd(td1, td2): if isinstance(td2, (int, long)): # return divtdi(td1, td2) return td1 / td2 us1 = td1.microseconds + 1000000 * (td1.seconds + 86400 * td1.days) us2 = td2.microseconds + 1000000 * (td2.seconds + 86400 * td2.days) return us1 / us2 # this does integer division, use float(us1) / us2 for fp division def syntheticTimeValueQuality( timeperiod, timestep, missingVal, \ template ): template.insert( random.randrange( len(template) ), missingVal ) timeitr = timeperiod[0] tvq = OrderedDict() while timeitr <= timeperiod[1]: tvq[ timeitr ] = ( random.choice( template ), \ random.randrange( 1, 100 ) ) timeitr += timestep return tvq t0 = time.time() logging.basicConfig(format=\ '%(asctime)s - %(name)s - %(levelname)s - %(message)s',\ level=logging.INFO) logger = logging.getLogger(__name__) formatter = logging.Formatter(\ '%(asctime)s - %(name)s - %(levelname)s - %(message)s') #logger.setFormatter(formatter) logger.info( "System Path: " + str( sys.path ) ) if __name__ == "__main__": try: odir = main(sys.argv[1:]) except Exception as e: logger.error("Failed to get program options.", exc_info=True) indir = odir[0] outdir = odir[1] rfcsitefile = odir[2] logger.info( 'Input dir is "' + indir + '"') logger.info( 'Output dir is "' + outdir + '"') logger.info( 'RFC site file is "' + rfcsitefile + '"') # # Load ACE observed XML discharge data # timeperiod = (datetime(2019, 9, 9, 18), \ datetime(2019, 9, 9, 18 ) + \ timedelta( days = 5) ) try: fcsts = [] if not os.path.isdir( indir ): raise RuntimeError( "FATAL ERROR: " + indir + \ " is not a directory or does not exist. ") rfcsites = RFC_Sites( rfcsitefile ) logger.info( 'Reading ' + indir + '/' + '20190909_MARFC_Reservoir_Export.xml' + ' ... ' ) try: group1 = [ "HAWC1", "ELPC1", "SVIC1", "LKPC1", "CCHC1", "PYMC1",\ "ISAC1", "SCSC1", "TMDC1", "NACC1", "SNRC1", "LEXC1",\ "CADC1", "COYC1", "ANDC1", "MSWC1", "LNGC1", "FRAC1",\ "PFTC1", "BPRC1", "TOPN2", "HETC1", "CHVC1", "KNFC1", \ "NHGC1", "CMCC1" ] group2 = [ "NIMC1", "LBEC1", "WSDC1", "LAMC1", "EPRC1", "SGEC1" ] group3 = [ "INVC1", "BLBC1", "ORDC1", "HLEC1", "CFWC1", "NBBC1", \ "DNRC1", "TAHC1", "BCVC1", "STPC1", "PSRC1", "WHSC1", \ "LLKC1", "MRUC1", "IRGC1" ] pixml = PI_XML( indir + '/20190909_MARFC_Reservoir_Export.xml' ) rfc_series = pixml.toRFCForecast() for sta in group1: print( "group1: " + sta ) s = copy.deepcopy( \ rfc_series[ random.randrange( len( rfc_series ) ) ] ) if not s.isEmpty(): s.forecastDate = timeperiod[0] + s.getOffsetTime() if sta in [ "HAWC1", "ELPC1", "SVIC1" ]: timeperiod1 = ( timeperiod[0] + timedelta( hours = 6 ), \ timeperiod[1] + timedelta( hours = 6 ) ) tvq = syntheticTimeValueQuality( timeperiod1, \ timedelta( seconds = s.getTimeStepInSeconds()),\ float( s.missVal ), s.getValuesInCMS() ) s.fstPeriod = timeperiod1 else: tvq = syntheticTimeValueQuality( timeperiod, \ timedelta( seconds = s.getTimeStepInSeconds()),\ float( s.missVal ), s.getValuesInCMS() ) s.fstPeriod = timeperiod s.timeValueQuality = tvq s.stationID = sta # print( s.timeValueQuality ) # print( s.fstPeriod ) print( s.stationID ) fcsts.append( s ) rfcsites.addComment( s.get5CharStationID(), \ "OK") else: rfcsites.addComment( s.get5CharStationID(), \ "Empty") for sta in group2: print( "group2: " + sta ) s = copy.deepcopy( \ rfc_series[ random.randrange( len( rfc_series ) ) ] ) tvq = syntheticTimeValueQuality( timeperiod, \ timedelta( seconds = s.getTimeStepInSeconds()),\ float( s.missVal ), s.getValuesInCMS() ) tvq[ timeperiod[0] ] = ( float( s.missVal ), \ random.randrange(1, 100) ) tvq[ timeperiod[0] + timedelta(hours=6)] = \ ( float( s.missVal ), random.randrange(1, 100) ) tvq[ timeperiod[0] + timedelta(hours=12)] = \ ( float( s.missVal ), random.randrange(1, 100) ) s.timeValueQuality = tvq s.forecastDate = timeperiod[0] + s.getOffsetTime() s.fstPeriod = timeperiod s.stationID = sta fcsts.append( s ) rfcsites.addComment( s.get5CharStationID(), \ "OK") for sta in group3: print( "group3" + sta ) s = copy.deepcopy( \ rfc_series[ random.randrange( len( rfc_series ) ) ] ) tvq = syntheticTimeValueQuality( timeperiod, \ timedelta( seconds = s.getTimeStepInSeconds()),\ float( s.missVal ), \ [float(s.missVal), float(s.missVal)] ) s.timeValueQuality = tvq s.forecastDate = timeperiod[0] + s.getOffsetTime() s.fstPeriod = timeperiod s.stationID = sta fcsts.append( s ) rfcsites.addComment( s.get5CharStationID(), \ "OK") except Exception as e: logger.warn( repr( e ), exc_info=True ) if not fcsts: raise RuntimeError( "FATAL ERROR: " + indir + \ " has no PI xml files") for s, r, c in zip( rfcsites.gauge, rfcsites.RFC, rfcsites.comments ): logger.info( 'Site info: ' + s + " " + r + " " + c ) except Exception as e: logger.error("Failed to load PI XML files.", exc_info=True) helper = RFCHelper( fcsts ) logger.info( 'Earliest time in PI XML: ' + \ helper.timePeriodForAll()[0].isoformat() ) logger.info( 'Latest time in PI XML: ' + \ helper.timePeriodForAll()[1].isoformat() ) # # Create time series from loaded observations # # Set time resolution to 60 minutes # and # Write time series to NetCDF files # try: timeseries = helper.makeAllTimeSeries( timedelta( minutes = 60 ), \ outdir, 'RFCTimeSeries.ncdf' ) except Exception as e: logger.error("Failed to make time series.", exc_info=True) logger.error("Input dir = " + indir, exc_info=True) logger.info( "Total number of timeseries: " + str( timeseries ) ) logger.info( "Program finished in: " + \ "{0:.1f}".format( (time.time() - t0) / 60.0 ) + \ " minutes" )