"""Compare RFC reservoir time-slice NetCDF files with their PI-XML sources.

For every RFC listed in ``rfcs`` the script pairs each PI-XML export with the
time-slice files whose file name carries a station ID found in that export
and the same T0 hour.  For each match it writes two CSV files next to the
time slice:

* ``<timeslice>_discharges_raw.csv`` -- one row per hourly time stamp with
  the NetCDF discharge and, when available, the PI-XML value for the same
  time stamp (the T0 row is flagged with ``**``).
* ``<timeslice>_discharges.csv`` -- the NetCDF discharges as a pandas Series
  indexed by time stamp.
"""
import errno
import glob
import os
import sys
from datetime import datetime, timedelta

import netCDF4
import numpy as np
import pandas as pd
from dateutil import parser

from PI_XML import PI_XML
from RFC_Sites import RFC_Sites


def _read_timeslice(name):
    """Read the values this script needs from one time-slice NetCDF file.

    Returns a tuple ``(issue_time, observed_count, total_count, discharges)``:
    the decoded ``issueTimeUTC`` text, the first entries of
    ``observedCounts`` and ``totalCounts`` converted with ``.item()``, and
    the first row of ``discharges``.
    """
    # The context manager closes the file on every path; previously the
    # dataset was left open for each matching time slice.
    with netCDF4.Dataset(name, mode='r') as nc:
        # issueTimeUTC is stored as characters; join the raw bytes into text.
        # See https://stackoverflow.com/questions/37079883/string-handling-in-python-netcdf4
        # tobytes() replaces ndarray.tostring(), which NumPy 2.0 removed.
        issue_time = nc.variables['issueTimeUTC'][:].tobytes().decode()
        observed_count = nc.variables['observedCounts'][:][0].item()
        total_count = nc.variables['totalCounts'][:][0].item()
        discharges = nc.variables['discharges'][:][0, :]
    return issue_time, observed_count, total_count, discharges


def _time_axis(issue_time, observed_count, total_count):
    """Return the hourly time stamps covered by a time slice.

    The axis starts ``observed_count`` hours before the hour parsed from the
    first 13 characters (``YYYY-MM-DD_HH``) of ``issue_time`` and holds
    ``total_count`` consecutive hourly steps.
    """
    t0_time = datetime.strptime(issue_time[0:13], '%Y-%m-%d_%H')
    obs_start_time = t0_time - timedelta(hours=observed_count)
    return np.array([obs_start_time + timedelta(hours=i)
                     for i in range(total_count)])


def _write_raw_comparison(csv_name, time_array, discharges_1d, ts):
    """Write NetCDF discharges beside the matching PI-XML values.

    ``ts`` is the sequence returned by ``getObservedAndForecastForID``
    (presumably the observed series followed by the forecast series, going
    by the method name).  For each time stamp the PI-XML value is taken from
    ``ts[0]`` when it has that time stamp, otherwise from ``ts[1]``; the
    column is left out of the row when neither has it.
    """
    # Both calls are assumed to be plain getters, so they are evaluated once
    # instead of once per row.
    t0 = ts[0].getT0()
    # Extra console tracing is hard-wired to this single station.
    trace = ts[0].get5CharStationID() == 'RKWM1'

    with open(csv_name, 'w') as discharge_raw_csv:
        discharge_raw_csv.write(" NetCDF (CMS) ")
        discharge_raw_csv.write("PIXML (" + ts[0].unit + ")\n")
        for t, d in zip(time_array, discharges_1d):
            # The T0 row is marked with a trailing "**".
            if t == t0:
                discharge_raw_csv.write(t.strftime("%m/%d/%Y %H:%M:%S**"))
            else:
                discharge_raw_csv.write(t.strftime("%m/%d/%Y %H:%M:%S"))
            discharge_raw_csv.write(" {0:10.4f}".format(d))
            if trace:
                ts[0].print()
                print(t)
            if t in ts[0].timeValueQuality:
                discharge_raw_csv.write(
                    " {0:10.4f}".format(ts[0].timeValueQuality[t][0]))
                if trace:
                    print(t, ts[0].timeValueQuality[t][0])
            elif t in ts[1].timeValueQuality:
                discharge_raw_csv.write(
                    " {0:10.4f}".format(ts[1].timeValueQuality[t][0]))
                if trace:
                    print(t, ts[1].timeValueQuality[t][0])
            discharge_raw_csv.write('\n')


print(datetime.utcnow())

# All RFC identifiers; only those listed in ``rfcs`` are processed.
#rfcs = [ 'abrfc', 'cnrfc', 'marfc', 'ncrfc', 'nwrfc', 'serfc', \
#         'cbrfc', 'lmrfc', 'mbrfc', 'nerfc', 'ohrfc', 'wgrfc' ]
rfcs = ['nerfc']

sitefile = "/gpfs/hps3/nwc/noscrub/Zhengtao.Cui/nwtest3/nwm.v2.1/ush/rfc_ingestion/RFC_Reservoir_Locations_for_Forecast_Ingest_into_NWM_All_RFCs.csv"

rfcsites = RFC_Sites(sitefile)

for rfc in rfcs:
    pixmlfiles = glob.glob('/gpfs/hps3/nwc/noscrub/Zhengtao.Cui/nwtest3/nwm.v2.1/ush/rfc_ingestion/testdata/' + rfc + '/*.xml')
    path = '/gpfs/hps3/ptmp/Zhengtao.Cui/' + rfc + '_ts/*.ncdf'
    files = glob.glob(path)

    for pixmlfile in pixmlfiles:
        print(pixmlfile)
        pixml = PI_XML(pixmlfile, rfcsites)
        allids = pixml.getAllStationIDs()

        for name in files:
            # Time-slice names look like
            # 2019-11-08_18.60min.STPC1.RFCTimeSlice.ncdf: characters 0-12
            # hold the T0 hour and characters 20-24 the station ID.
            basename = os.path.basename(name)
            siteid = basename[20:25]

            # Check membership first so the PI-XML reader is never asked for
            # a station this export does not list.
            if siteid not in allids:
                continue
            ts = pixml.getObservedAndForecastForID(siteid)
            if basename[0:13] != ts[0].getT0().strftime("%Y-%m-%d_%H"):
                continue

            issue_time, observed_count, total_count, discharges_1d = \
                _read_timeslice(name)
            time_array = _time_axis(issue_time, observed_count, total_count)

            print(name)
            _write_raw_comparison(name + '_discharges_raw.csv',
                                  time_array, discharges_1d, ts)

            discharges_ts = pd.Series(discharges_1d, index=time_array)
            discharges_ts.to_csv(name + '_discharges.csv')