"""Convert merged total-water-level netCDF files into a SHEF-encoded text product.

Run from the console; see ``main()`` (or the ``-h`` option) for the required
arguments: inputDir, outputDir, crosswalkFile, headerName, region, domain and
isInterweaveFirstFile.  More instructions are in the readme file.
"""
from time import time  # NOTE(review): unused at module level (convert_to_shef has a `time` parameter that shadows it) — kept to avoid changing imports
import csv
import glob
import math
from datetime import datetime
import sys, getopt
from dateutil import parser
import xarray as xr
import pandas as pd
from netCDF4 import Dataset, num2date  # http://code.google.com/p/netcdf4-python/  NOTE(review): names unused here

# Sample crosswalk entries (LID, site type and node) kept for manual testing
# against locations from a NC file.
testCW = (
    {'Node': '9642845', 'NWSLI': 'NORL1', 'Site Type': 'HG'},
    {'Node': '10236205', 'NWSLI': 'MCGL1', 'Site Type': 'HG'},
)


def get_all_cdf_nodes(nodes, domain):
    """Attach the netCDF array position to each crosswalk node.

    For every entry in `nodes`, look its node id up in the per-domain key
    file ``newFileKey/<domain>.csv`` (column 0 lists node ids in netCDF
    order).  Matching entries get an ``'np_index'`` key added (the row
    position in the key file, i.e. the index into the elevation arrays).

    :param nodes: list of dicts, each with at least a ``'node'`` key (int)
    :param domain: domain name selecting the key file (e.g. 'ATL')
    :return: the subset of `nodes` that were found, each augmented in place
             with ``'np_index'``; empty list when nothing matched.
    """
    combine_node = []
    node_key_path = 'newFileKey/' + domain + '.csv'
    # key file has no header row; column 0 holds the node ids
    nk = pd.read_csv(node_key_path, header=None)
    nk.index.name = None
    for nc in nodes:
        node = nc['node']
        # row position of this node id in the key file == index into the
        # netCDF elevation arrays
        matches = nk[nk[0] == node].index.tolist()
        if matches:
            nc['np_index'] = matches[0]
            combine_node.append(nc)
    return combine_node


def convert_to_shef(index, site_source, lid, el_panda, save_shef, time, timeHours, shortName, isInterweaveFirstFile):
    """Append one SHEF ``.E`` time series for a single location to the file.

    Each entry of `el_panda` is one forecast step's elevation array; `index`
    selects this location's value within each array.  Values are converted
    from meters to feet (NaN -> -9999.0 missing sentinel) and written 12 per
    line, with ``.E2``, ``.E3``, ... continuation headers.

    :param index: this location's position inside each elevation array
    :param site_source: SHEF physical-element code for the site ('HG'/'HM'/'HH')
    :param lid: NWS location id (NWSLI)
    :param el_panda: pandas Series of per-step elevation arrays (meters)
    :param save_shef: open, writable file object for the SHEF product
    :param time: datetime of the first forecast step (shadows the top-level
                 ``time`` import inside this function)
    :param timeHours: interval in hours between values (the ``DIH`` field)
    :param shortName: two-letter type/source code placed after the PE code
    :param isInterweaveFirstFile: 'yes' suppresses the trailing '/' on every
                 12th value (first file of an interweave split)
    """
    timezone = "Z"
    hourlyRange = "DIH{}".format('0{}'.format(timeHours))
    todays_date = datetime.today().strftime('%Y%m%d%H%M')
    date_from_file = time.strftime("%Y%m%d")
    min_sec = time.strftime("%H%M")
    list_1 = []
    count = 0
    for i in range(len(el_panda)):
        count += 1
        values = ''
        if not list_1:
            # first value: open the series with the .E1 header
            list_1.append('.E1 ')
        elif not (i % 12):
            # start a new .En continuation line and drop the '/' left on the
            # previous value.
            # NOTE(review): when isInterweaveFirstFile == 'yes' the previous
            # value has no trailing '/', so this strips its last digit
            # instead — confirm intended.
            values = '\n' + ".E" + str(int(i / 12) + 1) + " "
            list_1[-1] = list_1[-1][:-1]
        value = el_panda[i][index]
        if math.isnan(value):
            # missing-data sentinel
            to_feet = -9999.00
        else:
            # meters -> feet, rounded to 3 decimals
            to_feet = round(value * 3.280839895, 3)
        # every value is '/'-terminated, except each 12th value in
        # interweave-first-file mode
        if isInterweaveFirstFile == 'yes' and not (count % 12):
            values = values + " " + str(to_feet)
        else:
            values = values + " " + str(to_feet) + '/'
        list_1.append(values)
    # header line + all encoded values for this location
    shef = (".E " + lid + " " + str(date_from_file) + " " + timezone
            + " DC" + str(todays_date) + "/DH" + min_sec + "/" + site_source
            + "I" + shortName + "Z/" + hourlyRange + "\n"
            + "".join(list_1) + "\n")
    save_shef.writelines(shef)
    save_shef.flush()


def process_nodes(nodes, inputDir, save_shef, timeHours, shortName, isInterweaveFirstFile, domain):
    """Merge every ``*.nc`` file in `inputDir` and emit one SHEF series per node.

    :param nodes: crosswalk node dicts (from get_crosswalk_nodes)
    :param inputDir: directory containing the netCDF forecast files
    :param save_shef: open, writable file object for the SHEF product
    :param timeHours: interval in hours between values
    :param shortName: SHEF type/source code for this product
    :param isInterweaveFirstFile: 'yes'/'no' interweave flag (see convert_to_shef)
    :param domain: domain name used to pick the node key file
    """
    input_dir = inputDir + '*.nc'
    print('getting files')
    # merge all netCDF files into one dataset (combines all the variables)
    files = xr.merge([xr.open_dataset(f) for f in glob.glob(input_dir)])
    date_created = files.time[0].values
    # elevation arrays, one per forecast step (meters)
    el_cdf = files.elevation[:][:].values
    # parse the ISO 8601 timestamp of the first step
    forecastTime = parser.parse(str(date_created))
    # pandas Series so rows can be addressed by integer index
    el_panda = pd.Series(list(el_cdf))
    # keep only the crosswalk nodes present in this domain's key file
    all_cw_node = get_all_cdf_nodes(nodes, domain)
    # BUGFIX: was `all_cw_node == 'none'`, which can never equal a list, so
    # the wrong-domain warning never fired
    if not all_cw_node:
        print('The domain is wrong please use the one for the right netCDF')
    else:
        for x in all_cw_node:
            convert_to_shef(x['np_index'], x['site type'], x['NWSLI'],
                            el_panda, save_shef, forecastTime, timeHours,
                            shortName, isInterweaveFirstFile)


def get_crosswalk_nodes(cwfile):
    """Return the crosswalk entries as a plain list (called from main)."""
    return list(cwfile)


def main(argv):
    """Parse the command line, expand the crosswalk and write the SHEF file.

    Required console arguments: inputDir, outputDir, crosswalkFile,
    headerName, region, domain, isInterweaveFirstFile — more instructions in
    the readme file (or run with -h).
    """
    inputDir = ''
    outputDir = ''
    crosswalkFile = ''
    headerName = ''
    region = ''
    domain = ''
    isInterweaveFirstFile = ''
    try:
        opts, args = getopt.getopt(
            argv, "hi:o:c:n:r:d:w:",
            ["inputDir=", "outputDir=", "crosswalkFile=", "headerName=",
             "region=", "domain=", "isInterweaveFirstFile="])
    except getopt.GetoptError as err:
        print(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('*************************************')
            print('-i is for the input directory - inputDir - just the location to the NC files')
            print('*************************************')
            print('-o is for the output directory - outputDir - do not forget to end with the .shef')
            print('*************************************')
            print('-c is for the crosswalk file - crosswalkFile')
            print('*************************************')
            print('-n is for the header name - example: "SRUS70 KXXX 121636" - headerName')
            print('*************************************')
            print('-r is for the region to use - ATLEANA = Atlantic/Gulf Extended AnA etc.... look in readme for all examples- region')
            print('*************************************')
            print('-d is for the domain to use - ATL, PAC, PRV, HAW')
            print('*************************************')
            print('-w is for the boolean, if the file will be split - this is to be marked yes for just the first file of interweave')
            print('*************************************')
            print('example would be: python3 conversion_shef.py --inputDir ./netCDF_In/ --outputDir ./shef_out/shef.shef --crosswalkFile ./netCDF_In/ObsLocationRequests.csv --headerName "SRUS74 KWNO 121636" --region "ATLEANA" --domain "ATL"')
            sys.exit()
        elif opt in ("-i", "--inputDir"):
            inputDir = arg
        elif opt in ("-o", "--outputDir"):
            outputDir = arg
        elif opt in ("-c", "--crosswalkFile"):
            crosswalkFile = arg
        elif opt in ("-n", "--headerName"):
            headerName = arg
        elif opt in ("-r", "--region"):
            region = arg
        elif opt in ("-d", "--domain"):
            domain = arg
        elif opt in ("-w", "--isInterweaveFirstFile"):
            isInterweaveFirstFile = arg

    # if a new output time is needed other than 1 or 3 then add a new
    # variable below
    mediumOutPutTime = '1'
    shortOutPutTime = '1'
    # region -> (timeHours, shortName, hCode, headerNav); replaces the long
    # if/elif chain, same values
    region_table = {
        'ATLSR':   (shortOutPutTime,  'FQ', '',   'NAVD88'),
        'ATLSRP':  (mediumOutPutTime, 'FQ', 'HH', 'NAVD88'),
        'ATLMR':   (mediumOutPutTime, 'FP', '',   'NAVD88'),
        'ATLMRP':  (mediumOutPutTime, 'FP', 'HH', 'NAVD88'),
        'ATLMRB':  (mediumOutPutTime, 'FW', '',   'NAVD88'),
        'ATLMRBP': (mediumOutPutTime, 'FW', 'HH', 'NAVD88'),
        'PACSR':   (shortOutPutTime,  'FM', '',   'NAVD88'),
        'PACSRP':  (shortOutPutTime,  'FM', 'HH', 'NAVD88'),
        'PACMR':   (mediumOutPutTime, 'FR', '',   'NAVD88'),
        'PACMRP':  (mediumOutPutTime, 'FR', 'HH', 'NAVD88'),
        'PACMRB':  (mediumOutPutTime, 'FX', '',   'NAVD88'),
        'PACMRBP': (mediumOutPutTime, 'FX', 'HH', 'NAVD88'),
        'PRSR':    (shortOutPutTime,  'FQ', '',   'LMSL'),
        'PRSRP':   (shortOutPutTime,  'FQ', 'HH', 'LMSL'),
        'HAWSR':   (shortOutPutTime,  'FQ', '',   'LMSL'),
        'HAWSRP':  (shortOutPutTime,  'FQ', 'HH', 'LMSL'),
    }
    if region in region_table:
        timeHours, shortName, hCode, headerNav = region_table[region]
    else:
        # timeHours stays '0' so the processing below is skipped
        timeHours, shortName, hCode, headerNav = '0', '', '', 'NAVD88'
        print('Region needs to be typed correctly - if you see this message then retype the region based on the names in the readme file')

    # One can add other checks here to see if the bash parameters are valid
    if timeHours != '0':
        getNodeCount = []
        # header for the top of the shef file; headerName is supplied via a
        # bash variable
        header = "000\n{}\nTIDTWE\n:SHEF ENCODED {} HOUR TOTAL WATER LEVEL GUIDANCE\n:WATER ELEVATION VALUES REFERENCED TO {} IN FEET\n:PROVIDED BY DOC/NOAA/NWS/OWP\n".format(headerName, timeHours, headerNav)
        # opens the shef file and clears previous file contents
        with open(outputDir, "w") as save_shef:
            save_shef.writelines(header)
            # read the whole crosswalk csv into a list of dicts
            with open(crosswalkFile, mode='r') as infile:
                reader = csv.DictReader(infile, skipinitialspace=True)
                cw = [r for r in reader]
            # duplicate every HG row as an HM row directly after it (skipped
            # when producing an HH product).
            # BUGFIX: the original inserted into cw while enumerating it;
            # build a new list instead — same resulting order.
            expanded = []
            for crosswalk in cw:
                expanded.append(crosswalk)
                if crosswalk['Site Type'] == 'HG' and hCode != 'HH':
                    hm_row = dict(crosswalk)
                    hm_row['Site Type'] = 'HM'
                    expanded.append(hm_row)
            # collect the nodes belonging to the requested domain; when not
            # producing HH, hCode tracks each row's own Site Type
            for crosswalk in expanded:
                if crosswalk['Domain'] == domain:
                    if hCode != 'HH':
                        hCode = crosswalk['Site Type']
                    getNodeCount.append({
                        'node': int(crosswalk['Node']),
                        'site type': hCode,
                        'NWSLI': crosswalk['NWSLI'],
                    })
            cw_data = get_crosswalk_nodes(getNodeCount)
            # process the nodes and create the shef
            print(f'processing nodes')
            process_nodes(cw_data, inputDir, save_shef, timeHours, shortName,
                          isInterweaveFirstFile, domain)
            print(f'Shef file is complete - look for the complete shef file at: ', outputDir)


if __name__ == "__main__":
    main(sys.argv[1:])