#!/usr/bin/env python3
###############################################################################
# Name of Script: read_ascii_storm.py
# Contact(s): Logan C. Dawson (logan.dawson@noaa.gov)
# Purpose of Script: Read SPC LSR data in csv format and output to
#                    a MET-compatible netcdf file
#
# History Log:
#   1/2023: Script copied from METplus repository
###############################################################################

import pandas as pd
import os
import sys

print(f'Python Script: {sys.argv[0]}')

# The input file is specified on the command line;
# load the data into a pandas dataframe
if len(sys.argv) < 2:
    script_name = os.path.basename(sys.argv[0])
    print(f"FATAL ERROR: {script_name} -> Must specify exactly one input file.")
    sys.exit(1)

# Read the input file as the first argument
input_file = os.path.expandvars(sys.argv[1])
print(f'Input File: {input_file}')

if not os.path.exists(input_file):
    print("FATAL ERROR: Could not find input file")
    sys.exit(2)

# Read and format the input 11-column observations
COLUMN_NAMES = (
    "Message_Type",       # (1)  string
    "Station_ID",         # (2)  string
    "Valid_Time",         # (3)  string (YYYYMMDD_HHMMSS)
    "Lat",                # (4)  numeric (Deg North)
    "Lon",                # (5)  numeric (Deg East)
    "Elevation",          # (6)  numeric (msl)
    "Var_Name",           # (7)  string (or GRIB_Code)
    "Level",              # (8)  numeric
    "Height",             # (9)  numeric (msl or agl)
    "QC_String",          # (10) string
    "Observation_Value"   # (11) numeric
)

# Create a blank dataframe based on the 11-column standard
point_frame = pd.DataFrame(columns=COLUMN_NAMES, dtype='str')

# Read in the storm report: 8 columns, not matching the 11-column standard
temp_data = pd.read_csv(input_file,
                        names=['Time', 'Fscale', 'Location', 'County',
                               'Stat', 'Lat', 'Lon', 'Comment'],
                        dtype=str, skiprows=1, on_bad_lines='warn')

# Strip out any rows in the middle that are actually header rows.
# This allows storm reports to be concatenated together.
temp_data = temp_data[temp_data["Time"] != "Time"]

# Strip out any rows that have any null/NaN values
temp_data = temp_data[~temp_data.isnull().any(axis=1)]

# Convert the latitude and longitude columns to numeric types
temp_data[["Lat", "Lon"]] = temp_data[["Lat", "Lon"]].apply(pd.to_numeric)

# Assign the appropriate columns to point_frame, leaving missing values as empty strings
point_frame["Lat"] = temp_data["Lat"]
point_frame["Lon"] = temp_data["Lon"]
#point_frame["Station_ID"] = temp_data["County"]
point_frame["Station_ID"] = "NA"
point_frame["Var_Name"] = "Fscale"
point_frame["Message_Type"] = "StormReport"

# Assign 0.0 to the numeric point_frame columns that are not in the csv file
point_frame["Elevation"] = 0.0
point_frame["Level"] = 0.0
point_frame["Height"] = 0.0

# Encode the Comment column as a "QC" string: Tornado=1, Hail=2, Wind=3, Other=4
point_frame["QC_String"] = "4"
mask = temp_data["Comment"].str.contains('TORNADO')
point_frame.loc[mask, "QC_String"] = "1"
mask = temp_data["Comment"].str.contains('HAIL')
point_frame.loc[mask, "QC_String"] = "2"
mask = temp_data["Comment"].str.contains('WIND')
point_frame.loc[mask, "QC_String"] = "3"

# Time is HHMM in the csv file, so combine it with the VDATE environment
# variable to create a valid date string
file_without_path = os.path.basename(input_file)
#year_month_day = "20"+file_without_path[0:6]
year_month_day = os.environ['VDATE']
point_frame["Valid_Time"] = year_month_day + "_" + temp_data["Time"] + "00"

# Currently we are only interested in the fact that a report exists at that
# location, not in its actual value, so all observation values are set to 1.0
point_frame["Observation_Value"] = 1.0

# ascii2nc wants the final values in a list named point_data
point_data = point_frame.values.tolist()
print(point_data)
print("Data Length:\t" + repr(len(point_data)))
print("Data Type:\t" + repr(type(point_data)))

########################################################################