#!/usr/bin/env python3 import sys import os import argparse import logging import pandas as pd import numpy as np from netCDF4 import Dataset from pathlib import Path class WDQMS: def __init__(self, inputfiles, wdqms_type, outdir, loglvl=logging.INFO): # Start logging logging.basicConfig(filename='file.log', filemode='w', level=loglvl, format='%(levelname)s:%(message)s') self.wdqms_type = wdqms_type self.outdir = outdir logging.info("Working in wdqms()") # Create dataframes from GSI diag files logging.info('Creating dataframe from GSI diag files ...') self.wdqms_type_dict = { 'SYNOP': { 'df_type': self._create_conv_df, 'obs_types': [181, 187, 281, 287], 'variable_ids': {'ps': 110, 'q': 58, 't': 39, 'u': 41, 'v': 42} }, 'TEMP': { 'df_type': self._create_sondes_df, 'obs_types': [120, 220], 'variable_ids': {'ps': 110, 'q': 29, 't': 2, 'u': 3, 'v': 4} }, 'MARINE': { 'df_type': self._create_conv_df, 'obs_types': [180, 183, 280, 282, 284], 'variable_ids': {'ps': 110, 'q': 58, 't': 39, 'u': 41, 'v': 42} } } df_list = [] for file in inputfiles: logging.info(f'Working on {file} ...') logging.info(f'Reading gsi diag ... ') df = self._read_gsi_diag(file) df_list.append(df) df_total = pd.concat(df_list) logging.info('Files successfully read into dataframe!') # Grab data specific to WDQMS type df_total = self._wdqms_type_requirements(df_total) # Grab actual datetimes from datetime + timedelta df_total = self._get_datetimes(df_total) # Drop duplicates df_total = df_total.drop_duplicates() # Create temporary dataframe of only temperature and q values tq_condition = [self.wdqms_type_dict[self.wdqms_type]['variable_ids']['t'], self.wdqms_type_dict[self.wdqms_type]['variable_ids']['q']] tq_df = df_total.loc[df_total['var_id'].isin(tq_condition)] no_tq_df = df_total.loc[~df_total['var_id'].isin(tq_condition)] # Adjust relative humidity data tq_df = self._genqsat(tq_df) # Merge the non t and q values back into returned t and q dataframe df_total = pd.concat([tq_df, no_tq_df]) # Add Status Flag column df_total = self._create_status_flag(df_total) df_total['StatusFlag'] = df_total['StatusFlag'].astype(int) # Sort by Station ID df_total['Station_ID'] = df_total['Station_ID'].astype(str) df_total = df_total.sort_values('Station_ID') logging.info(f'Creating dataframe for {self.wdqms_type} type ...') # Create dataframe for appropriate WDQMS type output_df = self.wdqms_type_dict[self.wdqms_type]['df_type'](df_total) # Get str datetime self.datetime = inputfiles[0].split('/')[-1].split('.')[-2] out_filename = self._df_to_csv(output_df) logging.info(f"Success! Output file saved to: {out_filename}") logging.info("Exiting ...") sys.exit() return def _wdqms_type_requirements(self, df): """ Filter dataframe to only include specific observation types. """ logging.info("Working in wdqms_type_requirements()") logging.debug(f"WDQMS Type: {self.wdqms_type}") logging.debug(f"Total observations for {self.wdqms_type} before filter: {len(df)}") obs_types = self.wdqms_type_dict[self.wdqms_type]['obs_types'] df = df.loc[(df['Observation_Type'].isin(obs_types))] if self.wdqms_type in ['SYNOP', 'MARINE']: # Only include -3 > val >= 3 to avoid overlapping in cycles df = df.loc[df['Time'] != -3.] # Remove bad background departures for each variable df.loc[(df['var_id'] == 110) & (df['Obs_Minus_Forecast_adjusted'].abs() > 200), 'Obs_Minus_Forecast_adjusted'] = -999.9999 df.loc[(df['var_id'] != 110) & (df['Obs_Minus_Forecast_adjusted'].abs() > 500), 'Obs_Minus_Forecast_adjusted'] = -999.9999 if self.wdqms_type in ['TEMP']: # Only include assimilated data as per WDQMS requirement document df = df.loc[df['Analysis_Use_Flag'] == 1] logging.debug(f"Total observations for {self.wdqms_type} after filter: {len(df)}") logging.info("Exiting wdqms_type_requirements()") return df def _get_datetimes(self, df): """ Use 'Datetime' and 'Time' columns to create new datetime and separate into new columns: 'YYYYMMDD' and 'HHMMSS' Args: df : (df) pandas dataframe populated with data from GSI diagnostic files Returns: df: (df) the same dataframe read in with new columns: 'YYYYMMDD' and 'HHMMSS' """ logging.info("Working in get_datetimes()") # Convert 'Datetime' column from str to datetime dates = pd.to_datetime(df['Datetime'], format='%Y%m%d%H') # Converts 'Time' column to time delta in hours hrs = pd.to_timedelta(df['Time'], unit='hours') # Actual datetime of ob adding datetime and timedelta in hours new_dt = dates + hrs df['yyyymmdd'] = new_dt.dt.strftime('%Y%m%d') df['HHMMSS'] = new_dt.dt.strftime('%H%M%S') logging.info("Exiting get_datetimes()") return df def _create_status_flag(self, df): """ Create Status Flag based on the values from Prep_QC_Mark, Prep_Use_Flag, and Analysis_Use_Flag. Args: df: (df) pandas dataframe populated with data from GSI diagnostic files Returns: df: (df) the same dataframe read in with a new column: 'StatusFlag' """ logging.info("Working in create_status_flag()") # Create 'StatusFlag' column and fill with nans df['StatusFlag'] = np.nan # Obs used by GSI, Status_Flag=0 df.loc[(df['Prep_QC_Mark'] <= 8) & ( df['Analysis_Use_Flag'] == 1), 'StatusFlag'] = 0 # Obs rejected by GSI, Status_Flag=0 df.loc[(df['Prep_QC_Mark'] <= 8) & ( df['Analysis_Use_Flag'] == -1), 'StatusFlag'] = 2 # Obs never used by GSI, Status_Flag=3 df.loc[(df['Prep_QC_Mark'] > 8) & ( df['Prep_Use_Flag'] >= 100), 'StatusFlag'] = 3 # Obs is flagged for non-use by the analysis, Status_Flag=3 df.loc[df['Prep_QC_Mark'] >= 15, 'StatusFlag'] = 3 # Obs rejected by SDM or CQCPROF, Status_Flag=7 df.loc[(df['Prep_QC_Mark'] >= 12) & ( df['Prep_QC_Mark'] <= 14), 'StatusFlag'] = 7 # Fill those that do not fit a condition with -999 df.loc[df['StatusFlag'].isnull(), 'StatusFlag'] = -999 logging.debug("Status Flag Counts:") logging.debug(f"{df['StatusFlag'].value_counts()}") logging.info("Exiting create_status_flag()") return df def _round_column(self, df, col): """ Round column numbers to 4 decimal places. Input: df: dataframe with information col: column name to round data Output: df: dataframe with changed values in provided column """ logging.debug("Working in round_column()") df[col] = df[col].map('{:,.4f}'.format) logging.debug("Exiting round_column()") return df def _create_conv_df(self, df): """ Create dataframe for conventional data. """ logging.info("Working in create_conv_df") # Add center_id df['Centre_id'] = 'NCEP' df['CodeType'] = 999 # Remove unnecessary columns df.drop(['Observation_Type', 'Pressure', 'Time', 'Prep_QC_Mark', 'Prep_Use_Flag', 'Analysis_Use_Flag', 'Datetime'], axis=1, inplace=True) # Rename columns df = df.rename({'Obs_Minus_Forecast_adjusted': 'Bg_dep', 'Latitude': 'latitude', 'Station_ID': 'Station_id'}, axis=1) # ordered columns cols = ['Station_id', 'yyyymmdd', 'HHMMSS', 'latitude', 'Longitude', 'StatusFlag', 'Centre_id', 'var_id', 'Bg_dep', 'CodeType'] df = df[cols] df = df.reset_index(drop=True) # Round given columns to four decimal places for col in ['latitude', 'Longitude', 'Bg_dep']: df = self._round_column(df, col) logging.info("Exiting create_conv_df()") return df def _create_sondes_df(self, df): """ Create dataframe for sondes. """ logging.info("Working in create_sondes_df()") stn_ids = df['Station_ID'].unique() df_list = [] # Loop through stations and create individual dataframes # that grabs average stats from surface, troposphere, and # stratosphere for stn in stn_ids: logging.debug(f"Station ID: {stn}") d = { 'var_id': [], 'Mean_Bg_dep': [], 'Std_Bg_dep': [], 'Levels': [], 'LastRepLevel': [], 'StatusFlag': [] } surf_lat = None surf_lon = None # Temporary dataframe of specific station data tmp = df.loc[df['Station_ID'] == stn] # Add pressure info if available if 110 in tmp['var_id'].unique(): logging.debug(f"Variable: p") mean_bg_dep = tmp['Obs_Minus_Forecast_adjusted'].loc[tmp['var_id'] == 110].values[0] std_bg_dep = 0 level = 'Surf' last_rep_lvl = -999.99 status_flag = tmp['StatusFlag'].loc[tmp['var_id'] == 110].values[0] d['var_id'].append(110) d['Mean_Bg_dep'].append(mean_bg_dep) # cannot compute std w/ one value so set to 0 d['Std_Bg_dep'].append(std_bg_dep) d['Levels'].append(level) d['LastRepLevel'].append(last_rep_lvl) d['StatusFlag'].append(status_flag) # surface lat and lon if exists surf_lat = tmp['Latitude'].loc[tmp['var_id'] == 110].values[0] surf_lon = tmp['Longitude'].loc[tmp['var_id'] == 110].values[0] logging.debug("Mean_Bg_dep, Std_Bg_dep, Levels, LastRepLevel, StatusFlag") logging.debug(f"{mean_bg_dep}, {std_bg_dep}, {level}, {last_rep_lvl}, {status_flag}") # Get unique variable ID's and remove 110 (surface pressure) var_ids = sorted(tmp['var_id'].unique()) var_ids.remove(110) if 110 in var_ids else var_ids for var in var_ids: logging.debug(f"Variable: {var}") # Surface # Find max pressure of the surface 110 value surf_p_max = tmp['Pressure'].loc[tmp['var_id'] == 110].max() if (110 in tmp['var_id'].unique() and var in tmp['var_id'].loc[tmp['Pressure'] == surf_p_max].unique()): surf_tmp = tmp.loc[(tmp['Pressure'] == surf_p_max) & (tmp['var_id'] == var)] surf_omf = surf_tmp['Obs_Minus_Forecast_adjusted'].values.mean( ) surf_std = 0 # cannot compute std w/ one value so set to 0 level = 'Surf' last_rep_lvl = -999.99 # If at least one ob is used, we report the lowest Status Flag. # Although it does not represent the whole column, it is what is # required by the WDQMS team. status_flag = surf_tmp['StatusFlag'].min() d['var_id'].append(var) d['Mean_Bg_dep'].append(surf_omf) d['Std_Bg_dep'].append(surf_std) d['Levels'].append(level) d['LastRepLevel'].append(last_rep_lvl) d['StatusFlag'].append(status_flag) logging.debug("Mean_Bg_dep, Std_Bg_dep, Levels, LastRepLevel, StatusFlag") logging.debug(f"{surf_omf}, {surf_std}, {level}, {last_rep_lvl}, {status_flag}") # Troposphere trop_tmp = tmp.loc[(tmp['var_id'] == var) & (tmp['Pressure'] >= 100)] if len(trop_tmp) > 0: trop_avg_omf = trop_tmp['Obs_Minus_Forecast_adjusted'].mean( ) trop_std_omf = trop_tmp['Obs_Minus_Forecast_adjusted'].std( ) level = 'Trop' # Get lowest p for entire atmosphere last_rep_lvl = tmp['Pressure'].min() # If at least one ob is used, we report the lowest Status Flag. # Although it does not represent the whole column, it is what is # required by the WDQMS team. status_flag = trop_tmp['StatusFlag'].min() d['var_id'].append(var) d['Mean_Bg_dep'].append(trop_avg_omf) d['Std_Bg_dep'].append(trop_std_omf) d['Levels'].append(level) d['LastRepLevel'].append(last_rep_lvl) d['StatusFlag'].append(status_flag) logging.debug("Mean_Bg_dep, Std_Bg_dep, Levels, LastRepLevel, StatusFlag") logging.debug(f"{trop_avg_omf}, {trop_std_omf}, {level}, {last_rep_lvl}, {status_flag}") # Stratosphere stra_tmp = tmp.loc[(tmp['var_id'] == var) & (tmp['Pressure'] < 100)] if len(stra_tmp) > 0: stra_avg_omf = stra_tmp['Obs_Minus_Forecast_adjusted'].mean( ) stra_std_omf = 0 if len( stra_tmp == 1) else stra_tmp['Obs_Minus_Forecast_adjusted'].std() level = 'Stra' # Get lowest p for entire atmosphere last_rep_lvl = tmp['Pressure'].min() # If at least one ob is used, we report the lowest Status Flag. # Although it does not represent the whole column, it is what is # required by the WDQMS team. status_flag = stra_tmp['StatusFlag'].min() d['var_id'].append(var) d['Mean_Bg_dep'].append(stra_avg_omf) d['Std_Bg_dep'].append(stra_std_omf) d['Levels'].append(level) d['LastRepLevel'].append(last_rep_lvl) d['StatusFlag'].append(status_flag) logging.debug("Mean_Bg_dep, Std_Bg_dep, Levels, LastRepLevel, StatusFlag") logging.debug(f"{stra_avg_omf}, {stra_std_omf}, {level}, {last_rep_lvl}, {status_flag}") sub_df = pd.DataFrame.from_dict(d) sub_df['Station_id'] = stn # Add lats and lons lat = surf_lat if surf_lat else tmp['Latitude'].value_counts( ).index[0] lon = surf_lon if surf_lon else tmp['Longitude'].value_counts( ).index[0] sub_df['latitude'] = lat sub_df['Longitude'] = lon # add datetime str_datetime = str(tmp['Datetime'].values[0]) sub_df['yyyymmdd'] = str_datetime[:-2] sub_df['HHMMSS'] = str_datetime[-2:] + '0000' df_list.append(sub_df) df = pd.concat(df_list) df['Centre_id'] = 'NCEP' df['CodeType'] = 999 df = df.dropna() # Ordered columns cols = ['Station_id', 'yyyymmdd', 'HHMMSS', 'latitude', 'Longitude', 'StatusFlag', 'Centre_id', 'var_id', 'Mean_Bg_dep', 'Std_Bg_dep', 'Levels', 'LastRepLevel', 'CodeType'] df = df[cols] df = df.reset_index(drop=True) # Round given columns to four decimal places for col in ['latitude', 'Longitude', 'Mean_Bg_dep', 'Std_Bg_dep', 'LastRepLevel']: df = self._round_column(df, col) logging.info("Exiting create_sondes_df()") return df def _genqsat(self, df): """ Calculates new background departure values for specific humidity (q) by calculating saturation specific humidity from corresponding temperature and pressure values. bg_dep = (q_obs/qsat_obs)-(q_ges/qsat_ges) q_obs : measured q obs qsat_obs : calculated saturation q q_ges : q_obs minus q background error from GSI diagnostic file qsat_ges : calculated saturation q using temperature obs minus temperature background error from GSI diagnostic file Args: df : (df) pandas dataframe populated with t and q data from GSI diagnostic files wdqms_type : (str) wdqms type file being created Returns: df: (df) the same dataframe read in with new background departure values for humidity data """ logging.info("Working in genqstat()") # Get variable type specific to WDQMS type q_id = self.wdqms_type_dict[self.wdqms_type]['variable_ids']['q'] t_id = self.wdqms_type_dict[self.wdqms_type]['variable_ids']['t'] df_list = [] # Groupby Station_ID for stn, stn_df in df.groupby('Station_ID'): # Filter the dataframes q_df = stn_df[stn_df['var_id'] == q_id] t_df = stn_df[stn_df['var_id'] == t_id] # Make sure there are no duplicates in q_df and t_df before merging columns_to_compare = ['Station_ID', 'Latitude', 'Longitude', 'Pressure', 'Time'] q_df = q_df.drop_duplicates(subset=columns_to_compare) t_df = t_df.drop_duplicates(subset=columns_to_compare) # Merge dataframes on common keys using an inner join merged_df = pd.merge(q_df, t_df, on=['Station_ID', 'Latitude', 'Longitude', 'Pressure', 'Time'], suffixes=('_q', '_t'), how='inner') # Calculate needed values q_obs = merged_df['Observation_q'].to_numpy() * 1.0e6 q_ges = (merged_df['Observation_q'].to_numpy() - merged_df['Obs_Minus_Forecast_adjusted_q'].to_numpy()) * 1.0e6 t_obs = merged_df['Observation_t'].to_numpy() - 273.16 t_ges = (merged_df['Observation_t'].to_numpy() - merged_df['Obs_Minus_Forecast_adjusted_t'].to_numpy()) - 273.16 pressure = merged_df['Pressure'].to_numpy() qsat_obs = self._temp_2_saturation_specific_humidity(pressure, t_obs) qsat_ges = self._temp_2_saturation_specific_humidity(pressure, t_ges) # Calculate background departure bg_dep = (q_obs / qsat_obs) - (q_ges / qsat_ges) # Grab conditions from merged_df station_ids = merged_df['Station_ID'] pressure_vals = merged_df['Pressure'] time_vals = merged_df['Time'] latitude = merged_df['Latitude'] longitude = merged_df['Longitude'] conditions = (q_df['Station_ID'].isin(station_ids)) & \ (q_df['Pressure'].isin(pressure_vals)) & \ (q_df['Time'].isin(time_vals)) & \ (q_df['Latitude'].isin(latitude)) & \ (q_df['Longitude'].isin(longitude)) # Update the background departure values for q_df q_df = q_df.loc[conditions] q_df['Obs_Minus_Forecast_adjusted'] = bg_dep df_list.append(pd.concat([t_df, q_df])) df = pd.concat(df_list) logging.info("Exiting genqstat()") return df def _temp_2_saturation_specific_humidity(self, pres, tsen): """ Uses pressure and temperature arrays to calculate saturation specific humidity. Args: pres: (array) array of pressure obs tsen: (array) array of temperature obs in Celsius Returns: qsat_array: (array) corresponding calculated sat. spec. humidity """ logging.debug("Working in temp_2_saturation_specific_humidity()") ttp = 2.7316e2 # temperature at h2o triple point (K) psat = 6.1078e1 # pressure at h2o triple point (Pa) cvap = 1.8460e3 # specific heat of h2o vapor (J/kg/K) csol = 2.1060e3 # specific heat of solid h2o (ice)(J/kg/K) hvap = 2.5000e6 # latent heat of h2o condensation (J/kg) hfus = 3.3358e5 # latent heat of h2o fusion (J/kg) rd = 2.8705e2 rv = 4.6150e2 cv = 7.1760e2 cliq = 4.1855e3 dldt = cvap - cliq dldti = cvap - csol hsub = hvap + hfus tmix = ttp - 20. xa = -(dldt / rv) xai = -(dldti / rv) xb = xa + hvap / (rv * ttp) xbi = xai + hsub / (rv * ttp) eps = rd / rv omeps = 1.0 - eps tdry = tsen + ttp tdry = np.array([1.0e-8 if np.abs(t) < 1.0e-8 else t for t in tdry]) tr = ttp / tdry qsat_array = [] # Loop through temperatures and appropriate indexes to solve qsat for idx, t in enumerate(tdry): # Get correct estmax and es values based on conditions if t >= ttp: estmax = psat * (tr[idx]**xa) * np.exp(xb * (1.0 - tr[idx])) es = estmax elif t < tmix: estmax = psat * (tr[idx]**xa) * np.exp(xbi * (1.0 - tr[idx])) es = estmax else: w = (t - tmix) / (ttp - tmix) estmax = w * psat * (tr[idx]**xa) * np.exp(xb * (1.0 - tr[idx])) \ + (1.0 - w) * psat * (tr[idx]**xai) * \ np.exp(xbi * (1.0 - tr[idx])) es = w * psat * (tr[idx]**xa) * np.exp(xb * (1.0 - tr[idx])) \ + (1.0 - w) * psat * (tr[idx]**xai) * \ np.exp(xbi * (1.0 - tr[idx])) pw = pres[idx] esmax = pw esmax = np.min([esmax, estmax]) es2 = np.min([es, esmax]) qsat = eps * es2 / ((pw * 10.0) - (omeps * es2)) qsat2 = qsat * 1e6 qsat_array.append(qsat2) logging.debug("Exiting temp_2_saturation_specific_humidity()") return np.array(qsat_array) def _read_gsi_diag(self, file): """ Reads the data from the conventional diagnostic file during initialization into a pandas dataframe. Args: file : (str) netCDF GSI diagnostic file Returns: df : (dataframe) pandas dataframe populated with data from netCDF GSI diagnostic file """ logging.debug("Working in read_gsi_diag()") filename = os.path.splitext(Path(file).stem)[0] logging.debug(f'Filename: {filename}') variable = filename.split('_')[2] logging.debug(f'Variable: {variable}') df_dict = {} column_list = ['Station_ID', 'Observation_Class', 'Observation_Type', 'Latitude', 'Longitude', 'Pressure', 'Time', 'Prep_QC_Mark', 'Prep_Use_Flag', 'Analysis_Use_Flag', 'Observation', 'Obs_Minus_Forecast_adjusted'] # Grab datetime from file datetime = self._grab_netcdf_data(file, 'Datetime') if variable == 'uv': for wtype in ['u', 'v']: df_dict[wtype] = {} for col in column_list: col = f'{wtype}_' + col if col == 'Observation' else col col = f'{wtype}_' + \ col if col == 'Obs_Minus_Forecast_adjusted' else col data = self._grab_netcdf_data(file, col) df_dict[wtype][col] = data # Need to separate the u and v dataframes to concatenate them u_df = pd.DataFrame(df_dict['u']) u_df = u_df.rename({'Observation_Class': 'var_id', 'u_Observation': 'Observation', 'u_Obs_Minus_Forecast_adjusted': 'Obs_Minus_Forecast_adjusted'}, axis=1) u_df['var_id'] = self.wdqms_type_dict[self.wdqms_type]['variable_ids']['u'] v_df = pd.DataFrame(df_dict['v']) v_df = v_df.rename({'Observation_Class': 'var_id', 'v_Observation': 'Observation', 'v_Obs_Minus_Forecast_adjusted': 'Obs_Minus_Forecast_adjusted'}, axis=1) v_df['var_id'] = self.wdqms_type_dict[self.wdqms_type]['variable_ids']['v'] df = pd.concat([u_df, v_df]) else: for col in column_list: data = self._grab_netcdf_data(file, col) df_dict[col] = data df = pd.DataFrame(df_dict) df = df.rename({'Observation_Class': 'var_id'}, axis=1) df['var_id'] = self.wdqms_type_dict[self.wdqms_type]['variable_ids'][variable] # Add datetime column to dataframe df['Datetime'] = datetime # Subtract longitudes > 180 by 360 to be negative df.loc[df['Longitude'] > 180, 'Longitude'] -= 360 logging.debug("Exiting read_gsi_diag()") return df def _grab_netcdf_data(self, file, var): """ Opens and grabs data based on column name. Args: file : (str) netCDF GSI file var : (str) the variable to be extracted Returns: data : (array) values from the specified variable """ logging.debug('Working in grab_netcdf_data()') with Dataset(file, mode='r') as f: # Station_ID and Observation_Class variables need # to be converted from byte string to string if var == 'Datetime': data = f.date_time elif var in ['Station_ID', 'Observation_Class']: data = f.variables[var][:] data = [i.tobytes(fill_value=' ', order='C') for i in data] data = np.array( [''.join(i.decode('UTF-8', 'ignore').split()) for i in data]) # Grab variables with only 'nobs' dimension elif len(f.variables[var].shape) == 1: data = f.variables[var][:] logging.debug("Exiting grab_netcdf_data()") return data def _df_to_csv(self, df): """ Produce output .csv file from dataframe. """ logging.info("Working in df_to_csv()") logging.info(f'Coverting dataframe to .csv file for {self.wdqms_type} data ...') # Write dataframe to .csv date = self.datetime[:-2] cycle = self.datetime[-2:] hr_range = { '00': ['21', '03'], '06': ['03', '09'], '12': ['09', '15'], '18': ['15', '21'] } filename = f'{self.outdir}/NCEP_{self.wdqms_type}_{date}_{cycle}.csv' f = open(filename, 'a') f.write(f"# TYPE={self.wdqms_type}\n") f.write(f"#An_Date= {date}\n") f.write(f"#An_time= {cycle}\n") f.write(f"#An_range=[ {hr_range[cycle][0]} to {hr_range[cycle][-1]} )\n") f.write("#StatusFlag: 0(Used);1(Not Used);2(Rejected by DA);" "3(Never Used by DA);4(Data Thinned);5(Rejected before DA);" "6(Alternative Used);7(Quality Issue);8(Other Reason);9(No content)\n") df.to_csv(f, index=False) f.close() logging.info(f'{filename} file created.') logging.info('Exiting df_to_csv()') return filename if __name__ == "__main__": # Parse command line ap = argparse.ArgumentParser() ap.add_argument("-i", "--input_list", nargs='+', default=[], help="List of input GSI diagnostic files") ap.add_argument("-t", "--type", help="WDQMS file type (SYNOP, TEMP, MARINE)") ap.add_argument("-o", "--outdir", help="Out directory where files will be saved") ap.add_argument('-d', '--debug', help="Print debugging statements to log file", action="store_const", dest="loglevel", const=logging.DEBUG, default=logging.WARNING) ap.add_argument('-v', '--verbose', help="Print information statements about code", action="store_const", dest="loglevel", const=logging.INFO) args = ap.parse_args() if args.type not in ['SYNOP', 'TEMP', 'MARINE']: raise ValueError(f"FATAL ERROR: {args.type} not a correct input. Inputs include: 'SYNOP, TEMP, MARINE'") WDQMS(args.input_list, args.type, args.outdir, args.loglevel)