'''
Program Name: get_data_file.py
Contact(s): Mallory Row
Abstract: This script is run by all scripts in scripts/.
          This gets the necessary data files to run
          the METplus use case.
'''

import os
import subprocess
import datetime
from time import sleep
import pandas as pd
import glob
import numpy as np

print("BEGIN: "+os.path.basename(__file__))

def get_time_info(start_date_str, end_date_str, start_hr_str, end_hr_str,
                  hr_inc_str, fhr_list, date_type):
    """! This creates a list of dictionaries containing information
         on the valid dates and times, the initialization dates and
         times, and forecast hour pairings

         Args:
             start_date_str - string of the verification start date
             end_date_str   - string of the verification end date
             start_hr_str   - string of the verification start hour
             end_hr_str     - string of the verification end hour
             hr_inc_str     - string of the increment (in seconds)
                              between start_hr and end_hr
             fhr_list       - list of strings of the forecast hours
                              to verify
             date_type      - string defining whether to step through
                              valid (VALID) or initialization (INIT)
                              dates and times when creating METplus data

         Returns:
             time_info - list of dictionaries with the valid,
                         initialization, and forecast hour pairings
    """
    sdate = datetime.datetime(int(start_date_str[0:4]),
                              int(start_date_str[4:6]),
                              int(start_date_str[6:]),
                              int(start_hr_str))
    edate = datetime.datetime(int(end_date_str[0:4]),
                              int(end_date_str[4:6]),
                              int(end_date_str[6:]),
                              int(end_hr_str))
    date_inc = datetime.timedelta(seconds=int(hr_inc_str))
    time_info = []
    date = sdate
    while date <= edate:
        if date_type == 'VALID':
            valid_time = date
        elif date_type == 'INIT':
            init_time = date
        for fhr in fhr_list:
            if fhr == 'anl':
                lead = '0'
            else:
                lead = fhr
            if date_type == 'VALID':
                init_time = valid_time - datetime.timedelta(hours=int(lead))
            elif date_type == 'INIT':
                valid_time = init_time + datetime.timedelta(hours=int(lead))
            t = {}
            t['valid_time'] = valid_time
            t['init_time'] = init_time
            t['lead'] = lead
            time_info.append(t)
        date = date + date_inc
    return time_info

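# A minimal sketch of what get_time_info returns (values are illustrative,
# not from a real configuration): stepping by VALID dates, each forecast
# hour is paired with the initialization time that verifies then.
#   >>> get_time_info('20240101', '20240101', '00', '00', '21600',
#   ...               ['24'], 'VALID')
#   [{'valid_time': datetime.datetime(2024, 1, 1, 0, 0),
#     'init_time': datetime.datetime(2023, 12, 31, 0, 0), 'lead': '24'}]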

def format_filler(unfilled_file_format, dt_valid_time, dt_init_time,
                  str_lead):
    """! This fills in a file naming format template with the valid
         time, initialization time, and forecast lead information

         Args:
             unfilled_file_format - string of file naming convention
             dt_valid_time        - datetime object of the valid time
             dt_init_time         - datetime object of the
                                    initialization time
             str_lead             - string of the forecast lead

         Returns:
             filled_file_format - string of file_format filled in with
                                  verifying time information
    """
    filled_file_format = ''
    format_opt_list = ['lead', 'valid', 'init', 'cycle']
    for filled_file_format_chunk in unfilled_file_format.split('/'):
        for format_opt in format_opt_list:
            nformat_opt = (
                filled_file_format_chunk.count('{'+format_opt+'?fmt=')
            )
            if nformat_opt > 0:
                format_opt_count = 1
                while format_opt_count <= nformat_opt:
                    format_opt_count_fmt = (
                        filled_file_format_chunk \
                        .partition('{'+format_opt+'?fmt=')[2] \
                        .partition('}')[0]
                    )
                    if format_opt == 'valid':
                        replace_format_opt_count = dt_valid_time.strftime(
                            format_opt_count_fmt
                        )
                    elif format_opt == 'lead':
                        if format_opt_count_fmt == '%1H':
                            if int(str_lead) < 10:
                                replace_format_opt_count = str_lead[1]
                            else:
                                replace_format_opt_count = str_lead
                        elif format_opt_count_fmt == '%2H':
                            replace_format_opt_count = str_lead.zfill(2)
                        elif format_opt_count_fmt == '%3H':
                            replace_format_opt_count = str_lead.zfill(3)
                        else:
                            replace_format_opt_count = str_lead
                    elif format_opt in ['init', 'cycle']:
                        replace_format_opt_count = dt_init_time.strftime(
                            format_opt_count_fmt
                        )
                    filled_file_format_chunk = (
                        filled_file_format_chunk.replace(
                            '{'+format_opt+'?fmt='
                            +format_opt_count_fmt+'}',
                            replace_format_opt_count
                        )
                    )
                    format_opt_count+=1
        filled_file_format = os.path.join(filled_file_format,
                                          filled_file_format_chunk)
    return filled_file_format

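# A minimal sketch of format_filler's template filling (the template is
# illustrative only): each {opt?fmt=...} token is replaced using the
# matching datetime or lead, and '/'-separated chunks are rejoined with
# os.path.join.
#   >>> format_filler('gfs.{init?fmt=%Y%m%d%H}/f{lead?fmt=%3H}',
#   ...               datetime.datetime(2024, 1, 2, 0),
#   ...               datetime.datetime(2024, 1, 1, 0), '24')
#   'gfs.2024010100/f024'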

def wget_data(wget_job_filename, wget_job_name, wget_job_output):
    """! This submits a job to the transfer queue
         to use wget to retrieve data

         Args:
             wget_job_filename - string of the path to the wget job file
             wget_job_name     - string of job submission name
             wget_job_output   - string of the path to write job
                                 submission output

         Returns:
    """
    wget_walltime = os.environ['hpss_walltime']
    machine = os.environ['machine']
    QUEUESERV = os.environ['QUEUESERV']
    ACCOUNT = os.environ['ACCOUNT']
    # Set up job wall time information
    walltime_seconds = (
        datetime.timedelta(minutes=int(wget_walltime)).total_seconds()
    )
    walltime = (datetime.datetime.min
                + datetime.timedelta(minutes=int(wget_walltime))).time()
    # Submit job
    os.chmod(wget_job_filename, 0o755)
    print("Submitting "+wget_job_filename+" to "+QUEUESERV)
    print("Output sent to "+wget_job_output)
    if machine == 'WCOSS2':
        os.system('qsub -V -l walltime='+walltime.strftime('%H:%M:%S')+' '
                  +'-q '+QUEUESERV+' -A '+ACCOUNT+' -o '+wget_job_output+' '
                  +'-e '+wget_job_output+' -N '+wget_job_name+' '
                  +'-l select=1:ncpus=1 '+wget_job_filename)
        job_check_cmd = ('qselect -s QR -u '+os.environ['USER']+' '
                         +'-N '+wget_job_name+' | wc -l')
    elif machine in ['HERA', 'ORION', 'S4', 'JET', 'HERCULES']:
        os.system('sbatch --ntasks=1 --time='
                  +walltime.strftime('%H:%M:%S')+' --partition='+QUEUESERV+' '
                  +'--account='+ACCOUNT+' --output='+wget_job_output+' '
                  +'--job-name='+wget_job_name+' '+wget_job_filename)
        job_check_cmd = ('squeue -u '+os.environ['USER']+' -n '
                         +wget_job_name+' -t R,PD -h | wc -l')
    sleep_counter, sleep_checker = 1, 10
    while (sleep_counter*sleep_checker) <= walltime_seconds:
        sleep(sleep_checker)
        print("Walltime checker: "+str(sleep_counter*sleep_checker)+" "
              +"out of "+str(int(walltime_seconds))+" seconds")
        check_job = subprocess.check_output(job_check_cmd, shell=True,
                                            encoding='UTF-8')
        if check_job[0] == '0':
            break
        sleep_counter+=1

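# Note on the wall time handling above (a general Python idiom, not
# project-specific): adding a timedelta to datetime.min and taking
# .time() renders integer minutes as HH:MM:SS for the scheduler, e.g.
#   >>> (datetime.datetime.min
#   ...  + datetime.timedelta(minutes=90)).time().strftime('%H:%M:%S')
#   '01:30:00'
# This assumes the requested wall time is under 24 hours.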

def set_up_gfs_hpss_info(dt_init_time, hpss_dir, model_dump,
                         hpss_file_suffix, save_data_dir):
    """! This sets up HPSS and job information specifically
         for getting GFS data from HPSS.

         Args:
             dt_init_time     - datetime object of the initialization time
             hpss_dir         - string of the base HPSS directory path
             model_dump       - string of the model dump at the beginning
                                of the HPSS file name (gfs, gdas, enkfgfs)
             hpss_file_suffix - string of information on the end of
                                the HPSS file
             save_data_dir    - string of the path to the directory where
                                the HPSS retrieved file will be saved

         Returns:
             hpss_tar          - string of the tar file path where
                                 hpss_file is located
             hpss_file         - string of the file name to be retrieved
                                 from HPSS
             hpss_job_filename - string of the path of the HPSS job card
                                 name
    """
    # Read in environment variables
    HTAR = os.environ['HTAR']
    # Set date variables
    YYYYmmddHH = dt_init_time.strftime('%Y%m%d%H')
    YYYYmmdd = dt_init_time.strftime('%Y%m%d')
    YYYYmm = dt_init_time.strftime('%Y%m')
    YYYY = dt_init_time.strftime('%Y')
    mm = dt_init_time.strftime('%m')
    dd = dt_init_time.strftime('%d')
    HH = dt_init_time.strftime('%H')
    if 'NCEPPROD' in hpss_dir:
        # Operational GFS HPSS archive set up
        if dt_init_time \
                >= datetime.datetime.strptime('20221129', '%Y%m%d'):
            if hpss_file_suffix == 'prepbufr':
                hpss_tar_filename_prefix = ('com_obsproc_v1.1_'+model_dump+'.'
                                            +YYYYmmdd+'_'+HH+'.obsproc_'
                                            +model_dump)
                hpss_file_prefix = os.path.join(model_dump+'.'+YYYYmmdd,
                                                HH, 'atmos', model_dump+'.t'
                                                +HH+'z.')
            else:
                hpss_tar_filename_prefix = ('com_gfs_v16.3_'+model_dump+'.'
                                            +YYYYmmdd+'_'+HH+'.'+model_dump)
                hpss_file_prefix = os.path.join(model_dump+'.'+YYYYmmdd,
                                                HH, 'atmos', model_dump+'.t'
                                                +HH+'z.')
        elif dt_init_time \
                >= datetime.datetime.strptime('20220628', '%Y%m%d') \
                and dt_init_time \
                < datetime.datetime.strptime('20221129', '%Y%m%d'):
            if hpss_file_suffix == 'prepbufr':
                hpss_tar_filename_prefix = ('com_obsproc_v1.0_'+model_dump+'.'
                                            +YYYYmmdd+'_'+HH+'.obsproc_'
                                            +model_dump)
                hpss_file_prefix = os.path.join(model_dump+'.'+YYYYmmdd,
                                                HH, 'atmos', model_dump+'.t'
                                                +HH+'z.')
            else:
                hpss_tar_filename_prefix = ('com_gfs_v16.2_'+model_dump+'.'
                                            +YYYYmmdd+'_'+HH+'.'+model_dump)
                hpss_file_prefix = os.path.join(model_dump+'.'+YYYYmmdd,
                                                HH, 'atmos', model_dump+'.t'
                                                +HH+'z.')
        elif dt_init_time \
                >= datetime.datetime.strptime('20210321', '%Y%m%d') \
                and dt_init_time \
                < datetime.datetime.strptime('20220628', '%Y%m%d'):
            hpss_tar_filename_prefix = ('com_gfs_prod_'+model_dump+'.'
                                        +YYYYmmdd+'_'+HH+'.'+model_dump)
            hpss_file_prefix = os.path.join(model_dump+'.'+YYYYmmdd,
                                            HH, 'atmos',
                                            model_dump+'.t'+HH+'z.')
        elif dt_init_time \
                >= datetime.datetime.strptime('20200226', '%Y%m%d') \
                and dt_init_time \
                < datetime.datetime.strptime('20210321', '%Y%m%d'):
            hpss_tar_filename_prefix = ('com_gfs_prod_'+model_dump+'.'
                                        +YYYYmmdd+'_'+HH+'.'+model_dump)
            hpss_file_prefix = os.path.join(model_dump+'.'+YYYYmmdd,
                                            HH, model_dump+'.t'+HH+'z.')
        elif dt_init_time \
                >= datetime.datetime.strptime('20190612', '%Y%m%d') \
                and dt_init_time \
                < datetime.datetime.strptime('20200226', '%Y%m%d'):
            hpss_tar_filename_prefix = ('gpfs_dell1_nco_ops_com_gfs_prod_'
                                        +model_dump+'.'+YYYYmmdd+'_'+HH
                                        +'.'+model_dump)
            hpss_file_prefix = os.path.join(model_dump+'.'+YYYYmmdd,
                                            HH, model_dump+'.t'+HH+'z.')
        elif dt_init_time \
                >= datetime.datetime.strptime('20170720','%Y%m%d') \
                and dt_init_time \
                < datetime.datetime.strptime('20190612','%Y%m%d'):
            hpss_tar_filename_prefix = ('gpfs_hps_nco_ops_com_gfs_prod_'
                                        +model_dump+'.'+YYYYmmddHH)
            hpss_file_prefix = model_dump+'.t'+HH+'z.'
        elif dt_init_time \
                >= datetime.datetime.strptime('20160510', '%Y%m%d') \
                and dt_init_time \
                < datetime.datetime.strptime('20170720', '%Y%m%d'):
            hpss_tar_filename_prefix = ('com2_gfs_prod_'+model_dump
                                        +'.'+YYYYmmddHH)
            hpss_file_prefix = model_dump+'.t'+HH+'z.'
        elif dt_init_time \
                < datetime.datetime.strptime('20160510', '%Y%m%d'):
            hpss_tar_filename_prefix = ('com_gfs_prod_'+model_dump
                                        +'.'+YYYYmmddHH)
            hpss_file_prefix = model_dump+'.t'+HH+'z.'
        # gfs and gdas grib2 files
        if model_dump in ['gfs', 'gdas'] and (hpss_file_suffix == 'anl' \
                or hpss_file_suffix[0] == 'f'):
            if dt_init_time \
                    >= datetime.datetime.strptime('20190612', '%Y%m%d'):
                hpss_tar_filename = hpss_tar_filename_prefix+'_pgrb2.tar'
            else:
                hpss_tar_filename = hpss_tar_filename_prefix+'.pgrb2_0p25.tar'
            hpss_file = hpss_file_prefix+'pgrb2.0p25.'+hpss_file_suffix
        # gdas prepbufr file
        if model_dump == 'gdas' and hpss_file_suffix == 'prepbufr':
            hpss_tar_filename = hpss_tar_filename_prefix+'.tar'
            hpss_file = hpss_file_prefix+hpss_file_suffix
            if dt_init_time \
                    < datetime.datetime.strptime('20170720', '%Y%m%d'):
                hpss_file = hpss_file.replace(model_dump, model_dump+'1')
        # enkfgdas files
        if model_dump == 'enkfgdas':
            hpss_tar_filename = hpss_tar_filename_prefix+'.tar'
            hpss_file = (hpss_file_prefix.replace(model_dump+'.t', 'gdas.t')
                         +hpss_file_suffix)
        hpss_tar = os.path.join(hpss_dir, 'rh'+YYYY, YYYYmm, YYYYmmdd,
                                hpss_tar_filename)
    else:
        # Set up tar file
        if model_dump == 'gfs':
            hpss_tar_filename = model_dump+'a.tar'
        else:
            hpss_tar_filename = model_dump+'.tar'
        hpss_tar = os.path.join(hpss_dir, YYYYmmddHH, hpss_tar_filename)
        # Set up file
        if hpss_file_suffix == 'cyclone.trackatcfunix':
            hpss_file = os.path.join(model_dump+'.'+YYYYmmdd, HH,
                                     'atmos', 'avno.t'+HH+'z.'
                                     +hpss_file_suffix)
        elif model_dump == 'enkfgdas':
            hpss_file = os.path.join(model_dump+'.'+YYYYmmdd, HH,
                                     'atmos', 'gdas.t'+HH+'z.'
                                     +hpss_file_suffix)
        else:
            hpss_file = os.path.join(model_dump+'.'+YYYYmmdd, HH,
                                     'atmos', model_dump+'.t'+HH
                                     +'z.pgrb2.0p25.'+hpss_file_suffix)
    # Set up job file name
    hpss_job_filename = os.path.join(save_data_dir, 'HPSS_jobs',
                                     'HPSS_'+hpss_tar.rpartition('/')[2]
                                     +'_'+hpss_file.replace('/', '_')+'.sh')
    return hpss_tar, hpss_file, hpss_job_filename

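# A worked example of the set up above for the operational archive
# (illustrative values): a gdas prepbufr request initialized 2023010100
# falls in the post-20221129 branch, giving
#   hpss_tar  = /NCEPPROD/hpssprod/runhistory/rh2023/202301/20230101/
#               com_obsproc_v1.1_gdas.20230101_00.obsproc_gdas.tar
#   hpss_file = gdas.20230101/00/atmos/gdas.t00z.prepbufr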

def get_hpss_data(hpss_job_filename, save_data_dir, save_data_file,
                  hpss_tar, hpss_file):
    """! This creates a job card with the necessary information
         to retrieve a file from HPSS. It then submits this job
         card to the transfer queue with the designated wall time.

         Args:
             hpss_job_filename - string of the path of the HPSS job card
                                 name
             save_data_dir     - string of the path to the directory
                                 where the HPSS retrieved file will
                                 be saved
             save_data_file    - string of the file name the HPSS
                                 retrieved file will be saved as
             hpss_tar          - string of the tar file path where
                                 hpss_file is located
             hpss_file         - string of the file name to be retrieved
                                 from HPSS

         Returns:
    """
    # Read in environment variables
    HTAR = os.environ['HTAR']
    hpss_walltime = os.environ['hpss_walltime']
    machine = os.environ['machine']
    QUEUESERV = os.environ['QUEUESERV']
    ACCOUNT = os.environ['ACCOUNT']
    # Set up job wall time information
    walltime_seconds = (
        datetime.timedelta(minutes=int(hpss_walltime)).total_seconds()
    )
    walltime = (datetime.datetime.min
                + datetime.timedelta(minutes=int(hpss_walltime))).time()
    if os.path.exists(hpss_job_filename):
        os.remove(hpss_job_filename)
    # Create job card
    with open(hpss_job_filename, 'a') as hpss_job_file:
        hpss_job_file.write('#!/bin/sh'+'\n')
        hpss_job_file.write('cd '+save_data_dir+'\n')
        hpss_job_file.write(HTAR+' -xf '+hpss_tar+' ./'+hpss_file+'\n')
        if '/NCEPPROD' not in hpss_tar:
            hpss_job_file.write(HTAR+' -xf '+hpss_tar+' ./'
                                +hpss_file.replace('atmos/','')+'\n')
        if 'pgrb2' in hpss_file:
            cnvgrib = os.environ['CNVGRIB']
            hpss_job_file.write(cnvgrib+' -g21 '+hpss_file+' '
                                +save_data_file+' > /dev/null 2>&1\n')
            if '/NCEPPROD' not in hpss_tar:
                hpss_job_file.write(cnvgrib+' -g21 '
                                    +hpss_file.replace('atmos/','')+' '
                                    +save_data_file+' > /dev/null 2>&1\n')
            hpss_job_file.write('rm -r '+hpss_file.split('/')[0])
        elif 'trackatcfunix' in hpss_file:
            hpss_job_file.write('cp '+hpss_file.split('avno')[0]+'avno* '
                                +save_data_file+'\n')
            if '/NCEPPROD' not in hpss_tar:
                hpss_job_file.write('cp '+hpss_file.replace('atmos/','') \
                                    .split('avno')[0]+'avno* '
                                    +save_data_file+'\n')
            hpss_job_file.write('rm -r '+hpss_file.split('/')[0]+'\n')
            model_atcf_abbrv = (save_data_file.split('/')[-2])[0:4].upper()
            hpss_job_file.write('sed -i s/AVNO/'+model_atcf_abbrv+'/g '
                                +save_data_file)
        else:
            if hpss_file[0:5] != 'ccpa.':
                hpss_job_file.write('cp '+hpss_file+' '+save_data_file+'\n')
                if '/NCEPPROD' not in hpss_tar:
                    hpss_job_file.write('cp '
                                        +hpss_file.replace('atmos/','')+' '
                                        +save_data_file+'\n')
                hpss_job_file.write('rm -r '+hpss_file.split('/')[0])
    # Submit job card
    os.chmod(hpss_job_filename, 0o755)
    hpss_job_output = hpss_job_filename.replace('.sh', '.out')
    if os.path.exists(hpss_job_output):
        os.remove(hpss_job_output)
    hpss_job_name = hpss_job_filename.rpartition('/')[2].replace('.sh', '')
    print("Submitting "+hpss_job_filename+" to "+QUEUESERV)
    print("Output sent to "+hpss_job_output)
    if machine == 'WCOSS2':
        os.system('qsub -V -l walltime='+walltime.strftime('%H:%M:%S')+' '
                  +'-q '+QUEUESERV+' -A '+ACCOUNT+' -o '+hpss_job_output+' '
                  +'-e '+hpss_job_output+' -N '+hpss_job_name+' '
                  +'-l select=1:ncpus=1 '+hpss_job_filename)
        job_check_cmd = ('qselect -s QR -u '+os.environ['USER']+' '
                         +'-N '+hpss_job_name+' | wc -l')
    elif machine in ['HERA', 'JET']:
        os.system('sbatch --ntasks=1 --time='
                  +walltime.strftime('%H:%M:%S')+' --partition='+QUEUESERV+' '
                  +'--account='+ACCOUNT+' --output='+hpss_job_output+' '
                  +'--job-name='+hpss_job_name+' '+hpss_job_filename)
        job_check_cmd = ('squeue -u '+os.environ['USER']+' -n '
                         +hpss_job_name+' -t R,PD -h | wc -l')
    elif machine in ['ORION', 'S4', 'HERCULES']:
        print("ERROR: No HPSS access from "+machine)
    if machine not in ['ORION', 'S4', 'HERCULES']:
        sleep_counter, sleep_checker = 1, 10
        while (sleep_counter*sleep_checker) <= walltime_seconds:
            sleep(sleep_checker)
            print("Walltime checker: "+str(sleep_counter*sleep_checker)+" "
                  +"out of "+str(int(walltime_seconds))+" seconds")
            check_job = subprocess.check_output(job_check_cmd, shell=True,
                                                encoding='UTF-8')
            if check_job[0] == '0':
                break
            sleep_counter+=1

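# For reference, a job card written above for an operational-archive
# GRIB2 request looks roughly like the following sketch (paths are
# illustrative; the HTAR and CNVGRIB executables come from the
# environment):
#   #!/bin/sh
#   cd /path/to/save_data_dir
#   htar -xf /NCEPPROD/.../com_gfs_v16.3_gfs.YYYYMMDD_HH.gfs_pgrb2.tar \
#       ./gfs.YYYYMMDD/HH/atmos/gfs.tHHz.pgrb2.0p25.fFFF
#   cnvgrib -g21 gfs.YYYYMMDD/HH/atmos/gfs.tHHz.pgrb2.0p25.fFFF \
#       /path/to/save_data_file > /dev/null 2>&1
#   rm -r gfs.YYYYMMDD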
print("Walltime checker: "+str(sleep_counter*sleep_checker)+" " +"out of "+str(int(walltime_seconds))+" seconds") check_job = subprocess.check_output(job_check_cmd, shell=True, encoding='UTF-8') if check_job[0] == '0': break sleep_counter+=1 def convert_grib2_grib1(grib2_file, grib1_file): """! This converts GRIB2 data to GRIB1 Args: grib2_file - string of the path to the GRIB2 file to convert grib1_file - string of the path to save the converted GRIB1 file Returns: """ print("Converting GRIB2 file "+grib2_file+" " +"to GRIB1 file "+grib1_file) cnvgrib = os.environ['CNVGRIB'] os.system(cnvgrib+' -g21 '+grib2_file+' ' +grib1_file+' > /dev/null 2>&1') def convert_grib1_grib2(grib1_file, grib2_file): """! This converts GRIB2 data to GRIB1 Args: grib1_file - string of the path to the GRIB1 file to convert grib2_file - string of the path to save the converted GRIB2 file Returns: """ print("Converting GRIB1 file "+grib1_file+" " +"to GRIB2 file "+grib2_file) cnvgrib = os.environ['CNVGRIB'] os.system(cnvgrib+' -g12 '+grib1_file+' ' +grib2_file+' > /dev/null 2>&1') def get_model_file(valid_time_dt, init_time_dt, lead_str, name, data_dir, file_format, run_hpss, hpss_data_dir, link_data_dir, link_file_format): """! This links a model file from its archive. If the file does not exist locally, then retrieve from HPSS if requested. Args: valid_time_dt - datetime object of the valid time init_time_dt - datetime object of the initialization time lead_str - string of the forecast lead name - string of the model name data_dir - string of the online archive for model file_format - string of the file format the files are saved as in the data_dir run_hpss - string of whether to get missing online model data (YES) or not (NO) hpss_data_dir - string of the path to model data on HPSS link_data_dir - string of the directory to link model data to link_file_format - string of the linked file name Returns: """ grib2_file_names = ['grib2', 'grb2'] link_filename = format_filler(link_file_format, valid_time_dt, init_time_dt, lead_str) link_model_file = os.path.join(link_data_dir, link_filename) if not os.path.exists(link_model_file): model_filename = format_filler(file_format, valid_time_dt, init_time_dt, lead_str) model_file = os.path.join(data_dir, name, model_filename) if os.path.exists(model_file): if any(g in model_file for g in grib2_file_names): convert_grib2_grib1(model_file, link_model_file) else: #if 'track' in link_filename: # os.system('cp '+model_file+' '+link_model_file) #else: os.system('ln -sf '+model_file+' '+link_model_file) else: if run_hpss == 'YES': print("Did not find "+model_file+" online..." 
+"going to try to get file from HPSS") if 'enkfgdas' in file_format: model_dump = 'enkfgdas' elif 'gdas' in file_format: model_dump = 'gdas' elif 'gfs' in file_format: model_dump = 'gfs' else: model_dump = name if lead_str != 'anl': file_lead = 'f'+lead_str.zfill(3) else: file_lead = lead_str if 'track' in link_file_format: (model_hpss_tar, model_hpss_file, model_hpss_job_filename) = set_up_gfs_hpss_info( init_time_dt, hpss_data_dir, model_dump, 'cyclone.trackatcfunix', link_data_dir ) elif 'ensspread' in link_file_format \ or 'ensmean' in link_file_format: if 'spread' in file_format: file_type = 'spread' elif 'mean' in file_format: file_type = 'mean' if '.nc4' in file_format: nc_type = 'nc4' else: nc_type = 'nc' (model_hpss_tar, model_hpss_file, model_hpss_job_filename) = set_up_gfs_hpss_info( init_time_dt, hpss_data_dir, model_dump, 'atm'+file_lead+'.ens'+file_type+'.'+nc_type, link_data_dir ) else: (model_hpss_tar, model_hpss_file, model_hpss_job_filename) = set_up_gfs_hpss_info( init_time_dt, hpss_data_dir, model_dump, file_lead, link_data_dir ) get_hpss_data(model_hpss_job_filename, link_data_dir, link_model_file, model_hpss_tar, model_hpss_file) else: print("Already got "+link_model_file) if not os.path.exists(link_model_file): if run_hpss == 'YES': print("WARNING: "+model_file+" does not exist and did not find " +"HPSS file "+model_hpss_file+" from "+model_hpss_tar+" or " +"walltime exceeded") else: print("WARNING: "+model_file+" does not exist") def create_mean_truth(mean_model_list, mean_model_dir_list, mean_model_file_format_list, valid_time_dt, grid, output_dir): """! This creates a mean from a list of model files. Args: mean_model_list - list of strings of model names mean_model_dir_list - list of string of the model directories mean_model_file_format_list - list of strings of the file formats valid_time_dt - datetime object of the valid time grid - the grid verification is done on, used for regridding output_dir - string of path to the base output directory Returns: """ # Models mean_file = os.path.join( output_dir, output_dir.rpartition('/')[2] +'.'+valid_time_dt.strftime('%Y%m%d%H') ) mean_grib2_file = (mean_file+'.grib2') mean_model_file_list = [] nmean_models = len(mean_model_list) # Variables variable_name_level_dict = { 'HGT': ['P1000', 'P850', 'P700', 'P500', 'P250', 'P200', 'P100', 'P50', 'P20', 'P10', 'P5', 'P1', 'L0_7'], 'TMP': ['P1000','P850', 'P700', 'P500', 'P250', 'P200', 'P100', 'P50', 'P20', 'P10', 'P5', 'P1', 'Z2', 'Z0', 'L0_7'], 'UGRD': ['P1000','P850', 'P700', 'P500', 'P250', 'P200', 'P100', 'P50', 'P20', 'P10', 'P5', 'P1', 'Z10'], 'VGRD': ['P1000','P850', 'P700', 'P500', 'P250', 'P200', 'P100', 'P50', 'P20', 'P10', 'P5', 'P1', 'Z10'], 'O3MR': ['P100', 'P70', 'P50', 'P30', 'P20', 'P10', 'P5', 'P1'], 'PRMSL': ['Z0'], 'RH': ['Z2'], 'SPFH': ['Z2'], 'HPBL': ['Z0'], 'PRES': ['Z0', 'L0_7'], 'TSOIL': ['Z0-10'], 'SOILW': ['Z0-10'], 'WEASD': ['Z0'], 'CAPE': ['Z0'], 'CWAT': ['L0_200'], 'PWAT': ['L0_200'], 'TOZNE': ['L0_200'] } # Executables regrid_data_plane = os.path.join( os.environ['HOMEMET'], os.environ['HOMEMET_bin_exec'], 'regrid_data_plane' ) wgrib = os.environ['WGRIB'] wgrib2 = os.environ['WGRIB2'] copygb = os.environ['COPYGB'] ncea = os.environ['NCEA'] ncdump = os.environ['NCDUMP'] # Get model files for mean_model in mean_model_list: mean_model_dir = os.path.join(output_dir, mean_model) mean_model_idx = mean_model_list.index(mean_model) mean_model_dir = mean_model_dir_list[mean_model_idx] mean_model_file_format = mean_model_file_format_list[mean_model_idx] 

def create_mean_truth(mean_model_list, mean_model_dir_list,
                      mean_model_file_format_list, valid_time_dt,
                      grid, output_dir):
    """! This creates a mean from a list of model files.

         Args:
             mean_model_list             - list of strings of model names
             mean_model_dir_list         - list of strings of the model
                                           directories
             mean_model_file_format_list - list of strings of the file
                                           formats
             valid_time_dt               - datetime object of the
                                           valid time
             grid                        - the grid verification is done
                                           on, used for regridding
             output_dir                  - string of path to the base
                                           output directory

         Returns:
    """
    # Models
    mean_file = os.path.join(
        output_dir, output_dir.rpartition('/')[2]
        +'.'+valid_time_dt.strftime('%Y%m%d%H')
    )
    mean_grib2_file = (mean_file+'.grib2')
    mean_model_file_list = []
    nmean_models = len(mean_model_list)
    # Variables
    variable_name_level_dict = {
        'HGT': ['P1000', 'P850', 'P700', 'P500', 'P250', 'P200', 'P100',
                'P50', 'P20', 'P10', 'P5', 'P1', 'L0_7'],
        'TMP': ['P1000', 'P850', 'P700', 'P500', 'P250', 'P200', 'P100',
                'P50', 'P20', 'P10', 'P5', 'P1', 'Z2', 'Z0', 'L0_7'],
        'UGRD': ['P1000', 'P850', 'P700', 'P500', 'P250', 'P200', 'P100',
                 'P50', 'P20', 'P10', 'P5', 'P1', 'Z10'],
        'VGRD': ['P1000', 'P850', 'P700', 'P500', 'P250', 'P200', 'P100',
                 'P50', 'P20', 'P10', 'P5', 'P1', 'Z10'],
        'O3MR': ['P100', 'P70', 'P50', 'P30', 'P20', 'P10', 'P5', 'P1'],
        'PRMSL': ['Z0'],
        'RH': ['Z2'],
        'SPFH': ['Z2'],
        'HPBL': ['Z0'],
        'PRES': ['Z0', 'L0_7'],
        'TSOIL': ['Z0-10'],
        'SOILW': ['Z0-10'],
        'WEASD': ['Z0'],
        'CAPE': ['Z0'],
        'CWAT': ['L0_200'],
        'PWAT': ['L0_200'],
        'TOZNE': ['L0_200']
    }
    # Executables
    regrid_data_plane = os.path.join(
        os.environ['HOMEMET'], os.environ['HOMEMET_bin_exec'],
        'regrid_data_plane'
    )
    wgrib = os.environ['WGRIB']
    wgrib2 = os.environ['WGRIB2']
    copygb = os.environ['COPYGB']
    ncea = os.environ['NCEA']
    ncdump = os.environ['NCDUMP']
    # Get model files
    for mean_model in mean_model_list:
        mean_model_dir = os.path.join(output_dir, mean_model)
        mean_model_idx = mean_model_list.index(mean_model)
        mean_model_dir = mean_model_dir_list[mean_model_idx]
        mean_model_file_format = mean_model_file_format_list[mean_model_idx]
        save_mean_model_file_format = (
            mean_model_file_format_list[mean_model_idx]\
            .replace('.grib2', '').replace('.grb2', '')
        )
        output_mean_model_dir = os.path.join(output_dir, mean_model)
        if not os.path.exists(output_mean_model_dir):
            os.makedirs(output_mean_model_dir)
        get_model_file(valid_time_dt, valid_time_dt, '00', mean_model,
                       mean_model_dir, mean_model_file_format, 'NO',
                       '/null', output_mean_model_dir,
                       save_mean_model_file_format)
        mean_model_file = os.path.join(output_mean_model_dir,
                                       format_filler(
                                           save_mean_model_file_format,
                                           valid_time_dt, valid_time_dt,
                                           '00'
                                       ))
        if os.path.exists(mean_model_file):
            mean_model_file_list.append(mean_model_file)
    # Regrid files individually for each variable for each model, and
    # take the mean if available for all models
    # Create an individual grib2 template file for each variable;
    # only use NCEP GFS files for this (center = 7)
    if len(mean_model_file_list) == nmean_models:
        print("Creating mean truth file "+mean_file+" using "
              +', '.join(mean_model_file_list))
        for var_name in variable_name_level_dict:
            for var_level in variable_name_level_dict[var_name]:
                all_models_have_var = True
                create_var_template = True
                mean_model_var_nc_file_list = []
                for mean_model in mean_model_list:
                    mean_model_idx = mean_model_list.index(mean_model)
                    save_mean_model_file_format = mean_model_file_format_list[
                        mean_model_idx
                    ].replace('.grib2', '').replace('.grb2', '')
                    output_mean_model_dir = os.path.join(output_dir,
                                                         mean_model)
                    save_mean_model_file = os.path.join(
                        output_mean_model_dir,
                        format_filler(save_mean_model_file_format,
                                      valid_time_dt, valid_time_dt, '00')
                    )
                    # Create template
                    if create_var_template:
                        output_template_dir = os.path.join(output_dir,
                                                           'template')
                        if not os.path.exists(output_template_dir):
                            os.makedirs(output_template_dir)
                        template_regrid_file = os.path.join(
                            output_template_dir, 'template.'
                            +valid_time_dt.strftime('%Y%m%d%H')+'_regrid'
                        )
                        template_grib2_file = os.path.join(
                            output_template_dir, 'template.'
                            +valid_time_dt.strftime('%Y%m%d%H')+'.grib2'
                        )
                        template_var_grib2_file = os.path.join(
                            output_template_dir, 'template.'
                            +valid_time_dt.strftime('%Y%m%d%H')+'_'
                            +var_name+'_'+var_level+'.grib2'
                        )
                        if not os.path.exists(template_regrid_file):
                            check_center = subprocess.check_output(
                                wgrib+' -V '+save_mean_model_file,
                                shell=True, encoding='UTF-8'
                            )
                            if 'center 7 ' in check_center:
                                os.system(copygb+' -'+grid.lower()+' -x '
                                          +save_mean_model_file+' '
                                          +template_regrid_file+' '
                                          +'> /dev/null 2>&1')
                        if os.path.exists(template_regrid_file):
                            convert_grib1_grib2(template_regrid_file,
                                                template_grib2_file)
                        if 'P' in var_level:
                            var_level_grib2 = var_level[1:]+' mb'
                        elif 'Z' in var_level:
                            if var_level == 'Z0':
                                if var_name == 'PRMSL':
                                    var_level_grib2 = 'mean sea level'
                                else:
                                    var_level_grib2 = 'surface'
                            elif var_level == 'Z0-10':
                                var_level_grib2 = '0-0.1 m below ground'
                            else:
                                var_level_grib2 = (var_level[1:]+' m above '
                                                   +'ground')
                        elif var_level == 'L0_7':
                            var_level_grib2 = 'tropopause'
                        elif var_level == 'L0_200':
                            var_level_grib2 = ('entire atmosphere \('
                                               +'considered as a single '
                                               +'layer)')
                        if os.path.exists(template_grib2_file):
                            os.system(wgrib2+' '+template_grib2_file+' '
                                      +'-match ":'+var_name+':" '
                                      +'-match ":'+var_level_grib2+':" '
                                      +'-grib_out '
                                      +template_var_grib2_file+' '
                                      +'> /dev/null 2>&1')
                        if os.path.exists(template_var_grib2_file):
                            create_var_template = False
                    # Regrid
                    mean_model_var_nc_file = os.path.join(
                        output_mean_model_dir,
                        format_filler(save_mean_model_file_format,
                                      valid_time_dt, valid_time_dt,
                                      '00')+'_'
                        +var_name+'_'+var_level+'.nc'
                    )
                    mean_model_var_nc_file_list.append(
                        mean_model_var_nc_file
                    )
                    nc_var = var_name+'_'+var_level.replace(' ', '')
                    if '_' in var_level:
                        field_info = ("'"+'name="'+var_name+'"; '
                                      +'level="'+var_level.split('_')[0]
                                      +'"; GRIB_lvl_typ='
                                      +var_level.split('_')[1]+";'")
                    else:
                        field_info = ("'"+'name="'+var_name+'"; '
                                      +'level="'+var_level+'";'+"'")
                    run_rdp = subprocess.run(
                        regrid_data_plane+' '+save_mean_model_file+' '
                        +grid+' '+mean_model_var_nc_file+' '
                        +'-field '+field_info+' -method BILIN '
                        +'-width 2 -name '+nc_var+' -v 1 '
                        +'> /dev/null 2>&1', shell=True
                    )
                    if run_rdp.returncode != 0:
                        all_models_have_var = False
                # Take mean
                if all_models_have_var:
                    mean_var_nc_file = (mean_file+'_'+var_name+'_'
                                        +var_level+'.nc')
                    os.system(ncea+' '
                              +' '.join(mean_model_var_nc_file_list)+' '
                              +'-O -o '+mean_var_nc_file)
                    lat_dim_output = subprocess.check_output(
                        ncdump+' -h '+mean_var_nc_file+' '
                        +'| grep ":Nlat = "', shell=True, encoding='UTF-8'
                    )
                    lat_dim = ''
                    for string in lat_dim_output:
                        if string.isdigit():
                            lat_dim = lat_dim + string
                    lon_dim_output = subprocess.check_output(
                        ncdump+' -h '+mean_var_nc_file+' '
                        +'| grep ":Nlon = "', shell=True, encoding='UTF-8'
                    )
                    lon_dim = ''
                    for string in lon_dim_output:
                        if string.isdigit():
                            lon_dim = lon_dim + string
                    # Fill using template
                    os.environ['HDF5_DISABLE_VERSION_CHECK'] = '1'
                    mean_var_grib2_file = (mean_file+'_'+var_name+'_'
                                           +var_level+'.grib2')
                    os.system(wgrib2+' '
                              +template_var_grib2_file+' '
                              +'-import_netcdf '
                              +mean_var_nc_file+' '
                              +'"'+nc_var+'" "0:'+lat_dim+':0:'
                              +lon_dim+'" -grib_out '
                              +mean_var_grib2_file+' '
                              +'> /dev/null 2>&1')
                    os.system('cat '+mean_var_grib2_file+' >> '
                              +mean_grib2_file)
        # Convert mean analysis to grib1
        if os.path.exists(mean_grib2_file):
            convert_grib2_grib1(mean_grib2_file, mean_file)
        if os.path.exists(mean_file):
            print("Created "+mean_file)
    else:
        print("WARNING: Do not have all files to create "+mean_file+" from "
              +"models "+', '.join(mean_model_list)+", only have files "
              +', '.join(mean_model_file_list))

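# For reference, the regrid_data_plane command assembled above reduces,
# for TMP at 850 hPa on an illustrative G002 grid, to:
#   regrid_data_plane save_mean_model_file G002 out.nc \
#       -field 'name="TMP"; level="P850";' -method BILIN -width 2 \
#       -name TMP_P850 -v 1
# The GRIB_lvl_typ form of field_info is used for the L0_* levels.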

def get_model_stat_file(valid_time_dt, init_time_dt, lead_str, name,
                        stat_data_dir, gather_by, RUN_dir_name,
                        RUN_sub_dir_name, link_data_dir):
    """! This links a model .stat file from its archive.

         Args:
             valid_time_dt    - datetime object of the valid time
             init_time_dt     - datetime object of the initialization
                                time
             lead_str         - string of the forecast lead
             name             - string of the model name
             stat_data_dir    - string of the online archive for model
                                MET .stat files
             gather_by        - string of how the .stat files are
                                gathered in the archive (VALID, INIT,
                                or VSDB)
             RUN_dir_name     - string of RUN directory name in
                                'metplus_data' archive
             RUN_sub_dir_name - string of RUN sub-directory name
                                (under RUN_dir_name) in 'metplus_data'
                                archive
             link_data_dir    - string of the directory to link model
                                data to

         Returns:
    """
    model_stat_gather_by_RUN_dir = os.path.join(stat_data_dir,
                                                'metplus_data',
                                                'by_'+gather_by,
                                                RUN_dir_name,
                                                RUN_sub_dir_name)
    if gather_by == 'VALID':
        model_stat_file = os.path.join(model_stat_gather_by_RUN_dir,
                                       valid_time_dt.strftime('%H')+'Z',
                                       name, name+'_'
                                       +valid_time_dt.strftime('%Y%m%d')
                                       +'.stat')
        link_model_stat_file = os.path.join(link_data_dir, name+'_valid'
                                            +valid_time_dt.strftime('%Y%m%d')
                                            +'_valid'
                                            +valid_time_dt.strftime('%H')
                                            +'.stat')
    elif gather_by == 'INIT':
        model_stat_file = os.path.join(model_stat_gather_by_RUN_dir,
                                       init_time_dt.strftime('%H')+'Z',
                                       name, name+'_'
                                       +init_time_dt.strftime('%Y%m%d')
                                       +'.stat')
        link_model_stat_file = os.path.join(link_data_dir, name+'_init'
                                            +init_time_dt.strftime('%Y%m%d')
                                            +'_init'
                                            +init_time_dt.strftime('%H')
                                            +'.stat')
    elif gather_by == 'VSDB':
        if RUN_dir_name in ['grid2grid', 'satellite']:
            model_stat_file = os.path.join(model_stat_gather_by_RUN_dir,
                                           valid_time_dt.strftime('%H')+'Z',
                                           name, name+'_'
                                           +valid_time_dt.strftime('%Y%m%d')
                                           +'.stat')
            link_model_stat_file = os.path.join(link_data_dir, name+'_valid'
                                                +valid_time_dt.strftime(
                                                    '%Y%m%d'
                                                )
                                                +'_valid'
                                                +valid_time_dt.strftime('%H')
                                                +'.stat')
        elif RUN_dir_name in ['grid2obs', 'precip']:
            model_stat_file = os.path.join(model_stat_gather_by_RUN_dir,
                                           init_time_dt.strftime('%H')+'Z',
                                           name, name+'_'
                                           +valid_time_dt.strftime('%Y%m%d')
                                           +'.stat')
            link_model_stat_file = os.path.join(link_data_dir, name+'_valid'
                                                +valid_time_dt.strftime(
                                                    '%Y%m%d'
                                                )
                                                +'_init'
                                                +init_time_dt.strftime('%H')
                                                +'.stat')
    if not os.path.exists(link_model_stat_file):
        if os.path.exists(model_stat_file):
            os.system('ln -sf '+model_stat_file+' '+link_model_stat_file)
        else:
            print("WARNING: "+model_stat_file+" does not exist")

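# A sketch of the archive layout get_model_stat_file expects when
# gathering by VALID (model and RUN names illustrative):
#   {stat_data_dir}/metplus_data/by_VALID/grid2grid/anom/00Z/model1/
#       model1_20240101.stat
# which gets linked as model1_valid20240101_valid00.stat in
# link_data_dir.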
# Read in common environment variables
RUN = os.environ['RUN']
model_list = os.environ['model_list'].split(' ')
model_dir_list = os.environ['model_dir_list'].split(' ')
model_stat_dir_list = os.environ['model_stat_dir_list'].split(' ')
model_file_format_list = os.environ['model_file_format_list'].split(' ')
model_data_run_hpss = os.environ['model_data_run_hpss']
model_hpss_dir_list = os.environ['model_hpss_dir_list'].split(' ')
start_date = os.environ['start_date']
end_date = os.environ['end_date']
spinup_period_start = os.environ['spinup_period_start']
spinup_period_end = os.environ['spinup_period_end']
make_met_data_by = os.environ['make_met_data_by']
plot_by = os.environ['plot_by']
machine = os.environ['machine']
RUN_abbrev = os.environ['RUN_abbrev']
if RUN != 'tropcyc':
    RUN_type_list = os.environ[RUN_abbrev+'_type_list'].split(' ')
# Set up spin up period
check_spinup_period = False
if spinup_period_start != 'NA' and spinup_period_end != 'NA':
    check_spinup_period = True
    spinup_period_start_dt = datetime.datetime.strptime(
        spinup_period_start, '%Y%m%d%H'
    )
    spinup_period_end_dt = datetime.datetime.strptime(
        spinup_period_end, '%Y%m%d%H'
    )
# Set some common variables
hpss_prod_base_dir = '/NCEPPROD/hpssprod/runhistory'
cwd = os.getcwd()

# No HPSS access from Orion, S4, or Hercules
if machine in ['ORION', 'S4', 'HERCULES']:
    print("WARNING: "+machine+" does not currently have access to HPSS..."
          +"setting model_data_run_hpss to NO")
    model_data_run_hpss = 'NO'

if RUN == 'grid2grid_step1':
    # Read in RUN related environment variables
    global_archive = os.environ['global_archive']
    # Get model forecast and truth files for each option in RUN_type_list
    for RUN_type in RUN_type_list:
        print("Gathering files for "+RUN_type)
        RUN_abbrev_type = RUN_abbrev+'_'+RUN_type
        # Read in RUN_type environment variables
        RUN_abbrev_type_fcyc_list = os.environ[
            RUN_abbrev_type+'_fcyc_list'
        ].split(' ')
        RUN_abbrev_type_vhr_list = os.environ[
            RUN_abbrev_type+'_vhr_list'
        ].split(' ')
        RUN_abbrev_type_start_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_beg'
        ]
        RUN_abbrev_type_end_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_end'
        ]
        RUN_abbrev_type_hr_inc = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_inc'
        ]
        RUN_abbrev_type_fhr_list = os.environ[
            RUN_abbrev_type+'_fhr_list'
        ].split(', ')
        RUN_abbrev_type_truth_name = os.environ[
            RUN_abbrev_type+'_truth_name'
        ]
        RUN_abbrev_type_truth_file_format_list = os.environ[
            RUN_abbrev_type+'_truth_file_format_list'
        ].split(' ')
        # Get date and time information for RUN_type
        RUN_abbrev_type_time_info_dict = get_time_info(
            start_date, end_date, RUN_abbrev_type_start_hr,
            RUN_abbrev_type_end_hr, RUN_abbrev_type_hr_inc,
            RUN_abbrev_type_fhr_list, make_met_data_by
        )
        RUN_abbrev_type_valid_time_list = []
        # Get forecast files for each model
        for model in model_list:
            model_idx = model_list.index(model)
            model_dir = model_dir_list[model_idx]
            model_file_format = model_file_format_list[model_idx]
            model_hpss_dir = model_hpss_dir_list[model_idx]
            link_model_dir = os.path.join(cwd, 'data', model)
            if not os.path.exists(link_model_dir):
                os.makedirs(link_model_dir)
                os.makedirs(os.path.join(link_model_dir, 'HPSS_jobs'))
            for time in RUN_abbrev_type_time_info_dict:
                valid_time = time['valid_time']
                init_time = time['init_time']
                lead = time['lead']
                print("- Gathering model forecast file for "+model+" for "
                      +"init: "+init_time.strftime('%Y%m%d%H')+", "
                      +"lead: "+lead+", "
                      +"valid: "+valid_time.strftime('%Y%m%d%H'))
                get_file = True
                if init_time.strftime('%H') not in RUN_abbrev_type_fcyc_list:
                    print("WARNING: init. hour "+init_time.strftime('%H')+" "
                          +"not in list of requested init. hours "
                          +','.join(RUN_abbrev_type_fcyc_list))
                    get_file = False
                if valid_time.strftime('%H') not in RUN_abbrev_type_vhr_list:
                    print("WARNING: valid hour "+valid_time.strftime('%H')+" "
                          +"not in list of requested valid hours "
                          +','.join(RUN_abbrev_type_vhr_list))
                    get_file = False
time " +init_time.strftime('%Y%m%d%H')+" " +"in spinup period "+spinup_period_start +"-"+spinup_period_end) get_file = False if get_file: if valid_time not in RUN_abbrev_type_valid_time_list: RUN_abbrev_type_valid_time_list.append(valid_time) get_model_file(valid_time, init_time, lead, model, model_dir, model_file_format, model_data_run_hpss, model_hpss_dir, link_model_dir, 'f{lead?fmt=%3H}.{init?fmt=%Y%m%d%H}') # Get truth files for each model RUN_abbrev_type_truth_name_short = ( RUN_abbrev_type_truth_name.split('_')[0] ) RUN_abbrev_type_truth_name_lead = ( RUN_abbrev_type_truth_name.split('_')[1] ) if RUN_abbrev_type_truth_name_lead == 'f00': RUN_abbrev_type_truth_name_lead = '00' for model in model_list: model_idx = model_list.index(model) model_dir = model_dir_list[model_idx] model_hpss_dir = model_hpss_dir_list[model_idx] link_model_dir = os.path.join(cwd, 'data', model) if not os.path.exists(link_model_dir): os.makedirs(link_model_dir) os.makedirs(os.path.join(link_model_dir, 'HPSS_jobs')) for valid_time in RUN_abbrev_type_valid_time_list: print("- Gathering model truth file for "+model+" for " +"valid: "+valid_time.strftime('%Y%m%d%H')) get_file = True if valid_time.strftime('%H') not in RUN_abbrev_type_vhr_list: print("WARNING: valid hour "+valid_time.strftime('%H')+" " +"not in list of requested valid hours " +','.join(RUN_abbrev_type_vhr_list)) get_file = False if check_spinup_period: if valid_time >= spinup_period_start_dt \ and valid_time <= spinup_period_end_dt: print("WARNING: Valid time " +valid_time.strftime('%Y%m%d%H')+" " +"in spinup period "+spinup_period_start +"-"+spinup_period_end) get_file = False if get_file: link_truth_file = os.path.join( link_model_dir, format_filler(RUN_type+'.truth.{valid?fmt=%Y%m%d%H}', valid_time, valid_time, RUN_abbrev_type_truth_name_lead) ) if RUN_abbrev_type_truth_name in ['self_anl', 'self_f00', 'gfs_anl', 'gfs_f00', 'gdas_anl', 'gdas_f00', 'ecm_f00']: if RUN_abbrev_type_truth_name in ['self_anl', 'self_f00']: model_RUN_abbrev_type_truth_dir = model_dir model_RUN_abbrev_type_truth_file_format = ( RUN_abbrev_type_truth_file_format_list\ [model_idx] ) RUN_abbrev_type_truth_name_short = model model_RUN_abbrev_type_data_run_hpss = ( model_data_run_hpss ) model_RUN_abbrev_type_truth_hpss_dir = ( model_hpss_dir ) else: model_RUN_abbrev_type_truth_dir = global_archive model_RUN_abbrev_type_truth_file_format = ( RUN_abbrev_type_truth_file_format_list[0] ) if RUN_abbrev_type_truth_name_short == 'ecm': model_RUN_abbrev_type_truth_hpss_dir = '/null' model_RUN_abbrev_type_data_run_hpss = 'NO' else: model_RUN_abbrev_type_truth_hpss_dir = ( hpss_prod_base_dir ) model_RUN_abbrev_type_data_run_hpss = ( model_data_run_hpss ) if RUN_abbrev_type_truth_name_short == 'gdas': RUN_abbrev_type_truth_name_short = 'gfs' get_model_file( valid_time, valid_time, RUN_abbrev_type_truth_name_lead, RUN_abbrev_type_truth_name_short, model_RUN_abbrev_type_truth_dir, model_RUN_abbrev_type_truth_file_format, model_RUN_abbrev_type_data_run_hpss, model_RUN_abbrev_type_truth_hpss_dir, link_model_dir, RUN_type+'.truth.{valid?fmt=%Y%m%d%H}' ) truth_file = os.path.join( model_RUN_abbrev_type_truth_dir, RUN_abbrev_type_truth_name_short, format_filler( model_RUN_abbrev_type_truth_file_format, valid_time, valid_time, RUN_abbrev_type_truth_name_lead ) ) elif RUN_abbrev_type_truth_name in ['common_anl', 'common_f00', 'model_mean']: if 'common' in RUN_abbrev_type_truth_name: mean_truth_dir = os.path.join( cwd, 'data', RUN_abbrev_type_truth_name ) elif RUN_abbrev_type_truth_name == 
                    elif RUN_abbrev_type_truth_name in ['common_anl',
                                                        'common_f00',
                                                        'model_mean']:
                        if 'common' in RUN_abbrev_type_truth_name:
                            mean_truth_dir = os.path.join(
                                cwd, 'data', RUN_abbrev_type_truth_name
                            )
                        elif RUN_abbrev_type_truth_name == 'model_mean':
                            mean_truth_dir = os.path.join(
                                cwd, 'data',
                                RUN_type+'_'+RUN_abbrev_type_truth_name
                            )
                        if not os.path.exists(mean_truth_dir):
                            os.makedirs(mean_truth_dir)
                        truth_file = os.path.join(
                            mean_truth_dir,
                            mean_truth_dir.rpartition('/')[2]
                            +'.'+valid_time.strftime('%Y%m%d%H')
                        )
                        if not os.path.exists(truth_file):
                            if 'common' in RUN_abbrev_type_truth_name:
                                mean_truth_model_list = ['gfs', 'ecm',
                                                         'ukm', 'cmc']
                                mean_truth_model_dir_list = (
                                    [global_archive]*4
                                )
                            elif RUN_abbrev_type_truth_name == 'model_mean':
                                mean_truth_model_list = model_list
                                mean_truth_model_dir_list = model_dir_list
                            create_mean_truth(
                                mean_truth_model_list,
                                mean_truth_model_dir_list,
                                RUN_abbrev_type_truth_file_format_list,
                                valid_time,
                                os.environ[RUN_abbrev_type+'_grid'],
                                mean_truth_dir
                            )
                        if os.path.exists(truth_file):
                            os.system('ln -sf '+truth_file+' '
                                      +link_truth_file)
                    # Check model RUN_type truth file exists, if not try
                    # to use model's own f00 file
                    if not os.path.exists(link_truth_file) \
                            and RUN_abbrev_type_truth_name != 'self_f00':
                        print("WARNING: "+RUN_type+" truth file ("
                              +truth_file+") not found...will try to link "
                              +"model f00 from "+link_model_dir+" instead")
                        link_model_f00_file = os.path.join(
                            link_model_dir,
                            format_filler('f000.{init?fmt=%Y%m%d%H}',
                                          valid_time, valid_time, '00')
                        )
                        if os.path.exists(link_model_f00_file):
                            os.system('ln -sf '+link_model_f00_file+' '
                                      +link_truth_file)
                        if not os.path.exists(link_truth_file):
                            print("WARNING: Unable to link model f00 file as "
                                  +"substitute truth file "+link_truth_file)
elif RUN == 'grid2grid_step2':
    # Read in RUN related environment variables
    # Get stat files for each option in RUN_type_list
    for RUN_type in RUN_type_list:
        RUN_abbrev_type = RUN_abbrev+'_'+RUN_type
        # Read in RUN_type environment variables
        RUN_abbrev_type_fcyc_list = os.environ[
            RUN_abbrev_type+'_fcyc_list'
        ].split(' ')
        RUN_abbrev_type_vhr_list = os.environ[
            RUN_abbrev_type+'_vhr_list'
        ].split(' ')
        RUN_abbrev_type_start_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_beg'
        ]
        RUN_abbrev_type_end_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_end'
        ]
        RUN_abbrev_type_hr_inc = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_inc'
        ]
        RUN_abbrev_type_fhr_list = os.environ[
            RUN_abbrev_type+'_fhr_list'
        ].split(', ')
        RUN_abbrev_type_gather_by_list = os.environ[
            RUN_abbrev_type+'_gather_by_list'
        ].split(' ')
        # Get date and time information for RUN_type
        RUN_abbrev_type_time_info_dict = get_time_info(
            start_date, end_date, RUN_abbrev_type_start_hr,
            RUN_abbrev_type_end_hr, RUN_abbrev_type_hr_inc,
            RUN_abbrev_type_fhr_list, plot_by
        )
        # Get stat files for each model
        for model in model_list:
            model_idx = model_list.index(model)
            model_stat_dir = model_stat_dir_list[model_idx]
            model_RUN_abbrev_type_gather_by = (
                RUN_abbrev_type_gather_by_list[model_idx]
            )
            link_model_RUN_type_dir = os.path.join(cwd, 'data',
                                                   model, RUN_type)
            if not os.path.exists(link_model_RUN_type_dir):
                os.makedirs(link_model_RUN_type_dir)
            for time in RUN_abbrev_type_time_info_dict:
                valid_time = time['valid_time']
                init_time = time['init_time']
                lead = time['lead']
                if init_time.strftime('%H') not in RUN_abbrev_type_fcyc_list:
                    continue
                elif valid_time.strftime('%H') \
                        not in RUN_abbrev_type_vhr_list:
                    continue
                else:
                    get_model_stat_file(valid_time, init_time, lead,
                                        model, model_stat_dir,
                                        model_RUN_abbrev_type_gather_by,
                                        'grid2grid', RUN_type,
                                        link_model_RUN_type_dir)
elif RUN == 'grid2obs_step1':
    # Read in RUN related environment variables
    prepbufr_run_hpss = os.environ[RUN_abbrev+'_prepbufr_data_run_hpss']
    prepbufr_prod_upper_air_dir = os.environ['prepbufr_prod_upper_air_dir']
    prepbufr_prod_conus_sfc_dir = os.environ['prepbufr_prod_conus_sfc_dir']
    prepbufr_arch_dir = os.environ['prepbufr_arch_dir']
    iabp_ftp = os.environ['iabp_ftp']
    # No HPSS access from Orion, S4, Hercules
    if machine in ['ORION', 'S4', 'HERCULES']:
        print("WARNING: "+machine+" does not currently have access to HPSS..."
              +"setting "+RUN_abbrev+"_prepbufr_data_run_hpss to NO")
        prepbufr_run_hpss = 'NO'
    # Get model forecast and observation files for each option
    # in RUN_type_list
    for RUN_type in RUN_type_list:
        print("Gathering files for "+RUN_type)
        RUN_abbrev_type = RUN_abbrev+'_'+RUN_type
        # Read in RUN_type environment variables
        RUN_abbrev_type_fcyc_list = os.environ[
            RUN_abbrev_type+'_fcyc_list'
        ].split(' ')
        RUN_abbrev_type_vhr_list = os.environ[
            RUN_abbrev_type+'_vhr_list'
        ].split(' ')
        RUN_abbrev_type_start_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_beg'
        ]
        RUN_abbrev_type_end_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_end'
        ]
        RUN_abbrev_type_hr_inc = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_inc'
        ]
        RUN_abbrev_type_fhr_list = os.environ[
            RUN_abbrev_type+'_fhr_list'
        ].split(', ')
        # Get date and time information for RUN_type
        RUN_abbrev_type_time_info_dict = get_time_info(
            start_date, end_date, RUN_abbrev_type_start_hr,
            RUN_abbrev_type_end_hr, RUN_abbrev_type_hr_inc,
            RUN_abbrev_type_fhr_list, make_met_data_by
        )
        RUN_abbrev_type_valid_time_list = []
        # Get model forecast files
        for model in model_list:
            model_idx = model_list.index(model)
            model_dir = model_dir_list[model_idx]
            model_file_format = model_file_format_list[model_idx]
            model_hpss_dir = model_hpss_dir_list[model_idx]
            link_model_dir = os.path.join(cwd, 'data', model)
            if not os.path.exists(link_model_dir):
                os.makedirs(link_model_dir)
                os.makedirs(os.path.join(link_model_dir, 'HPSS_jobs'))
            for time in RUN_abbrev_type_time_info_dict:
                valid_time = time['valid_time']
                init_time = time['init_time']
                lead = time['lead']
                print("- Gathering model forecast file for "+model+" for "
                      +"init: "+init_time.strftime('%Y%m%d%H')+", "
                      +"lead: "+lead+", "
                      +"valid: "+valid_time.strftime('%Y%m%d%H'))
                get_file = True
                if init_time.strftime('%H') not in RUN_abbrev_type_fcyc_list:
                    print("WARNING: init. hour "+init_time.strftime('%H')+" "
                          +"not in list of requested init. hours "
                          +','.join(RUN_abbrev_type_fcyc_list))
                    get_file = False
hours " +','.join(RUN_abbrev_type_fcyc_list)) get_file = False if valid_time.strftime('%H') not in RUN_abbrev_type_vhr_list: print("WARNING: valid hour "+valid_time.strftime('%H')+" " +"not in list of requested valid hours " +','.join(RUN_abbrev_type_vhr_list)) get_file = False if check_spinup_period: if init_time >= spinup_period_start_dt \ and init_time <= spinup_period_end_dt: print("WARNING: lead "+lead+" with init time " +init_time.strftime('%Y%m%d%H')+" " +"in spinup period "+spinup_period_start +"-"+spinup_period_end) get_file = False if get_file: if valid_time not in RUN_abbrev_type_valid_time_list: RUN_abbrev_type_valid_time_list.append(valid_time) get_model_file(valid_time, init_time, lead, model, model_dir, model_file_format, model_data_run_hpss, model_hpss_dir, link_model_dir, 'f{lead?fmt=%3H}.{init?fmt=%Y%m%d%H}') # Get RUN_type observation files for valid_time in RUN_abbrev_type_valid_time_list: print("- Gathering truth file for " +"valid: "+valid_time.strftime('%Y%m%d%H')) YYYYmmddHH = valid_time.strftime('%Y%m%d%H') YYYYmmdd = valid_time.strftime('%Y%m%d') YYYYmm = valid_time.strftime('%Y%m') YYYY = valid_time.strftime('%Y') mm = valid_time.strftime('%m') dd = valid_time.strftime('%d') HH = valid_time.strftime('%H') DOY = valid_time.strftime('%j') get_file = True if valid_time.strftime('%H') not in RUN_abbrev_type_vhr_list: print("WARNING: valid hour "+valid_time.strftime('%H')+" " +"not in list of requested hours " +','.join(RUN_abbrev_type_vhr_list)) get_file = False if get_file: if RUN_type == 'polar_sfc': iabp_dir = os.path.join(cwd, 'data', 'iabp') if not os.path.exists(iabp_dir): os.makedirs(iabp_dir) os.makedirs(os.path.join(iabp_dir, 'wget_jobs')) iabp_YYYYmmdd_file = os.path.join(iabp_dir, 'iabp.'+YYYYmmdd) iabp_region_list = ['Arctic', 'Antarctic'] iabp_var_dict = { 'BP': ('PRES', '0'), 'Ts': ('TMP', '0'), 'Ta': ('TMP', '2') } niabp_vars = len(list(iabp_var_dict.keys())) ascii2nc_file_cols = [ 'Message_Type', 'Station_ID', 'Valid_Time', 'Lat', 'Lon', 'Elevation', 'Variable_Name', 'Level', 'Height', 'QC_String', 'Observation_Value' ] if not os.path.exists(iabp_YYYYmmdd_file): # Get IABP data from web for iabp_region in iabp_region_list: iabp_region_YYYYmmdd_file = os.path.join( iabp_dir, iabp_region+'_FR_'+YYYYmmdd+'.dat' ) if not os.path.exists(iabp_region_YYYYmmdd_file): iabp_ftp_region_YYYYmmdd_file = os.path.join( iabp_ftp, iabp_region, 'FR_'+YYYYmmdd+'.dat' ) iabp_wget_job_filename = os.path.join( iabp_dir, 'wget_jobs', 'wget_'+iabp_region+'_FR_'+YYYYmmdd+'.sh' ) iabp_wget_job_output = os.path.join( iabp_dir, 'wget_jobs', 'wget_'+iabp_region+'_FR_'+YYYYmmdd+'.out' ) iabp_wget_job_name = ( 'wget_'+iabp_region+'_FR_'+YYYYmmdd ) with open(iabp_wget_job_filename, 'w') \ as iabp_wget_job_file: iabp_wget_job_file.write('#!/bin/sh'+'\n') iabp_wget_job_file.write( 'wget ' +iabp_ftp_region_YYYYmmdd_file+' ' +'--no-check-certificate -O ' +iabp_region_YYYYmmdd_file ) wget_data(iabp_wget_job_filename, iabp_wget_job_name, iabp_wget_job_output) if not os.path.exists(iabp_region_YYYYmmdd_file): print("WARNING: Could not get IABP files from " +"FTP for "+iabp_region+" on "+YYYYmmdd) else: if os.path.getsize(iabp_region_YYYYmmdd_file) \ == 0: print("WARNING: Could not get IABP files " +"from FTP for "+iabp_region+" " +"on "+YYYYmmdd) os.remove(iabp_region_YYYYmmdd_file) iabp_reg_YYYYmmdd_file_list = glob.glob( os.path.join(iabp_dir, '*'+YYYYmmdd+'.dat') ) # Combine and reformat files for ascii2nc if len(iabp_reg_YYYYmmdd_file_list) != 0: iabp_ascii2nc_data = pd.DataFrame( 
                            iabp_ascii2nc_data = pd.DataFrame(
                                columns=ascii2nc_file_cols
                            )
                            idx_ascii2nc = 0
                            for iabp_reg_YYYYmmdd_file \
                                    in iabp_reg_YYYYmmdd_file_list:
                                iabp_reg_YYYYmmdd_data = pd.read_csv(
                                    iabp_reg_YYYYmmdd_file, sep=";",
                                    skipinitialspace=True, header=None,
                                    dtype=str,
                                    names=['BuoyID', 'Year', 'Hour', 'Min',
                                           'DOY', 'POS_DOY', 'Lat', 'Lon',
                                           'BP', 'Ts', 'Ta']
                                )
                                niabp_reg_YYYYmmdd_data = len(
                                    iabp_reg_YYYYmmdd_data.index
                                )
                                dates = []
                                for idx in iabp_reg_YYYYmmdd_data.index:
                                    dates.append(
                                        datetime.datetime.strptime(
                                            iabp_reg_YYYYmmdd_data.loc[
                                                idx,'Year'
                                            ]+' '
                                            +iabp_reg_YYYYmmdd_data.loc[
                                                idx,'DOY'
                                            ].split('.')[0]+' '
                                            +iabp_reg_YYYYmmdd_data.loc[
                                                idx,'Hour'
                                            ]+' '
                                            +iabp_reg_YYYYmmdd_data.loc[
                                                idx,'Min'
                                            ], '%Y %j %H %M'
                                        ).strftime('%Y%m%d_%H%M%S')
                                    )
                                iabp_ascii2nc_reg_data = pd.DataFrame(
                                    index=pd.RangeIndex(
                                        0,
                                        niabp_reg_YYYYmmdd_data
                                        * niabp_vars, 1
                                    ),
                                    columns=ascii2nc_file_cols
                                )
                                iabp_ascii2nc_reg_data.loc[
                                    :, 'Message_Type'
                                ] = 'IABP'
                                iabp_ascii2nc_reg_data.loc[
                                    :, 'Station_ID'
                                ] = iabp_reg_YYYYmmdd_data.loc[:, 'BuoyID'] \
                                    .tolist() * niabp_vars
                                iabp_ascii2nc_reg_data.loc[
                                    :, 'Valid_Time'
                                ] = dates * niabp_vars
                                iabp_ascii2nc_reg_data.loc[
                                    :, 'Lat'
                                ] = iabp_reg_YYYYmmdd_data.loc[:, 'Lat'] \
                                    .tolist() * niabp_vars
                                iabp_ascii2nc_reg_data.loc[
                                    :, 'Lon'
                                ] = iabp_reg_YYYYmmdd_data.loc[:, 'Lon'] \
                                    .tolist() * niabp_vars
                                iabp_ascii2nc_reg_data.loc[
                                    :, 'Elevation'
                                ] = '0'
                                iabp_ascii2nc_reg_data.loc[
                                    :, 'Level'
                                ] = '0'
                                iabp_ascii2nc_reg_data.loc[
                                    :, 'QC_String'
                                ] = 'NA'
                                for iabp_var in list(iabp_var_dict.keys()):
                                    start_var_idx = (
                                        niabp_reg_YYYYmmdd_data
                                        * list(iabp_var_dict.keys()) \
                                          .index(iabp_var)
                                    )
                                    end_var_idx = (
                                        niabp_reg_YYYYmmdd_data
                                        * (list(iabp_var_dict.keys()) \
                                           .index(iabp_var)+1) - 1
                                    )
                                    iabp_ascii2nc_reg_data.loc[
                                        start_var_idx:end_var_idx,
                                        'Variable_Name'
                                    ] = iabp_var_dict[iabp_var][0]
                                    iabp_ascii2nc_reg_data.loc[
                                        start_var_idx:end_var_idx, 'Height'
                                    ] = iabp_var_dict[iabp_var][1]
                                    iabp_var_vals = np.array(
                                        iabp_reg_YYYYmmdd_data \
                                        .loc[:, iabp_var].tolist(),
                                        dtype=float
                                    )
                                    iabp_var_vals = np.ma.masked_where(
                                        iabp_var_vals <= -999.00,
                                        iabp_var_vals
                                    )
                                    if iabp_var == 'BP':
                                        iabp_var_vals = iabp_var_vals * 100.
                                    else:
                                        iabp_var_vals = (iabp_var_vals
                                                         + 273.15)
                                    iabp_var_vals = iabp_var_vals.astype(str)
                                    iabp_var_vals.set_fill_value('NA')
                                    iabp_ascii2nc_reg_data.loc[
                                        start_var_idx:end_var_idx,
                                        'Observation_Value'
                                    ] = iabp_var_vals.filled()
                                iabp_ascii2nc_data = (
                                    iabp_ascii2nc_data.append(
                                        iabp_ascii2nc_reg_data
                                    ).reset_index(drop=True)
                                )
                            iabp_ascii2nc_data_string = (
                                iabp_ascii2nc_data.to_string(header=False,
                                                             index=False)
                            )
                            if os.path.exists(iabp_YYYYmmdd_file):
                                os.remove(iabp_YYYYmmdd_file)
                            with open(iabp_YYYYmmdd_file, 'a') as output_file:
                                output_file.write(iabp_ascii2nc_data_string)
                elif RUN_type in ['upper_air', 'conus_sfc']:
                    link_prepbufr_dir = os.path.join(cwd, 'data', 'prepbufr')
                    if not os.path.exists(link_prepbufr_dir):
                        os.makedirs(link_prepbufr_dir)
                        os.makedirs(
                            os.path.join(link_prepbufr_dir, 'HPSS_jobs')
                        )
                    prepbufr_dict_list = []
                    # Set up prepbufr file information
                    if RUN_type == 'upper_air':
                        prepbufr = 'gdas'
                        link_prepbufr_file = os.path.join(
                            link_prepbufr_dir, 'prepbufr.'+prepbufr+'.'
                            +YYYYmmddHH
                        )
                        prepbufr_prod_file = os.path.join(
                            prepbufr_prod_upper_air_dir,
                            prepbufr+'.'+YYYYmmdd, HH, 'atmos',
                            prepbufr+'.t'+HH+'z.prepbufr'
                        )
                        prepbufr_arch_file = os.path.join(
                            prepbufr_arch_dir, prepbufr,
                            'prepbufr.'+prepbufr+'.'+YYYYmmddHH
                        )
                        (prepbufr_hpss_tar, prepbufr_hpss_file,
                         prepbufr_hpss_job_filename) = set_up_gfs_hpss_info(
                             valid_time, hpss_prod_base_dir, prepbufr,
                             'prepbufr', link_prepbufr_dir
                        )
                        prepbufr_dict = {}
                        prepbufr_dict['prod_file'] = prepbufr_prod_file
                        prepbufr_dict['arch_file'] = prepbufr_arch_file
                        prepbufr_dict['hpss_tar'] = prepbufr_hpss_tar
                        prepbufr_dict['hpss_file'] = prepbufr_hpss_file
                        prepbufr_dict['hpss_job_filename'] = (
                            prepbufr_hpss_job_filename
                        )
                        prepbufr_dict['file_type'] = prepbufr
                        prepbufr_dict_list.append(prepbufr_dict)
                    elif RUN_type == 'conus_sfc':
                        prepbufr = 'nam'
                        link_prepbufr_file = os.path.join(
                            link_prepbufr_dir, 'prepbufr.'+prepbufr+'.'
                            +YYYYmmddHH
                        )
                        offset_hr = str(int(HH)%6).zfill(2)
                        offset_time = valid_time + datetime.timedelta(
                            hours=int(offset_hr)
                        )
                        offset_YYYYmmddHH = offset_time.strftime('%Y%m%d%H')
                        offset_YYYYmmdd = offset_time.strftime('%Y%m%d')
                        offset_YYYYmm = offset_time.strftime('%Y%m')
                        offset_YYYY = offset_time.strftime('%Y')
                        offset_mm = offset_time.strftime('%m')
                        offset_dd = offset_time.strftime('%d')
                        offset_HH = offset_time.strftime('%H')
                        offset_filename = (
                            'nam.t'+offset_HH+'z.prepbufr.tm'+offset_hr
                        )
                        prepbufr_prod_file = os.path.join(
                            prepbufr_prod_conus_sfc_dir,
                            'nam.'+offset_YYYYmmdd, offset_filename
                        )
                        prepbufr_arch_file = os.path.join(
                            prepbufr_arch_dir, 'nam',
                            'nam.'+offset_YYYYmmdd, offset_filename
                        )
                        if offset_time \
                                >= datetime.datetime.strptime('20221129',
                                                              '%Y%m%d'):
                            prepbufr_hpss_tar_prefix = 'com_obsproc_v1.1_nam.'
                        elif offset_time \
                                >= datetime.datetime.strptime('20220628',
                                                              '%Y%m%d') \
                                and offset_time \
                                < datetime.datetime.strptime('20221129',
                                                             '%Y%m%d'):
                            prepbufr_hpss_tar_prefix = 'com_obsproc_v1.0_nam.'
                        elif offset_time \
                                >= datetime.datetime.strptime('20200227',
                                                              '%Y%m%d') \
                                and offset_time \
                                < datetime.datetime.strptime('20220628',
                                                             '%Y%m%d'):
                            prepbufr_hpss_tar_prefix = 'com_nam_prod_nam.'
                        elif offset_time \
                                >= datetime.datetime.strptime('20190821',
                                                              '%Y%m%d') \
                                and offset_time \
                                < datetime.datetime.strptime('20200227',
                                                             '%Y%m%d'):
                            prepbufr_hpss_tar_prefix = (
                                'gpfs_dell1_nco_ops_com_nam_prod_nam.'
                            )
                        else:
                            prepbufr_hpss_tar_prefix = 'com2_nam_prod_nam.'
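                        # A worked example of the tm-offset logic above
                        # (assuming 3-hourly valid hours, as used for
                        # conus_sfc): the tmNN suffix counts hours back
                        # from the NAM cycle, so a 2024010103 valid time
                        # gives offset_hr = '03' (3 % 6), offset_time =
                        # 2024010106, and the file nam.t06z.prepbufr.tm03
                        # under nam.20240101.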
                        prepbufr_hpss_tar = os.path.join(
                            hpss_prod_base_dir, 'rh'+offset_YYYY,
                            offset_YYYYmm, offset_YYYYmmdd,
                            prepbufr_hpss_tar_prefix
                            +offset_YYYYmmddHH+'.bufr.tar')
                        prepbufr_hpss_file = offset_filename
                        prepbufr_dict = {}
                        prepbufr_dict['prod_file'] = prepbufr_prod_file
                        prepbufr_dict['arch_file'] = prepbufr_arch_file
                        prepbufr_dict['hpss_tar'] = prepbufr_hpss_tar
                        prepbufr_dict['hpss_file'] = prepbufr_hpss_file
                        prepbufr_dict['hpss_job_filename'] = os.path.join(
                            link_prepbufr_dir, 'HPSS_jobs', 'HPSS_'
                            +prepbufr_hpss_tar.rpartition('/')[2]
                            +'_'+prepbufr_hpss_file.replace('/', '_')+'.sh'
                        )
                        prepbufr_dict_list.append(prepbufr_dict)
                    # Get prepbufr file
                    if not os.path.exists(link_prepbufr_file):
                        for prepbufr_dict in prepbufr_dict_list:
                            prod_file = prepbufr_dict['prod_file']
                            arch_file = prepbufr_dict['arch_file']
                            hpss_tar = prepbufr_dict['hpss_tar']
                            hpss_file = prepbufr_dict['hpss_file']
                            hpss_job_filename = (
                                prepbufr_dict['hpss_job_filename']
                            )
                            # Check for rstprod access on Orion/Hercules
                            if machine in ['ORION', 'HERCULES']:
                                groups_output = subprocess.check_output(
                                    'groups', shell=True, encoding='UTF-8'
                                )
                                if 'rstprod' in groups_output:
                                    arch_file = arch_file.replace(
                                        prepbufr_arch_dir,
                                        '/work/noaa/rstprod/verif/prepbufr'
                                    )
                                else:
                                    prod_file = prod_file+'.nr'
                                    arch_file = arch_file+'.nr'
                                    hpss_file = hpss_file+'.nr'
                            # Make sure using non restricted data for S4
                            if machine == 'S4':
                                prod_file = prod_file+'.nr'
                                arch_file = arch_file+'.nr'
                                hpss_file = hpss_file+'.nr'
                            if os.path.exists(prod_file):
                                print("Linking "+prod_file+" to "
                                      +link_prepbufr_file)
                                os.system('ln -sf '+prod_file+' '
                                          +link_prepbufr_file)
                            elif os.path.exists(arch_file):
                                print("Linking "+arch_file+" to "
                                      +link_prepbufr_file)
                                os.system('ln -sf '+arch_file+' '
                                          +link_prepbufr_file)
                            else:
                                if prepbufr_run_hpss == 'YES':
                                    print("Did not find "+prod_file+" or "
                                          +arch_file+" online...going to try "
                                          +"to get file from HPSS")
                                    get_hpss_data(hpss_job_filename,
                                                  link_prepbufr_dir,
                                                  link_prepbufr_file,
                                                  hpss_tar, hpss_file)
                            if os.path.exists(link_prepbufr_file):
                                break
                            else:
                                if prepbufr_run_hpss == 'YES':
                                    print("WARNING: "+prod_file+" and "
                                          +arch_file+" do not exist and did "
                                          +"not find HPSS file "+hpss_file+" "
                                          +"from "+hpss_tar+" or walltime "
                                          +"exceeded")
                                else:
                                    print("WARNING: "+prod_file+" and "
                                          +arch_file+" do not exist")
                                if prepbufr_dict != prepbufr_dict_list[-1]:
                                    print("Checking next prepbufr file valid "
                                          +"at "+YYYYmmddHH)
                    else:
                        print("Already got "+link_prepbufr_file)
elif RUN == 'grid2obs_step2':
    # Read in RUN related environment variables
    # Get stat files for each option in RUN_type_list
    for RUN_type in RUN_type_list:
        RUN_abbrev_type = RUN_abbrev+'_'+RUN_type
        # Read in RUN_type environment variables
        RUN_abbrev_type_fcyc_list = os.environ[
            RUN_abbrev_type+'_fcyc_list'
        ].split(' ')
        RUN_abbrev_type_vhr_list = os.environ[
            RUN_abbrev_type+'_vhr_list'
        ].split(' ')
        RUN_abbrev_type_start_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_beg'
        ]
        RUN_abbrev_type_end_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_end'
        ]
        RUN_abbrev_type_hr_inc = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_inc'
        ]
        RUN_abbrev_type_fhr_list = os.environ[
            RUN_abbrev_type+'_fhr_list'
        ].split(', ')
        RUN_abbrev_type_gather_by_list = os.environ[
            RUN_abbrev_type+'_gather_by_list'
        ].split(' ')
        # Get date and time information for RUN_type
        RUN_abbrev_type_time_info_dict = get_time_info(
            start_date, end_date, RUN_abbrev_type_start_hr,
            RUN_abbrev_type_end_hr, RUN_abbrev_type_hr_inc,
            RUN_abbrev_type_fhr_list, plot_by
        )
        # Get stat files for each model
        for model in model_list:
            model_idx = model_list.index(model)
            model_stat_dir = model_stat_dir_list[model_idx]
            model_RUN_abbrev_type_gather_by = (
                RUN_abbrev_type_gather_by_list[model_idx]
            )
            link_model_RUN_type_dir = os.path.join(cwd, 'data',
                                                   model, RUN_type)
            if not os.path.exists(link_model_RUN_type_dir):
                os.makedirs(link_model_RUN_type_dir)
            for time in RUN_abbrev_type_time_info_dict:
                valid_time = time['valid_time']
                init_time = time['init_time']
                lead = time['lead']
                if init_time.strftime('%H') not in RUN_abbrev_type_fcyc_list:
                    continue
                elif valid_time.strftime('%H') \
                        not in RUN_abbrev_type_vhr_list:
                    continue
                else:
                    get_model_stat_file(valid_time, init_time, lead,
                                        model, model_stat_dir,
                                        model_RUN_abbrev_type_gather_by,
                                        'grid2obs', RUN_type,
                                        link_model_RUN_type_dir)
elif RUN == 'precip_step1':
    # Read in RUN related environment variables
    obs_run_hpss = os.environ[RUN_abbrev+'_obs_data_run_hpss']
    ccpa_accum24hr_prod_dir = os.environ['ccpa_24hr_prod_dir']
    ccpa_accum24hr_arch_dir = os.environ['ccpa_24hr_arch_dir']
    # No HPSS access from Orion, S4, and Hercules
    if machine in ['ORION', 'S4', 'HERCULES']:
        print("WARNING: "+machine+" does not currently have access to HPSS..."
              +"setting "+RUN_abbrev+"_obs_data_run_hpss to NO")
        obs_run_hpss = 'NO'
    # Get model forecast and observation files for each option
    # in RUN_type_list
    for RUN_type in RUN_type_list:
        print("Gathering files for "+RUN_type)
        RUN_abbrev_type = RUN_abbrev+'_'+RUN_type
        # Read in RUN_type environment variables
        RUN_abbrev_type_fcyc_list = os.environ[
            RUN_abbrev_type+'_fcyc_list'
        ].split(' ')
        RUN_abbrev_type_vhr_list = os.environ[
            RUN_abbrev_type+'_vhr_list'
        ].split(' ')
        RUN_abbrev_type_start_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_beg'
        ]
        RUN_abbrev_type_end_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_end'
        ]
        RUN_abbrev_type_hr_inc = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_inc'
        ]
        RUN_abbrev_type_fhr_list = os.environ[
            RUN_abbrev_type+'_fhr_list'
        ].split(', ')
        RUN_abbrev_type_model_bucket_list = os.environ[
            RUN_abbrev_type+'_model_bucket_list'
        ].split(' ')
        RUN_abbrev_type_model_var_list = os.environ[
            RUN_abbrev_type+'_model_var_list'
        ].split(' ')
        RUN_abbrev_type_model_file_format_list = os.environ[
            RUN_abbrev_type+'_model_file_format_list'
        ].split(' ')
        RUN_abbrev_type_accum_length = (
            RUN_type.split('accum')[1].replace('hr','')
        )
        # Get date and time information for RUN_type
        RUN_abbrev_type_time_info_dict = get_time_info(
            start_date, end_date, RUN_abbrev_type_start_hr,
            RUN_abbrev_type_end_hr, RUN_abbrev_type_hr_inc,
            RUN_abbrev_type_fhr_list, make_met_data_by
        )
        RUN_abbrev_type_valid_time_list = []
        # Get model forecast files
        for model in model_list:
            model_idx = model_list.index(model)
            model_dir = model_dir_list[model_idx]
            model_hpss_dir = model_hpss_dir_list[model_idx]
            model_bucket = RUN_abbrev_type_model_bucket_list[model_idx]
            model_var = RUN_abbrev_type_model_var_list[model_idx]
            model_file_format = (
                RUN_abbrev_type_model_file_format_list[model_idx]
            )
            link_model_dir = os.path.join(cwd, 'data', model)
            if not os.path.exists(link_model_dir):
                os.makedirs(link_model_dir)
                os.makedirs(os.path.join(link_model_dir, 'HPSS_jobs'))
            for time in RUN_abbrev_type_time_info_dict:
                valid_time = time['valid_time']
                init_time = time['init_time']
                lead_end = time['lead']
                print("- Gathering model forecast file for "+model+" for "
                      +"init: "+init_time.strftime('%Y%m%d%H')+", "
                      +"end accumulation lead: "+lead_end+", "
                      +"valid: "+valid_time.strftime('%Y%m%d%H'))
                get_file = True
                if init_time.strftime('%H') not in RUN_abbrev_type_fcyc_list:
                    print("WARNING: init. hour "+init_time.strftime('%H')+" "
                          +"not in list of requested init. hours "
                          +','.join(RUN_abbrev_type_fcyc_list))
                    get_file = False
                if valid_time.strftime('%H') not in RUN_abbrev_type_vhr_list:
                    print("WARNING: valid hour "+valid_time.strftime('%H')+" "
                          +"not in list of requested valid hours "
                          +','.join(RUN_abbrev_type_vhr_list))
                    get_file = False
                if check_spinup_period:
                    if init_time >= spinup_period_start_dt \
                            and init_time <= spinup_period_end_dt:
                        print("WARNING: lead "+lead_end+" with init time "
                              +init_time.strftime('%Y%m%d%H')+" "
                              +"in spinup period "+spinup_period_start
                              +"-"+spinup_period_end)
                        get_file = False
                if get_file:
                    if valid_time not in RUN_abbrev_type_valid_time_list:
                        RUN_abbrev_type_valid_time_list.append(valid_time)
                    lead_in_accum_list = []
                    if model_bucket == 'continuous':
                        nfiles_in_accum = 2
                        lead_in_accum_list.append(lead_end)
                        lead_start = (
                            int(lead_end)-int(RUN_abbrev_type_accum_length)
                        )
                        if lead_start > 0:
                            lead_in_accum_list.append(str(lead_start))
                    else:
                        nfiles_in_accum = (
                            int(RUN_abbrev_type_accum_length)
                            /int(model_bucket)
                        )
                        nf = 1
                        while nf <= nfiles_in_accum:
                            lead_now = (
                                int(lead_end)-((nf-1)*int(model_bucket))
                            )
                            if lead_now >= 0:
                                lead_in_accum_list.append(str(lead_now))
                            nf+=1
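                    # lead_in_accum_list holds every forecast hour needed
                    # to build the accumulation: a continuous bucket needs
                    # the two files bracketing the window (e.g. a 24 hr
                    # accumulation ending at f36 needs f36 and f12), while
                    # a fixed bucket needs accum_length/bucket files (e.g.
                    # 24 hr from 6 hr buckets ending at f36 needs f36, f30,
                    # f24, and f18); files are only used if the full set
                    # is available.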
                    if len(lead_in_accum_list) == nfiles_in_accum:
                        for lead in lead_in_accum_list:
                            if model_var == 'PRATE':
                                if not os.path.exists(
                                        os.path.join(link_model_dir,
                                                     'PRATE_files')
                                ):
                                    os.mkdir(os.path.join(link_model_dir,
                                                          'PRATE_files'))
                                    os.mkdir(os.path.join(link_model_dir,
                                                          'PRATE_files',
                                                          'HPSS_jobs'))
                                get_model_file(
                                    valid_time, init_time, lead, model,
                                    model_dir, model_file_format,
                                    model_data_run_hpss, model_hpss_dir,
                                    os.path.join(link_model_dir,
                                                 'PRATE_files'),
                                    'PRATE.f{lead?fmt=%3H}'
                                    +'.{init?fmt=%Y%m%d%H}'
                                )
                                link_PRATE_model_file = os.path.join(
                                    link_model_dir, 'PRATE_files',
                                    format_filler(
                                        'PRATE.f{lead?fmt=%3H}.'
                                        +'{init?fmt=%Y%m%d%H}',
                                        valid_time, init_time, lead
                                    )
                                )
                                link_model_file = os.path.join(
                                    link_model_dir,
                                    format_filler(
                                        'f{lead?fmt=%3H}'
                                        +'.{init?fmt=%Y%m%d%H}',
                                        valid_time, init_time, lead
                                    )
                                )
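                                # Convert PRATE to an hourly APCP file:
                                # GRIB1 -> GRIB2 (cnvgrib -g12), multiply
                                # the rate (kg m-2 s-1) by 3600 s and
                                # relabel it APCP (wgrib2 -rpn "3600:*"),
                                # then convert back to GRIB1 (cnvgrib -g21).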
                                if os.path.exists(link_PRATE_model_file) \
                                        and not os.path.exists(link_model_file):
                                    cnvgrib = os.environ['CNVGRIB']
                                    wgrib2 = os.environ['WGRIB2']
                                    tmp_gb2_file = os.path.join(
                                        link_model_dir, 'tmp_gb2'
                                    )
                                    tmp_gb2_APCP_file = os.path.join(
                                        link_model_dir, 'tmp_gb2_APCP'
                                    )
                                    os.system(
                                        cnvgrib+' -g12 '
                                        +link_PRATE_model_file+' '
                                        +tmp_gb2_file
                                    )
                                    os.system(
                                        wgrib2+' '+tmp_gb2_file+' -match '
                                        +'":PRATE:" -rpn "3600:*" -set_var '
                                        +'APCP -set table_4.10 1 -grib_out '
                                        +tmp_gb2_APCP_file+' >>/dev/null'
                                    )
                                    os.system(
                                        cnvgrib+' -g21 '+tmp_gb2_APCP_file
                                        +' '+link_model_file
                                    )
                                    os.system(
                                        'rm '+os.path.join(link_model_dir,
                                                           'tmp*')
                                    )
                            elif model_var == 'APCP':
                                get_model_file(
                                    valid_time, init_time, lead, model,
                                    model_dir, model_file_format,
                                    model_data_run_hpss, model_hpss_dir,
                                    link_model_dir,
                                    'f{lead?fmt=%3H}.{init?fmt=%Y%m%d%H}'
                                )
        # Get RUN_type observation files
        for valid_time in RUN_abbrev_type_valid_time_list:
            print("- Gathering truth file for "
                  +"valid: "+valid_time.strftime('%Y%m%d%H'))
            YYYYmmddHH = valid_time.strftime('%Y%m%d%H')
            YYYYmmdd = valid_time.strftime('%Y%m%d')
            YYYYmm = valid_time.strftime('%Y%m')
            YYYY = valid_time.strftime('%Y')
            mm = valid_time.strftime('%m')
            dd = valid_time.strftime('%d')
            HH = valid_time.strftime('%H')
            if valid_time.strftime('%H') not in RUN_abbrev_type_vhr_list:
                continue
            else:
                link_RUN_type_dir = os.path.join(cwd, 'data', RUN_type)
                if not os.path.exists(link_RUN_type_dir):
                    os.makedirs(link_RUN_type_dir)
                    os.makedirs(os.path.join(link_RUN_type_dir,
                                             'HPSS_jobs'))
                if RUN_type == 'ccpa_accum24hr':
                    link_RUN_type_file = os.path.join(
                        link_RUN_type_dir, 'ccpa.'+YYYYmmdd+'12.24h'
                    )
                    RUN_type_prod_file = os.path.join(
                        ccpa_accum24hr_prod_dir, 'precip.'+YYYYmmdd,
                        'ccpa.'+YYYYmmdd+'12.24h'
                    )
                    RUN_type_arch_file = os.path.join(
                        ccpa_accum24hr_arch_dir,
                        'ccpa.'+YYYYmmdd+'12.24h'
                    )
                    if valid_time \
                            >= datetime.datetime.strptime('20220628',
                                                          '%Y%m%d'):
                        RUN_type_hpss_tar_prefix = ('com_verf_precip_v4.5_'
                                                    +'precip.')
                    elif valid_time \
                            >= datetime.datetime.strptime('20200226',
                                                          '%Y%m%d') \
                            and valid_time \
                            < datetime.datetime.strptime('20220628',
                                                         '%Y%m%d'):
                        RUN_type_hpss_tar_prefix = 'com_verf_prod_precip.'
                    elif valid_time \
                            >= datetime.datetime.strptime('20200126',
                                                          '%Y%m%d') \
                            and valid_time \
                            < datetime.datetime.strptime('20200226',
                                                         '%Y%m%d'):
                        RUN_type_hpss_tar_prefix = (
                            'gpfs_dell1_nco_ops_com_verf_prod_precip.'
                        )
                    else:
                        RUN_type_hpss_tar_prefix = 'com_verf_prod_precip.'
                    RUN_type_hpss_tar = os.path.join(
                        hpss_prod_base_dir, 'rh'+YYYY, YYYYmm, YYYYmmdd,
                        RUN_type_hpss_tar_prefix+YYYYmmdd+'.precip.tar'
                    )
                    RUN_type_hpss_file = 'ccpa.'+YYYYmmdd+'12.24h'
                if not os.path.exists(link_RUN_type_file):
                    if os.path.exists(RUN_type_prod_file):
                        print("Linking "+RUN_type_prod_file+" to "
                              +link_RUN_type_file)
                        os.system('ln -sf '+RUN_type_prod_file+' '
                                  +link_RUN_type_file)
                    elif os.path.exists(RUN_type_arch_file):
                        print("Linking "+RUN_type_arch_file+" to "
                              +link_RUN_type_file)
                        os.system('ln -sf '+RUN_type_arch_file+' '
                                  +link_RUN_type_file)
                    else:
                        if obs_run_hpss == 'YES':
                            print("Did not find "+RUN_type_prod_file
                                  +" or "+RUN_type_arch_file
                                  +" online...going to try "
                                  +"to get file from HPSS")
                            hpss_job_filename = os.path.join(
                                link_RUN_type_dir, 'HPSS_jobs',
                                'HPSS_'+RUN_type_hpss_tar.rpartition('/')[2]
                                +'_'+RUN_type_hpss_file.replace('/', '_')
                                +'.sh'
                            )
                            get_hpss_data(hpss_job_filename,
                                          link_RUN_type_dir,
                                          link_RUN_type_file,
                                          RUN_type_hpss_tar,
                                          RUN_type_hpss_file)
                else:
                    print("Already got "+link_RUN_type_file)
                if not os.path.exists(link_RUN_type_file):
                    if obs_run_hpss == 'YES':
                        print("WARNING: "+RUN_type_prod_file+" and "
                              +RUN_type_arch_file+" do not exist and did "
                              +"not find HPSS file "+RUN_type_hpss_file+" "
                              +"from "+RUN_type_hpss_tar+" or walltime "
                              +"exceeded")
                    else:
                        print("WARNING: "+RUN_type_prod_file+" and "
                              +RUN_type_arch_file+" do not exist")
elif RUN == 'precip_step2':
    # Read in RUN related environment variables
    # Get stat files for each option in RUN_type_list
    for RUN_type in RUN_type_list:
        RUN_abbrev_type = RUN_abbrev+'_'+RUN_type
        # Read in RUN_type environment variables
        RUN_abbrev_type_fcyc_list = os.environ[
            RUN_abbrev_type+'_fcyc_list'
        ].split(' ')
        RUN_abbrev_type_vhr_list = os.environ[
            RUN_abbrev_type+'_vhr_list'
        ].split(' ')
        RUN_abbrev_type_start_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_beg'
        ]
        RUN_abbrev_type_end_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_end'
        ]
        RUN_abbrev_type_hr_inc = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_inc'
        ]
        RUN_abbrev_type_fhr_list = os.environ[
            RUN_abbrev_type+'_fhr_list'
        ].split(', ')
        RUN_abbrev_type_gather_by_list = os.environ[
            RUN_abbrev_type+'_gather_by_list'
        ].split(' ')
        # Get date and time information for RUN_type
        RUN_abbrev_type_time_info_dict = get_time_info(
            start_date, end_date, RUN_abbrev_type_start_hr,
            RUN_abbrev_type_end_hr, RUN_abbrev_type_hr_inc,
            RUN_abbrev_type_fhr_list, plot_by
        )
        # Get stat files for each model
        for model in model_list:
            model_idx = model_list.index(model)
            model_stat_dir = model_stat_dir_list[model_idx]
            model_RUN_abbrev_type_gather_by = (
                RUN_abbrev_type_gather_by_list[model_idx]
            )
            link_model_RUN_type_dir = os.path.join(cwd, 'data', model,
                                                   RUN_type)
            if not os.path.exists(link_model_RUN_type_dir):
                os.makedirs(link_model_RUN_type_dir)
            for time in RUN_abbrev_type_time_info_dict:
                valid_time = time['valid_time']
                init_time = time['init_time']
                lead = time['lead']
                if init_time.strftime('%H') not in RUN_abbrev_type_fcyc_list:
                    continue
                elif valid_time.strftime('%H') not in RUN_abbrev_type_vhr_list:
                    continue
                else:
                    get_model_stat_file(valid_time, init_time, lead, model,
                                        model_stat_dir,
                                        model_RUN_abbrev_type_gather_by,
                                        'precip', RUN_type,
                                        link_model_RUN_type_dir)
elif RUN == 'satellite_step1':
    import netCDF4 as nc
    # Read in RUN related environment variables
    ghrsst_ncei_avhrr_anl_ftp = os.environ['ghrsst_ncei_avhrr_anl_ftp']
    ghrsst_ospo_geopolar_anl_ftp = os.environ['ghrsst_ospo_geopolar_anl_ftp']
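    # Truth files are GHRSST L4 SST analyses pulled with wget from the
    # NCEI (AVHRR OI) and OSPO (Geo-Polar Blended) servers configured
    # above.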
    # Get model forecast and truth files for each option in RUN_type_list
    for RUN_type in RUN_type_list:
        RUN_abbrev_type = RUN_abbrev+'_'+RUN_type
        print("Gathering files for "+RUN_type)
        # Read in RUN_type environment variables
        RUN_abbrev_type_fcyc_list = os.environ[
            RUN_abbrev_type+'_fcyc_list'
        ].split(' ')
        RUN_abbrev_type_vhr_list = os.environ[
            RUN_abbrev_type+'_vhr_list'
        ].split(' ')
        RUN_abbrev_type_start_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_beg'
        ]
        RUN_abbrev_type_end_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_end'
        ]
        RUN_abbrev_type_hr_inc = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_inc'
        ]
        RUN_abbrev_type_fhr_list = os.environ[
            RUN_abbrev_type+'_fhr_list'
        ].split(', ')
        # Get date and time information for RUN_type
        RUN_abbrev_type_time_info_dict = get_time_info(
            start_date, end_date, RUN_abbrev_type_start_hr,
            RUN_abbrev_type_end_hr, RUN_abbrev_type_hr_inc,
            RUN_abbrev_type_fhr_list, make_met_data_by
        )
        RUN_abbrev_type_valid_time_list = []
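        # Model SST is sampled at five 6-hourly leads (spanning 24 hours)
        # per verification time via lead_in_mean_list below, matching the
        # daily averaging window of the GHRSST analyses.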
        # Get forecast and truth files for each model
        for model in model_list:
            model_idx = model_list.index(model)
            model_dir = model_dir_list[model_idx]
            model_file_format = model_file_format_list[model_idx]
            model_hpss_dir = model_hpss_dir_list[model_idx]
            link_model_dir = os.path.join(cwd, 'data', model)
            if not os.path.exists(link_model_dir):
                os.makedirs(link_model_dir)
                os.makedirs(os.path.join(link_model_dir, 'HPSS_jobs'))
            # Get model forecast files
            for time in RUN_abbrev_type_time_info_dict:
                valid_time = time['valid_time']
                init_time = time['init_time']
                lead_end = time['lead']
                print("- Gathering model forecast file for "+model+" for "
                      +"init: "+init_time.strftime('%Y%m%d%H')+", "
                      +"end accumulation lead: "+lead_end+", "
                      +"valid: "+valid_time.strftime('%Y%m%d%H'))
                get_file = True
                if init_time.strftime('%H') not in RUN_abbrev_type_fcyc_list:
                    print("WARNING: init. hour "+init_time.strftime('%H')+" "
                          +"not in list of requested init. hours "
                          +','.join(RUN_abbrev_type_fcyc_list))
                    get_file = False
                if valid_time.strftime('%H') not in RUN_abbrev_type_vhr_list:
                    print("WARNING: valid hour "+valid_time.strftime('%H')+" "
                          +"not in list of requested valid hours "
                          +','.join(RUN_abbrev_type_vhr_list))
                    get_file = False
                if check_spinup_period:
                    if init_time >= spinup_period_start_dt \
                            and init_time <= spinup_period_end_dt:
                        print("WARNING: lead "+lead_end+" with init time "
                              +init_time.strftime('%Y%m%d%H')+" "
                              +"in spinup period "+spinup_period_start
                              +"-"+spinup_period_end)
                        get_file = False
                if get_file:
                    if valid_time not in RUN_abbrev_type_valid_time_list:
                        RUN_abbrev_type_valid_time_list.append(valid_time)
                    if RUN_type in ['ghrsst_ncei_avhrr_anl',
                                    'ghrsst_ospo_geopolar_anl']:
                        lead_intvl = 6
                        nfiles_in_mean = 5
                        lead_in_mean_list = []
                        nf = 1
                        while nf <= nfiles_in_mean:
                            lead_now = int(lead_end)-((nf-1)*lead_intvl)
                            if lead_now >= 0:
                                lead_in_mean_list.append(str(lead_now))
                            nf+=1
                        if len(lead_in_mean_list) == nfiles_in_mean:
                            for lead in lead_in_mean_list:
                                get_model_file(valid_time, init_time, lead,
                                               model, model_dir,
                                               model_file_format,
                                               model_data_run_hpss,
                                               model_hpss_dir,
                                               link_model_dir,
                                               'f{lead?fmt=%3H}'
                                               +'.{init?fmt=%Y%m%d%H}')
        # Get RUN_type observation files
        for valid_time in RUN_abbrev_type_valid_time_list:
            print("- Gathering truth file for "
                  +"valid: "+valid_time.strftime('%Y%m%d%H'))
            YYYYmmddHH = valid_time.strftime('%Y%m%d%H')
            YYYYmmdd = valid_time.strftime('%Y%m%d')
            YYYYmm = valid_time.strftime('%Y%m')
            YYYY = valid_time.strftime('%Y')
            mm = valid_time.strftime('%m')
            dd = valid_time.strftime('%d')
            HH = valid_time.strftime('%H')
            DOY = valid_time.strftime('%j')
            valid_timeM1 = valid_time - datetime.timedelta(hours=24)
            YYYYmmddHHM1 = valid_timeM1.strftime('%Y%m%d%H')
            YYYYmmddM1 = valid_timeM1.strftime('%Y%m%d')
            YYYYmmM1 = valid_timeM1.strftime('%Y%m')
            YYYYM1 = valid_timeM1.strftime('%Y')
            mmM1 = valid_timeM1.strftime('%m')
            ddM1 = valid_timeM1.strftime('%d')
            HHM1 = valid_timeM1.strftime('%H')
            DOYM1 = valid_timeM1.strftime('%j')
            if valid_time.strftime('%H') not in RUN_abbrev_type_vhr_list:
                continue
            else:
                link_RUN_type_dir = os.path.join(cwd, 'data', RUN_type)
                if not os.path.exists(link_RUN_type_dir):
                    os.makedirs(link_RUN_type_dir)
                    os.makedirs(os.path.join(link_RUN_type_dir,
                                             'wget_jobs'))
                link_RUN_type_file = os.path.join(link_RUN_type_dir,
                                                  RUN_type+'.'+YYYYmmddHH)
                if RUN_type in ['ghrsst_ncei_avhrr_anl',
                                'ghrsst_ospo_geopolar_anl']:
                    # ghrsst_ncei_avhrr_anl: YYYYmmddM-1 00Z-YYYYmmdd 00Z
                    if RUN_type == 'ghrsst_ncei_avhrr_anl':
                        RUN_type_ftp_file = os.path.join(
                            ghrsst_ncei_avhrr_anl_ftp, YYYYM1, DOYM1,
                            YYYYmmddM1
                            +'120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-'
                            +'v02.0-fv02.1.nc'
                        )
                        adjust_time = 86400
                    # ghrsst_ospo_geopolar_anl: YYYYmmddM-1 00Z-YYYYmmdd 00Z
                    elif RUN_type == 'ghrsst_ospo_geopolar_anl':
                        RUN_type_ftp_file = os.path.join(
                            ghrsst_ospo_geopolar_anl_ftp, YYYYM1, DOYM1,
                            YYYYmmddM1
                            +'000000-OSPO-L4_GHRSST-SSTfnd-Geo_Polar_Blended'
                            +'-GLOB-v02.0-fv01.0.nc'
                        )
                        adjust_time = 43200
                    RUN_type_wget_job_filename = os.path.join(
                        link_RUN_type_dir, 'wget_jobs',
                        'wget_'+RUN_type+'.'+YYYYmmddHH+'.sh'
                    )
                    RUN_type_wget_job_name = 'wget_'+RUN_type+'.'+YYYYmmddHH
                    RUN_type_wget_job_output = os.path.join(
                        link_RUN_type_dir, 'wget_jobs',
                        'wget_'+RUN_type+'.'+YYYYmmddHH+'.out'
                    )
                    with open(RUN_type_wget_job_filename, 'w') \
                            as RUN_type_wget_job_file:
                        RUN_type_wget_job_file.write('#!/bin/sh'+'\n')
                        RUN_type_wget_job_file.write('wget '
                                                     +RUN_type_ftp_file
                                                     +' -O '
                                                     +link_RUN_type_file)
                    wget_data(RUN_type_wget_job_filename,
                              RUN_type_wget_job_name,
                              RUN_type_wget_job_output)
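                    # The downloaded analysis is timestamped for its
                    # averaging window, so the time coordinate is shifted
                    # below by adjust_time (+86400 s NCEI, +43200 s OSPO);
                    # this appears to align the file's time with the valid
                    # time being verified.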
                    if os.path.exists(link_RUN_type_file) \
                            and os.path.getsize(link_RUN_type_file) > 0:
                        link_RUN_type_file_data = nc.Dataset(
                            link_RUN_type_file, 'r+'
                        )
                        link_RUN_type_file_data.variables['time'][:] = (
                            float(link_RUN_type_file_data
                                  .variables['time'][:][0])
                            +adjust_time
                        )
                        link_RUN_type_file_data.renameVariable(
                            'mask', 'mask_byte'
                        )
                        link_RUN_type_file_data.renameVariable(
                            'sea_ice_fraction', 'sea_ice_fraction_byte'
                        )
                        link_RUN_type_file_data.close()
                        link_RUN_type_file_data = nc.Dataset(
                            link_RUN_type_file, 'r+'
                        )
                        for var_name in ['mask', 'sea_ice_fraction']:
                            new_var = (
                                link_RUN_type_file_data.createVariable(
                                    var_name, "f4", ("time","lat","lon",)
                                )
                            )
                            for attname in link_RUN_type_file_data\
                                    .variables[var_name+'_byte'].ncattrs():
                                if attname != '_FillValue':
                                    new_var.setncatts(
                                        {attname:
                                         link_RUN_type_file_data
                                         .variables[var_name+'_byte']
                                         .getncattr(attname)}
                                    )
                            new_var[:] = (
                                link_RUN_type_file_data
                                .variables[var_name+'_byte'][:]/1.
                            )
                        link_RUN_type_file_data.close()
                        gen_vx_mask = os.path.join(
                            os.environ['HOMEMET'],
                            os.environ['HOMEMET_bin_exec'],
                            'gen_vx_mask'
                        )
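                        # Use MET's gen_vx_mask to build verification masks
                        # from the analysis itself: WATER (mask == 1),
                        # SEA_ICE (sea_ice_fraction >= 0.15), SEA_ICE_FREE
                        # (no ice over water), plus polar variants that
                        # zero out everything equatorward of 60 degrees.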
                        os.system(
                            gen_vx_mask+' '+link_RUN_type_file+' '
                            +link_RUN_type_file+' '
                            +os.path.join(link_RUN_type_dir, RUN_type+'.'
                                          +YYYYmmddHH+'.vx_mask.WATER.nc ')
                            +'-type data -thresh ==1 -mask_field '
                            +"'name="+'"mask"; level="(0,*,*)";'+"' "
                            +'-name WATER'
                        )
                        os.system(
                            gen_vx_mask+' '+link_RUN_type_file+' '
                            +link_RUN_type_file+' '
                            +os.path.join(link_RUN_type_dir, RUN_type+'.'
                                          +YYYYmmddHH
                                          +'.vx_mask.SEA_ICE.nc ')
                            +'-type data -thresh '+'">=0.15"'
                            +' -mask_field '
                            +"'name="+'"sea_ice_fraction"; level="(0,*,*)";'
                            +"' "+'-name SEA_ICE'
                        )
                        os.system(
                            gen_vx_mask+' '
                            +os.path.join(link_RUN_type_dir, RUN_type+'.'
                                          +YYYYmmddHH
                                          +'.vx_mask.SEA_ICE.nc ')
                            +os.path.join(link_RUN_type_dir, RUN_type+'.'
                                          +YYYYmmddHH
                                          +'.vx_mask.SEA_ICE.nc ')
                            +os.path.join(link_RUN_type_dir, RUN_type+'.'
                                          +YYYYmmddHH
                                          +'.vx_mask.SEA_ICE_POLAR.nc ')
                            +'-type lat -thresh '+"'<60&&>-60'"
                            +' -mask_field '
                            +"'name="+'"SEA_ICE"; level="(*,*)";'+"' "
                            +'-name SEA_ICE_POLAR -value "0"'
                        )
                        os.system(
                            gen_vx_mask+' '
                            +os.path.join(link_RUN_type_dir, RUN_type+'.'
                                          +YYYYmmddHH+'.vx_mask.WATER.nc ')
                            +os.path.join(link_RUN_type_dir, RUN_type+'.'
                                          +YYYYmmddHH
                                          +'.vx_mask.SEA_ICE.nc ')
                            +os.path.join(link_RUN_type_dir, RUN_type+'.'
                                          +YYYYmmddHH
                                          +'.vx_mask.SEA_ICE_FREE.nc ')
                            +'-type data -thresh '+"'==0'"+' -intersection '
                            +'-mask_field '+"'name="+'"SEA_ICE"; '
                            +'level="(*,*)";'+"' "+'-name SEA_ICE_FREE'
                        )
                        os.system(
                            gen_vx_mask+' '
                            +os.path.join(link_RUN_type_dir, RUN_type+'.'
                                          +YYYYmmddHH
                                          +'.vx_mask.SEA_ICE_FREE.nc ')
                            +os.path.join(link_RUN_type_dir, RUN_type+'.'
                                          +YYYYmmddHH
                                          +'.vx_mask.SEA_ICE_FREE.nc ')
                            +os.path.join(link_RUN_type_dir, RUN_type+'.'
                                          +YYYYmmddHH
                                          +'.vx_mask.SEA_ICE_FREE_POLAR.nc ')
                            +'-type lat -thresh '+"'<60&&>-60'"
                            +' -mask_field '
                            +"'name="+'"SEA_ICE_FREE"; level="(*,*)";'+"' "
                            +'-name SEA_ICE_FREE_POLAR -value "0"'
                        )
                    elif os.path.exists(link_RUN_type_file) \
                            and os.path.getsize(link_RUN_type_file) == 0:
                        print("WARNING: could not get "+RUN_type_ftp_file)
                        os.remove(link_RUN_type_file)
                    else:
                        print("WARNING: could not get "+RUN_type_ftp_file)
elif RUN == 'satellite_step2':
    # Read in RUN related environment variables
    # Get stat files for each option in RUN_type_list
    for RUN_type in RUN_type_list:
        RUN_abbrev_type = RUN_abbrev+'_'+RUN_type
        # Read in RUN_type environment variables
        RUN_abbrev_type_fcyc_list = os.environ[
            RUN_abbrev_type+'_fcyc_list'
        ].split(' ')
        RUN_abbrev_type_vhr_list = os.environ[
            RUN_abbrev_type+'_vhr_list'
        ].split(' ')
        RUN_abbrev_type_start_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_beg'
        ]
        RUN_abbrev_type_end_hr = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_end'
        ]
        RUN_abbrev_type_hr_inc = os.environ[
            RUN_abbrev_type+'_'+make_met_data_by.lower()+'_hr_inc'
        ]
        RUN_abbrev_type_fhr_list = os.environ[
            RUN_abbrev_type+'_fhr_list'
        ].split(', ')
        RUN_abbrev_type_gather_by_list = os.environ[
            RUN_abbrev_type+'_gather_by_list'
        ].split(' ')
        # Get date and time information for RUN_type
        RUN_abbrev_type_time_info_dict = get_time_info(
            start_date, end_date, RUN_abbrev_type_start_hr,
            RUN_abbrev_type_end_hr, RUN_abbrev_type_hr_inc,
            RUN_abbrev_type_fhr_list, plot_by
        )
        # Get stat files for each model
        for model in model_list:
            model_idx = model_list.index(model)
            model_stat_dir = model_stat_dir_list[model_idx]
            model_RUN_abbrev_type_gather_by = (
                RUN_abbrev_type_gather_by_list[model_idx]
            )
            link_model_RUN_type_dir = os.path.join(cwd, 'data', model,
                                                   RUN_type)
            if not os.path.exists(link_model_RUN_type_dir):
                os.makedirs(link_model_RUN_type_dir)
            for time in RUN_abbrev_type_time_info_dict:
                valid_time = time['valid_time']
                init_time = time['init_time']
                lead = time['lead']
                if init_time.strftime('%H') not in RUN_abbrev_type_fcyc_list:
                    continue
                elif valid_time.strftime('%H') not in RUN_abbrev_type_vhr_list:
                    continue
                else:
                    get_model_stat_file(valid_time, init_time, lead, model,
                                        model_stat_dir,
                                        model_RUN_abbrev_type_gather_by,
                                        'satellite', RUN_type,
                                        link_model_RUN_type_dir)
elif RUN == 'tropcyc':
    # Read in RUN related environment variables
    RUN_abbrev_fcyc_list = os.environ[RUN_abbrev+'_fcyc_list'].split(' ')
    RUN_abbrev_vhr_list = os.environ[RUN_abbrev+'_vhr_list'].split(' ')
    RUN_abbrev_model_atcf_name_list = (
        os.environ[RUN_abbrev+'_model_atcf_name_list'].split(' ')
    )
    RUN_abbrev_model_file_format_list = (
        os.environ[RUN_abbrev+'_model_file_format_list'].split(' ')
    )
    RUN_abbrev_tropcyc_use_adeck_for_missing_data = (
        os.environ[RUN_abbrev+'_use_adeck_for_missing_data']
    )
    RUN_abbrev_config_storm_list = (
        os.environ[RUN_abbrev+'_storm_list'].split(' ')
    )
    RUN_abbrev_fhr_list = os.environ[RUN_abbrev+'_fhr_list'].split(', ')
    # Check storm_list to see if all storms for basin and year requested
    import get_tc_info
    tc_dict = get_tc_info.get_tc_dict()
    RUN_abbrev_tc_list = []
    for config_storm in RUN_abbrev_config_storm_list:
        config_storm_basin = config_storm.split('_')[0]
        config_storm_year = config_storm.split('_')[1]
        config_storm_name = config_storm.split('_')[2]
        if config_storm_name == 'ALLNAMED':
            for byn in list(tc_dict.keys()):
                if config_storm_basin+'_'+config_storm_year in byn:
                    RUN_abbrev_tc_list.append(byn)
        else:
            RUN_abbrev_tc_list.append(config_storm)
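    # For each storm: link or download the a-deck (model tracks) and
    # b-deck (best track) files from the NHC/Navy archives, then build
    # per-storm track files for every 6-hourly initialization over the
    # storm's lifetime.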
    # Get bdeck/truth and model track files
    for tc in RUN_abbrev_tc_list:
        basin = tc.split('_')[0]
        year = tc.split('_')[1]
        name = tc.split('_')[2]
        tc_id = tc_dict[tc]
        # Get adeck/bdeck files
        for deck in ['a', 'b']:
            link_deck_dir = os.path.join(cwd, 'data', deck+'deck')
            if not os.path.exists(link_deck_dir):
                os.makedirs(link_deck_dir)
                os.makedirs(os.path.join(link_deck_dir, 'wget_jobs'))
            deck_filename = deck+tc_id+'.dat'
            link_deck_file = os.path.join(link_deck_dir, deck_filename)
            if deck == 'a':
                link_adeck_file = link_deck_file
            elif deck == 'b':
                link_bdeck_file = link_deck_file
            nhc_atcfnoaa_deck_dir = os.environ['nhc_atcfnoaa_'+deck
                                               +'deck_dir']
            nhc_atcfnavy_deck_dir = os.environ['nhc_atcfnavy_'+deck
                                               +'deck_dir']
            nhc_atcf_deck_ftp = os.environ['nhc_atcf_'+deck+'deck_ftp']
            nhc_atfc_arch_ftp = os.environ['nhc_atfc_arch_ftp']
            if deck == 'b':
                navy_atcf_bdeck_ftp = os.environ['navy_atcf_bdeck_ftp']
            nhc_deck_file = os.path.join(nhc_atcfnoaa_deck_dir,
                                         deck_filename)
            navy_deck_file = os.path.join(nhc_atcfnavy_deck_dir,
                                          deck_filename)
            if os.path.exists(nhc_deck_file):
                os.system('ln -sf '+nhc_deck_file+' '+link_deck_file)
            elif os.path.exists(navy_deck_file):
                os.system('ln -sf '+navy_deck_file+' '+link_deck_file)
            else:
                if basin in ['AL', 'CP', 'EP']:
                    nhc_ftp_deck_file = os.path.join(nhc_atcf_deck_ftp,
                                                     deck_filename)
                    nhc_ftp_deck_file_wget_job_filename = os.path.join(
                        link_deck_dir, 'wget_jobs',
                        'wget_nhc_'+deck_filename+'.sh'
                    )
                    nhc_ftp_deck_file_wget_job_output = os.path.join(
                        link_deck_dir, 'wget_jobs',
                        'wget_nhc_'+deck_filename+'.out'
                    )
                    nhc_ftp_deck_file_wget_job_name = (
                        'wget_nhc_'+deck_filename
                    )
                    with open(nhc_ftp_deck_file_wget_job_filename, 'w') \
                            as nhc_ftp_deck_file_wget_job_file:
                        nhc_ftp_deck_file_wget_job_file.write(
                            '#!/bin/sh'+'\n'
                        )
                        nhc_ftp_deck_file_wget_job_file.write(
                            'wget -q '+nhc_ftp_deck_file+' -P '
                            +link_deck_dir
                        )
                    wget_data(nhc_ftp_deck_file_wget_job_filename,
                              nhc_ftp_deck_file_wget_job_name,
                              nhc_ftp_deck_file_wget_job_output)
                    nhc_ftp_deck_gzfile = os.path.join(nhc_atfc_arch_ftp,
                                                       year,
                                                       deck_filename+'.gz')
                    nhc_deck_gzfile = os.path.join(link_deck_dir,
                                                   deck_filename+'.gz')
                    nhc_ftp_deck_gzfile_wget_job_filename = os.path.join(
                        link_deck_dir, 'wget_jobs',
                        'wget_nhc_gz_'+deck_filename+'.sh'
                    )
                    nhc_ftp_deck_gzfile_wget_job_output = os.path.join(
                        link_deck_dir, 'wget_jobs',
                        'wget_nhc_gz_'+deck_filename+'.out'
                    )
                    nhc_ftp_deck_gzfile_wget_job_name = (
                        'wget_nhc_gz_'+deck_filename
                    )
                    with open(nhc_ftp_deck_gzfile_wget_job_filename, 'w') \
                            as nhc_ftp_deck_gzfile_wget_job_file:
                        nhc_ftp_deck_gzfile_wget_job_file.write(
                            '#!/bin/sh'+'\n'
                        )
                        nhc_ftp_deck_gzfile_wget_job_file.write(
                            'wget -q '+nhc_ftp_deck_gzfile+' -P '
                            +link_deck_dir
                        )
                    wget_data(nhc_ftp_deck_gzfile_wget_job_filename,
                              nhc_ftp_deck_gzfile_wget_job_name,
                              nhc_ftp_deck_gzfile_wget_job_output)
                    if os.path.exists(nhc_deck_gzfile):
                        os.system('gunzip -q -f '+nhc_deck_gzfile)
                    if not os.path.exists(link_deck_file):
                        print("Did not find "+nhc_deck_file+" or "
                              +navy_deck_file+" and could not get from "
                              +"NHC ftp ("+nhc_ftp_deck_file+", "
                              +nhc_ftp_deck_gzfile+") for "+tc)
                elif basin == 'WP' and deck == 'b':
                    navy_ftp_bdeck_zipfile = os.path.join(
                        navy_atcf_bdeck_ftp, year, year+'s-bwp',
                        'bwp'+year+'.zip'
                    )
                    navy_bdeck_zipfile = os.path.join(link_deck_dir,
                                                      'bwp'+year+'.zip')
                    if not os.path.exists(navy_bdeck_zipfile):
                        navy_ftp_bdeck_zfile_wget_job_filename = (
                            os.path.join(link_deck_dir, 'wget_jobs',
                                         'wget_navy_zip_bwp'+year+'.sh')
                        )
                        navy_ftp_bdeck_zfile_wget_job_output = (
                            os.path.join(link_deck_dir, 'wget_jobs',
                                         'wget_navy_zip_bwp'+year+'.out')
                        )
                        navy_ftp_bdeck_zfile_wget_job_name = (
                            'wget_navy_zip_bwp'+year
                        )
                        with open(navy_ftp_bdeck_zfile_wget_job_filename,
                                  'w') as navy_ftp_bdeck_zfile_wget_job_file:
                            navy_ftp_bdeck_zfile_wget_job_file.write(
                                '#!/bin/sh'+'\n'
                            )
                            navy_ftp_bdeck_zfile_wget_job_file.write(
                                'wget -q '+navy_ftp_bdeck_zipfile+' -P '
                                +link_deck_dir
                            )
                        wget_data(navy_ftp_bdeck_zfile_wget_job_filename,
                                  navy_ftp_bdeck_zfile_wget_job_name,
                                  navy_ftp_bdeck_zfile_wget_job_output)
                    if os.path.exists(navy_bdeck_zipfile):
                        os.system('unzip -qq -o -d '+link_deck_dir+' '
                                  +navy_bdeck_zipfile+' '+deck_filename)
                    if not os.path.exists(link_deck_file):
                        print("Did not find "+nhc_deck_file+" or "
                              +navy_deck_file+" and could not get from "
                              +"Navy ftp ("+navy_ftp_bdeck_zipfile+" "
                              +deck_filename+") for "+tc)
                elif basin == 'WP' and deck == 'a':
                    if not os.path.exists(link_deck_file):
                        print("Did not find "+nhc_deck_file+" or "
                              +navy_deck_file+" for "+tc)
        # Get model track files
        # currently set up to mimic VSDB verification
        # which uses model track data initialized
        # in storm dates
        if os.path.exists(link_bdeck_file):
            tc_start_date, tc_end_date = get_tc_info.get_tc_dates(
                link_bdeck_file
            )
            tc_time_info_dict = get_time_info(
                tc_start_date[0:8], tc_end_date[0:8], tc_start_date[-2:],
                tc_end_date[-2:], '21600', ['00'], 'INIT'
            )
            for model in model_list:
                model_idx = model_list.index(model)
                model_num = model_idx + 1
                model_dir = model_dir_list[model_idx]
                model_hpss_dir = model_hpss_dir_list[model_idx]
                model_file_format = (
                    RUN_abbrev_model_file_format_list[model_idx]
                )
                model_atcf_name = (
                    RUN_abbrev_model_atcf_name_list[model_idx]
                )
                link_model_dir = os.path.join(cwd, 'data', model)
                if not os.path.exists(link_model_dir):
                    os.makedirs(link_model_dir)
                    os.makedirs(os.path.join(link_model_dir, 'HPSS_jobs'))
                for time in tc_time_info_dict:
                    valid_time = time['valid_time']
                    init_time = time['init_time']
                    lead = time['lead']
                    if init_time.strftime('%H') not in RUN_abbrev_fcyc_list:
                        continue
                    elif valid_time.strftime('%H') not in RUN_abbrev_vhr_list:
                        continue
                    else:
                        if 'NCEPPROD' in model_hpss_dir:
                            RUN_model_data_run_hpss = 'NO'
                        else:
                            RUN_model_data_run_hpss = model_data_run_hpss
                        if not os.path.exists(os.path.join(link_model_dir,
                                                           tc)):
                            os.mkdir(os.path.join(link_model_dir, tc))
                        link_tc_init_track_file = os.path.join(
                            link_model_dir, tc,
                            format_filler('track.{init?fmt=%Y%m%d%H}.dat',
                                          valid_time, init_time, '00')
                        )
                        if not os.path.exists(link_tc_init_track_file):
                            if model_file_format != 'ADECK':
                                link_init_track_file = os.path.join(
                                    link_model_dir,
                                    format_filler('track.'
                                                  +'{init?fmt=%Y%m%d%H}'
                                                  +'.dat',
                                                  valid_time, init_time,
                                                  '00')
                                )
                                if not os.path.exists(link_init_track_file):
                                    get_model_file(
                                        valid_time, init_time, lead,
                                        model, model_dir,
                                        model_file_format,
                                        RUN_model_data_run_hpss,
                                        model_hpss_dir, link_model_dir,
                                        'track.{init?fmt=%Y%m%d%H}.dat'
                                    )
                                if os.path.exists(link_init_track_file):
                                    try:
                                        init_track_grep = (
                                            subprocess.check_output(
                                                'grep -R "'+basin+', '
                                                +tc_id[2:4]+', '
                                                +init_time.strftime(
                                                    '%Y%m%d%H'
                                                )+'" '
                                                +link_init_track_file,
                                                shell=True,
                                                encoding='UTF-8'
                                            )
                                        )
                                        if len(init_track_grep) > 0:
                                            with open(
                                                    link_tc_init_track_file,
                                                    'w'
                                            ) as ltf:
                                                ltf.write(init_track_grep)
                                    except:
                                        print("WARNING: Could not make "
                                              +link_tc_init_track_file
                                              +" from "
                                              +link_init_track_file)
                                        pass
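                            # If the model has no track file of its own,
                            # optionally fall back to scraping the NHC
                            # a-deck, matching on the model's ATCF name
                            # and initialization time.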
                            if not os.path.exists(link_tc_init_track_file) \
                                    and os.path.exists(link_adeck_file) \
                                    and RUN_abbrev_tropcyc_use_adeck_for_missing_data \
                                    == 'YES':
                                print("Going to try to make "
                                      +link_tc_init_track_file+" from "
                                      +"adeck file "+link_adeck_file
                                      +" for "+model+" searching for "
                                      +"ATCF name "+model_atcf_name
                                      +" and init time "
                                      +init_time.strftime('%Y%m%d%H'))
                                try:
                                    adeck_grep = subprocess.check_output(
                                        'grep -R "'+model_atcf_name+'," '
                                        +link_adeck_file+' | grep "'
                                        +init_time.strftime('%Y%m%d%H')
                                        +'"',
                                        shell=True, encoding='UTF-8'
                                    )
                                    if len(adeck_grep) > 0:
                                        with open(link_tc_init_track_file,
                                                  'w') as ltf:
                                            ltf.write(adeck_grep)
                                except:
                                    print("WARNING: Could not make "
                                          +link_tc_init_track_file
                                          +" from adeck file "
                                          +link_adeck_file)
                                    pass
                        ## Check to make sure the listed ATCF name is in
                        ## the file and do replacements
                        if os.path.exists(link_tc_init_track_file):
                            try:
                                model_atcf_name_grep = (
                                    subprocess.check_output(
                                        'grep -R "'+model_atcf_name+'" '
                                        +link_tc_init_track_file,
                                        shell=True, encoding='UTF-8'
                                    )
                                )
                                model_tmp_atcf_name = (
                                    'M'+str(model_num).zfill(3)
                                )
                                print("Replacing "+model+" ATCF name "
                                      +model_atcf_name+" with "
                                      +model_tmp_atcf_name+" in "
                                      +link_tc_init_track_file)
                                os.system('sed -i s/'+model_atcf_name+'/'
                                          +model_tmp_atcf_name+'/g '
                                          +link_tc_init_track_file)
                            except:
                                print("WARNING: "+model_atcf_name+" "
                                      +"ATCF name for "+model+" not in "
                                      +link_tc_init_track_file)
                                pass
elif RUN == 'maps2d':
    # Read in RUN related environment variables
    global_archive = os.environ['global_archive']
    obdata_dir = os.environ['obdata_dir']
    RUN_abbrev_plot_diff = os.environ[RUN_abbrev+'_plot_diff']
    RUN_abbrev_anl_file_format_list = (
        os.environ[RUN_abbrev+'_anl_file_format_list'].split(' ')
    )
    RUN_abbrev_model2model_forecast_anl_diff = os.environ[
        RUN_abbrev+'_model2model_forecast_anl_diff'
    ]
    RUN_abbrev_model2obs_use_ceres = os.environ[
        RUN_abbrev+'_model2obs_use_ceres'
    ]
    RUN_abbrev_model2obs_use_monthly_mean = os.environ[
        RUN_abbrev+'_model2obs_use_monthly_mean'
    ]
    # Get model forecast, analysis, and observation files
    # for each option in RUN_type_list
    for RUN_type in RUN_type_list:
        RUN_abbrev_type = RUN_abbrev+'_'+RUN_type
        # Read in RUN_type environment variables
        RUN_abbrev_type_make_met_data_by = os.environ[
            RUN_abbrev_type+'_make_met_data_by'
        ]
        RUN_abbrev_type_hour_list = os.environ[
            RUN_abbrev_type+'_hour_list'
        ].split(' ')
        RUN_abbrev_type_forecast_to_plot_list = os.environ[
            RUN_abbrev_type+'_forecast_to_plot_list'
        ].split(' ')
        RUN_abbrev_type_start_hr = os.environ[
            RUN_abbrev_type+'_hour_beg'
        ]
        RUN_abbrev_type_end_hr = os.environ[
            RUN_abbrev_type+'_hour_end'
        ]
        RUN_abbrev_type_hr_inc = os.environ[
            RUN_abbrev_type+'_hour_inc'
        ]
        # Make forecast hour list
        forecast_to_plot_fhr_list = []
        for forecast_to_plot in RUN_abbrev_type_forecast_to_plot_list:
            if forecast_to_plot == 'anl':
                forecast_to_plot_fhr_list.append([forecast_to_plot])
            elif forecast_to_plot[0] == 'f':
                forecast_to_plot_fhr_list.append([forecast_to_plot[1:]])
            elif forecast_to_plot[0] == 'd':
                fhr4 = int(forecast_to_plot[1:]) * 24
                fhr3 = str(fhr4 - 6)
                fhr2 = str(fhr4 - 12)
                fhr1 = str(fhr4 - 18)
                fhr0 = str(fhr4 - 24)
                forecast_to_plot_fhr_list.append([fhr1, fhr2, fhr3,
                                                  str(fhr4)])
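        # 'anl' plots the analysis, 'fNNN' a single forecast hour, and
        # 'dN' a day mean built from the four 6-hourly forecast hours
        # ending at hour 24*N (e.g. 'd5' uses f102, f108, f114, and f120).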
        # Get model forecast, analysis, observation files
        # for all forecast_to_plot
        for forecast_to_plot in RUN_abbrev_type_forecast_to_plot_list:
            forecast_to_plot_idx = (
                RUN_abbrev_type_forecast_to_plot_list.index(
                    forecast_to_plot
                )
            )
            fhr_list = (
                forecast_to_plot_fhr_list[forecast_to_plot_idx]
            )
            # Get date and time information for RUN_type
            RUN_abbrev_type_time_info_dict = get_time_info(
                start_date, end_date, RUN_abbrev_type_start_hr,
                RUN_abbrev_type_end_hr, RUN_abbrev_type_hr_inc,
                fhr_list, RUN_abbrev_type_make_met_data_by
            )
            # Get forecast, analysis, observation files for each model
            for model in model_list:
                model_idx = model_list.index(model)
                model_dir = model_dir_list[model_idx]
                model_file_format = model_file_format_list[model_idx]
                model_hpss_dir = model_hpss_dir_list[model_idx]
                anl_file_format = (
                    RUN_abbrev_anl_file_format_list[model_idx]
                )
                link_model_data_dir = os.path.join(cwd, 'data', model)
                if not os.path.exists(link_model_data_dir):
                    os.makedirs(link_model_data_dir)
                    os.makedirs(os.path.join(link_model_data_dir,
                                             'HPSS_jobs'))
                # Get model forecast files
                for time in RUN_abbrev_type_time_info_dict:
                    valid_time = time['valid_time']
                    init_time = time['init_time']
                    lead = time['lead']
                    if RUN_abbrev_type_make_met_data_by == 'INIT':
                        hour = init_time.strftime('%H')
                    elif RUN_abbrev_type_make_met_data_by == 'VALID':
                        hour = valid_time.strftime('%H')
                    if hour not in RUN_abbrev_type_hour_list:
                        continue
                    else:
                        if forecast_to_plot == 'anl':
                            ftp_file_format = anl_file_format
                            ftp_link_file_format = (
                                'anl.{'
                                +RUN_abbrev_type_make_met_data_by.lower()
                                +'?fmt=%Y%m%d%H}'
                            )
                            ftp_lead = 'anl'
                        else:
                            ftp_file_format = model_file_format
                            ftp_link_file_format = (
                                'f{lead?fmt=%3H}.{init?fmt=%Y%m%d%H}'
                            )
                            ftp_lead = lead
                        get_model_file(valid_time, init_time, ftp_lead,
                                       model, model_dir, ftp_file_format,
                                       model_data_run_hpss,
                                       model_hpss_dir,
                                       link_model_data_dir,
                                       ftp_link_file_format)
                        model_fcst_ftp_lead_file = os.path.join(
                            link_model_data_dir,
                            format_filler(
                                ftp_link_file_format, valid_time,
                                init_time, ftp_lead
                            )
                        )
                        # Get observation files if needed
                        if RUN_type == 'model2model':
                            if RUN_abbrev_model2model_forecast_anl_diff \
                                    == 'YES':
                                obtype_list = [model+'_anl']
                            else:
                                obtype_list = [model]
                        elif RUN_type == 'model2obs':
                            obtype_list = ['gpcp', 'ghcn_cams']
                            if RUN_abbrev_model2obs_use_ceres == 'YES':
                                obtype_list.append('ceres')
                            else:
                                obtype_list.extend(
                                    ['clwp', 'nvap', 'rad_isccp',
                                     'rad_srb2']
                                )
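                        # The verifying "observations" are the model's own
                        # analysis (or forecast) for model2model, or the
                        # gridded obs/climatology datasets (GPCP,
                        # GHCN-CAMS, optionally CERES, else the VSDB climo
                        # files) for model2obs.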
                        link_obs_dir = os.path.join(cwd, 'data', 'obs')
                        if not os.path.exists(link_obs_dir):
                            os.makedirs(link_obs_dir)
                        for obtype in obtype_list:
                            model_fcst_ftp_files_filename = os.path.join(
                                link_model_data_dir,
                                RUN_type+'_fcst_'+model+'_obs_'+obtype
                                +'_'+forecast_to_plot
                                +'_fcst_file_list.txt'
                            )
                            model_obs_ftp_files_filename = os.path.join(
                                link_model_data_dir,
                                RUN_type+'_fcst_'+model+'_obs_'+obtype
                                +'_'+forecast_to_plot
                                +'_obs_file_list.txt'
                            )
                            if obtype == model:
                                model_obs_ftp_lead_file = (
                                    model_fcst_ftp_lead_file
                                )
                            elif obtype == model+'_anl':
                                get_model_file(valid_time, valid_time,
                                               'anl', model, model_dir,
                                               anl_file_format,
                                               model_data_run_hpss,
                                               model_hpss_dir,
                                               link_model_data_dir,
                                               'anl.{valid?fmt=%Y%m%d%H}')
                                model_obs_ftp_lead_file = os.path.join(
                                    link_model_data_dir,
                                    format_filler(
                                        'anl.{valid?fmt=%Y%m%d%H}',
                                        valid_time, valid_time, 'anl'
                                    )
                                )
                            else:
                                link_obtype_dir = os.path.join(
                                    link_obs_dir, obtype
                                )
                                if not os.path.exists(link_obtype_dir):
                                    os.makedirs(link_obtype_dir)
                                YYYY = valid_time.strftime('%Y')
                                B = valid_time.strftime('%B')[0:3]
                                if obtype in ['clwp', 'nvap', 'rad_isccp',
                                              'rad_srb2']:
                                    obtype_dir = os.path.join(
                                        obdata_dir, 'vsdb_climo_data',
                                        'CF_compliant'
                                    )
                                    obtype_filename = obtype+'_'+B+'.nc'
                                else:
                                    if RUN_abbrev_model2obs_use_monthly_mean \
                                            == 'YES':
                                        obtype_dir = os.path.join(
                                            obdata_dir, obtype,
                                            'monthly_mean'
                                        )
                                        obtype_filename = (
                                            obtype+'_'+B+YYYY+'.nc'
                                        )
                                        if not os.path.exists(
                                                os.path.join(
                                                    obtype_dir,
                                                    obtype_filename
                                                )
                                        ):
                                            print(
                                                "WARNING: "
                                                +os.path.join(
                                                    obtype_dir,
                                                    obtype_filename
                                                )
                                                +" does not exist. Will "
                                                +"try substituting climo "
                                                +"file "
                                                +os.path.join(
                                                    obdata_dir, obtype,
                                                    'monthly_climo',
                                                    obtype+'_'+B+'.nc'
                                                )
                                            )
                                            obtype_dir = os.path.join(
                                                obdata_dir, obtype,
                                                'monthly_climo'
                                            )
                                            obtype_filename = (
                                                obtype+'_'+B+'.nc'
                                            )
                                    else:
                                        obtype_dir = os.path.join(
                                            obdata_dir, obtype,
                                            'monthly_climo'
                                        )
                                        obtype_filename = (
                                            obtype+'_'+B+'.nc'
                                        )
                                obtype_file = os.path.join(
                                    obtype_dir, obtype_filename
                                )
                                link_obtype_file = os.path.join(
                                    link_obtype_dir, obtype_filename
                                )
                                model_obs_ftp_lead_file = link_obtype_file
                                if not os.path.exists(link_obtype_file):
                                    if os.path.exists(obtype_file):
                                        os.system('ln -sf '+obtype_file
                                                  +' '+link_obtype_file)
                                    else:
                                        print("WARNING: "+obtype_file
                                              +" does not exist")
                            # Add to file list
                            if os.path.exists(model_fcst_ftp_lead_file) \
                                    and \
                                    os.path.exists(model_obs_ftp_lead_file):
                                with open(model_fcst_ftp_files_filename,
                                          'a') as fcst_files:
                                    fcst_files.write(
                                        model_fcst_ftp_lead_file+'\n'
                                    )
                                with open(model_obs_ftp_files_filename,
                                          'a') as obs_files:
                                    obs_files.write(
                                        model_obs_ftp_lead_file+'\n'
                                    )
elif RUN == 'mapsda':
    # Read in RUN related environment variables
    RUN_abbrev_gdas_model_file_format_list = os.environ[
        RUN_abbrev+'_gdas_model_file_format_list'
    ].split(' ')
    RUN_abbrev_gdas_anl_file_format_list = os.environ[
        RUN_abbrev+'_gdas_anl_file_format_list'
    ].split(' ')
    RUN_abbrev_ens_model_dir_list = os.environ[
        RUN_abbrev+'_ens_model_dir_list'
    ].split(' ')
    RUN_abbrev_ens_model_data_run_hpss = os.environ[
        RUN_abbrev+'_ens_model_data_run_hpss'
    ]
    # Get model forecast and truth files for each option in RUN_type_list
    for RUN_type in RUN_type_list:
        RUN_abbrev_type = RUN_abbrev+'_'+RUN_type
        # Read in RUN_type environment variables
        RUN_abbrev_type_make_met_data_by = os.environ[
            RUN_abbrev_type+'_make_met_data_by'
        ]
        RUN_abbrev_type_hour_list = os.environ[
            RUN_abbrev_type+'_hour_list'
        ].split(' ')
        RUN_abbrev_type_guess_hour = os.environ[
            RUN_abbrev_type+'_guess_hour'
        ]
        RUN_abbrev_type_model_file_format_list = os.environ[
            RUN_abbrev_type+'_model_file_format_list'
        ].split(' ')
        RUN_abbrev_type_start_hr = os.environ[
            RUN_abbrev_type+'_hour_beg'
        ]
        RUN_abbrev_type_end_hr = os.environ[
            RUN_abbrev_type+'_hour_end'
        ]
        RUN_abbrev_type_hr_inc = os.environ[
            RUN_abbrev_type+'_hour_inc'
        ]
        # Get date and time information for RUN_type
        RUN_abbrev_type_time_info_dict = get_time_info(
            start_date, end_date, RUN_abbrev_type_start_hr,
            RUN_abbrev_type_end_hr, RUN_abbrev_type_hr_inc,
            [RUN_abbrev_type_guess_hour],
            RUN_abbrev_type_make_met_data_by
        )
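        # gdas pairs each model's guess forecast (valid at the analysis
        # time) with its own analysis and writes matching fcst/obs file
        # lists; ens links the ensemble mean and spread files and averages
        # them over the period with ncea.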
        for model in model_list:
            model_idx = model_list.index(model)
            model_hpss_dir = model_hpss_dir_list[model_idx]
            model_file_format = RUN_abbrev_type_model_file_format_list[
                model_idx
            ]
            link_model_data_dir = os.path.join(cwd, 'data', model)
            if not os.path.exists(link_model_data_dir):
                os.makedirs(link_model_data_dir)
                os.makedirs(os.path.join(link_model_data_dir,
                                         'HPSS_jobs'))
            # Get RUN_type gdas files
            if RUN_type == 'gdas':
                model_dir = model_dir_list[model_idx]
                anl_file_format = RUN_abbrev_gdas_anl_file_format_list[
                    model_idx
                ]
                obtype = model+'_anl'
                # Get model guess and analysis files
                for time in RUN_abbrev_type_time_info_dict:
                    valid_time = time['valid_time']
                    init_time = time['init_time']
                    lead = time['lead']
                    if RUN_abbrev_type_make_met_data_by == 'INIT':
                        hour = init_time.strftime('%H')
                    elif RUN_abbrev_type_make_met_data_by == 'VALID':
                        hour = valid_time.strftime('%H')
                    if hour not in RUN_abbrev_type_hour_list:
                        continue
                    else:
                        get_model_file(valid_time, init_time, lead,
                                       model, model_dir,
                                       model_file_format,
                                       model_data_run_hpss,
                                       model_hpss_dir,
                                       link_model_data_dir,
                                       'f{lead?fmt=%3H}'
                                       +'.{init?fmt=%Y%m%d%H}')
                        model_fcst_file = os.path.join(
                            link_model_data_dir,
                            format_filler(
                                'f{lead?fmt=%3H}.{init?fmt=%Y%m%d%H}',
                                valid_time, init_time, lead
                            )
                        )
                        get_model_file(valid_time, valid_time, 'anl',
                                       model, model_dir,
                                       anl_file_format,
                                       model_data_run_hpss,
                                       model_hpss_dir,
                                       link_model_data_dir,
                                       'anl.{valid?fmt=%Y%m%d%H}')
                        model_obs_file = os.path.join(
                            link_model_data_dir,
                            format_filler(
                                'anl.{valid?fmt=%Y%m%d%H}',
                                valid_time, valid_time, 'anl'
                            )
                        )
                        model_fcst_files_filename = os.path.join(
                            link_model_data_dir,
                            RUN_type+'_fcst_'+model+'_obs_'+obtype
                            +'_fhr'+lead+'_fcst_file_list.txt'
                        )
                        model_obs_files_filename = os.path.join(
                            link_model_data_dir,
                            RUN_type+'_fcst_'+model+'_obs_'+obtype
                            +'_fhr'+lead+'_obs_file_list.txt'
                        )
                        # Add to file list
                        if os.path.exists(model_fcst_file) \
                                and os.path.exists(model_obs_file):
                            with open(model_fcst_files_filename,
                                      'a') as fcst_files:
                                fcst_files.write(model_fcst_file+'\n')
                            with open(model_obs_files_filename,
                                      'a') as obs_files:
                                obs_files.write(model_obs_file+'\n')
            # Get RUN_type ens files
            if RUN_type == 'ens':
                model_dir = RUN_abbrev_ens_model_dir_list[model_idx]
                existing_file_list = ''
                for ens_file_type in ['mean', 'spread']:
                    ens_model_file_format = model_file_format.replace(
                        '[mean,spread]', ens_file_type
                    )
                    existing_file_list = ''
                    for time in RUN_abbrev_type_time_info_dict:
                        valid_time = time['valid_time']
                        init_time = time['init_time']
                        lead = time['lead']
                        if RUN_abbrev_type_make_met_data_by == 'INIT':
                            hour = init_time.strftime('%H')
                        elif RUN_abbrev_type_make_met_data_by == 'VALID':
                            hour = valid_time.strftime('%H')
                        if hour not in RUN_abbrev_type_hour_list:
                            continue
                        else:
                            get_model_file(valid_time, init_time, lead,
                                           model, model_dir,
                                           ens_model_file_format,
                                           RUN_abbrev_ens_model_data_run_hpss,
                                           model_hpss_dir,
                                           link_model_data_dir,
                                           'atmf{lead?fmt=%3H}.ens'
                                           +ens_file_type+'.'
                                           +'{init?fmt=%Y%m%d%H}.nc')
                            link_ens_file = os.path.join(
                                link_model_data_dir,
                                format_filler(
                                    'atmf{lead?fmt=%3H}.ens'
                                    +ens_file_type+'.'
                                    +'{init?fmt=%Y%m%d%H}.nc',
                                    valid_time, init_time, lead
                                )
                            )
                            if os.path.exists(link_ens_file):
                                existing_file_list = (existing_file_list
                                                      +link_ens_file+' ')
                    avg_file = os.path.join(
                        link_model_data_dir,
                        format_filler('atmf{lead?fmt=%3H}.ens'
                                      +ens_file_type+'.nc',
                                      valid_time, init_time, lead)
                    )
                    print("Creating average files for "+model+" "
                          +"ens"+ens_file_type+" from available data. "
                          +"Saving as "+avg_file)
                    ncea = os.environ['NCEA']
                    if '.nc4' in model_file_format:
                        process_vars = ''
                    else:
                        process_vars = (
                            ' -v tmp,ugrd,vgrd,spfh,pressfc,o3mr,clwmr '
                        )
                    if existing_file_list != '':
                        os.system(ncea+' '+existing_file_list+' -o '
                                  +avg_file+process_vars)
print("END: "+os.path.basename(__file__))