o qg0,@sddlZddlZddlZddlZddlZddlZddlm Z ej dZ ej dej e ddlmZddlZddZddZd d Zd d Zd dZddZddZddZddZddZddZddZddZdd Z d!d"Z!dS)#N) timedeltaUSH_DIR) prune_datac Csd|vrd|dd}||t||dkrA|\}}|jt|dtt|d}|jt|dtt|d} || f} | S|dkrI|} | Sdt|d d }||t|) NzFATAL ERROR: One or more FCST_z_HOURS is Nonetype. This may bez# because the input string is empty.INIT)hourhoursVALIDz FATAL ERROR: Invalid DATE_TYPE: z. Valid values arez VALID or INIT)error ValueErrorreplacemintdmaxstrupper) logger date_type date_range date_hoursfleadseinit_beginit_end valid_beg valid_end valid_rangerH/lfs/h1/ops/prod/packages/evs.v1.0.19/ush/global_ens/df_preprocessing.pyget_valid_ranges4     rcCsndd| D} dttj}tj|t|dt| dt| dt||}tj|rt t |r{| d|dd|t |||||t|t| t| t|t| dd| Dt| | | |Sd |d }d |d }||||t|d |d |d}d|d }||||t|d |)NcSg|]}t|qSrr.0modelrrr 9z"run_prune_data..tmp_zLooking for stat files in z using thez template: cSr rr!)r#Z fcst_var_namerrrr%Mr&z FATAL ERROR: z exists but is empty.z Populate z and retry. z does not exist.zCreate and populate )ruuiduuid4hexospathjoinrisdirlenlistdirinforlowerr OSError)r stats_dir prune_diroutput_base_template verif_case verif_type line_typer eval_periodvar_namefcst_var_names model_listobtypedomaintmp_dirpruned_data_dire1e2rrrrun_prune_data6sJ           rFcCs4|jr|d|d|d|ddSdS)Nz Called from :z-Empty Dataframe. Continuing onto next plot...z(========================================TF)emptywarningr3)dfrZ called_fromrrr check_empty^s   rKc Csdd|D}|dd}|dd} |D]} tj|t| d} tj| sU|dt| dd |d |d |d d t| d d|dq|s_|d| zdt |} t ||t| } t | | f}tj| ddd|td}dt| }||dD] }||t||<qz t||g}Wnty|}Ynty}z|}WYd}~nd}~wwWqtjjy}z|||d|| |dWYd}~qd}~wty}z|||d|| |dWYd}~qd}~ww|rGzt|Wn(tyF}z|||d|||dWYd}~nd}~wwzt||drRWdS|jddd|WSty}z|||d|dtdWYd}~dSd}~ww)NcSr rr!r"rrrr%ir&zcreate_df..rz %HZ %d %B %Yz.statzThe stat file for z does not exist in .z'You might check whether the stats_dir (z ) includesz, data according to the output_base template,z given domain, variable, etc...zContinuing ...z*Creating dataframe using pruned data from T)delim_whitespaceheaderskiprowsnamesdtypezThe file in question:zThe directory in question: create_dfdropinplacezRFATAL ERROR: Nonexistent dataframe. "df" does not exist. Check for earlier errorsz Quitting ...) strftimer-r.r/risfilerIdebug plot_utilget_stat_file_base_columnsget_stat_file_line_type_columnsrnp concatenatepdread_csvr1astypefloatconcat NameErrorUnboundLocalErrorerrorsEmptyDataErrorr5shutilrmtreerK reset_indexr sysexit)rr6rCr;rr? met_versionclear_prune_dirZ start_stringZ end_stringr$fpathZdf_og_colnamesZdf_line_type_colnamesZ df_colnamesZdf_tmpicol_namerJrrrrrUgs                  rUcCs|durdSt|dvr+||djd|djd@}||dd}nt|dvrF||djd|djdB}t||drNdS|S) N)pres upper_airFCST_LEVPOBS_LEVOBTYPEZONLYSF)sfc conus_sfc polar_sfcfilter_by_level_type)rr4 startswitheqrK)rJrr:rrrr}s$ r}cCs@|durdS||d||d|@}t||drdS|S)NFCST_VAROBS_VARfilter_by_var_name)isinrK)rJrr> obs_var_namesrrrrs   rcCs:|durdS||dt|}t||drdS|S)N INTERP_MTHDfilter_by_interp)rrrrK)rJrinterprrrrs  rcC6|durdS||dt|}t||drdS|S)Nryfilter_by_obtyperrrK)rJrr@rrrr  rcCr)NVX_MASKfilter_by_domainr)rJrrArrrrrrcCs<|durdStdd|dD|d<t||drdS|S)NcSsg|] }t|ddqS)N)int)r#leadrrrr%sz%create_lead_hours.. FCST_LEAD LEAD_HOURScreate_lead_hours)r_arrayrKrJrrrrrs  rcCs6|durdStj|ddd|d<t||drdS|S)NFCST_VALID_ENDz %Y%m%d_%H%M%S)formatr create_valid_datetime)ra to_datetimerKrrrrrrrcsLdurdSjdddfddtdDd<t|dr$dSS)NTrVcs,g|]\}}d|tjt|dqS)r r)ra DateOffsetr)r#vrrJrrr%sz(create_init_datetime..rrcreate_init_datetime)rl enumeraterKrrrrrs   rcCsV|durdS|j|t||dk|t||dk@}t||dr)dS|S)NrrLfilter_by_date_range)locrrrK)rJrrrrrrr s rcsH|durdS|jfdd|t|jjD}t||dr"dS|S)Ncsg|]}|vqSrr)r#xrrrr%r&z"filter_by_hour..filter_by_hour)rrrdtrrK)rJrrrrrrrs ( rcCst|||| | }t||||||||| | | |||}t||||||||}t|||}t||| |}t|||}t|||}t|||}t||}t ||}t ||}t ||||}t |||| }|S)N) rrFrUr}rrrrrrrrr)rr6r7r8r9r:r;rrr<rrr=r>rr?r@rArrorprrCrJrrrget_preprocessed_data!s.           r)"r-rmrjr*numpyr_pandasradatetimerrenviron SETTINGS_DIRr.insertabspathprune_stat_filesrr\rrFrKrUr}rrrrrrrrrrrrrrs4    ( L