o pg3,@sddlZddlZddlZddlZddlZddlZddlm Z ej dZ ej dej e ddlmZddlZddZddZd d Zd d Zd dZddZddZddZddZddZddZddZddZdd Z d!d"Z!dS)#N) timedeltaUSH_DIR) prune_datac Csd|vrd|dd}||t||dkrA|\}}|jt|dtt|d}|jt|dtt|d} || f} | S|dkrI|} | Sdt|d d }||t|) NzOne or more FCST_z_HOURS is Nonetype. This may bez# because the input string is empty.INIT)hourhoursVALIDzInvalid DATE_TYPE: z. Valid values arez VALID or INIT)error ValueErrorreplacemintdmaxstrupper) logger date_type date_range date_hoursfleadseinit_beginit_end valid_beg valid_end valid_rangerH/lfs/h1/ops/prod/packages/evs.v1.0.19/ush/global_det/df_preprocessing.pyget_valid_range"s4     rcCsjdd| D} dttj}tj|t|dt| dt| dt||}tj|rt t |r{| d|dd|t |||||t|t| t| t|t| dd| Dt| | | |S|d }d |d }||||t|d ||d }d|d }||||t|d |)NcSg|]}t|qSrr.0modelrrr ?z"run_prune_data..tmp_zLooking for stat files in z using thez template: cSr rr!)r#Z fcst_var_namerrrr%Sr&z exists but is empty.z Populate z and retry. z does not exist.zCreate and populate )ruuiduuid4hexospathjoinrisdirlenlistdirinforlowerr OSError)r stats_dir prune_diroutput_base_template verif_case verif_type line_typer eval_periodvar_namefcst_var_names model_listobtypedomaintmp_dirpruned_data_dire1e2rrrrun_prune_data<sJ           rFcCs4|jr|d|d|d|ddSdS)Nz Called from :z-Empty Dataframe. Continuing onto next plot...z(========================================TF)emptyr r3)dfrZ called_fromrrr check_emptyds   rJc Csdd|D}|dd}|dd} |D]} tj|t| d} tj| sU|dt| dd |d |d |d d t| d d|dq|s_|d| zdt |} t ||t| } t | | f}tj| ddd|td}dt| }||dD] }||t||<qz t||g}Wnty|}Ynty}z|}WYd}~nd}~wwWqtjjy}z|||d|| |dWYd}~qd}~wty}z|||d|| |dWYd}~qd}~ww|rGzt|Wn(tyF}z|||d|||dWYd}~nd}~wwzt||drRWdS|jddd|WSty}z|||d|dtdWYd}~dSd}~ww)NcSr rr!r"rrrr%or&zcreate_df..rz %HZ %d %B %Yz.statzThe stat file for z does not exist in .z'You might check whether the stats_dir (z ) includesz, data according to the output_base template,z given domain, variable, etc...zContinuing ...z*Creating dataframe using pruned data from T)delim_whitespaceheaderskiprowsnamesdtypezThe file in question:zThe directory in question: create_dfdropinplacez:Nonexistent dataframe. Check the logfile for more details.z Quitting ...) strftimer-r.r/risfilewarningdebug plot_utilget_stat_file_base_columnsget_stat_file_line_type_columnsrnp concatenatepdread_csvr1astypefloatconcat NameErrorUnboundLocalErrorerrorsEmptyDataErrorr r5shutilrmtreerJ reset_indexsysexit)rr6rCr;rr? met_versionclear_prune_dirZ start_stringZ end_stringr$fpathZdf_og_colnamesZdf_line_type_colnamesZ df_colnamesZdf_tmpicol_namerIrrrrrTms                  rTcCs|durdSt|dvr+||djd|djd@}||dd}nt|dvrF||djd|djdB}t||drNdS|S) N)pres upper_airFCST_LEVPOBS_LEVOBTYPEZONLYSF)sfc conus_sfc polar_sfcfilter_by_level_type)rr4 startswitheqrJ)rIrr:rrrr}s$ r}cCs@|durdS||d||d|@}t||drdS|S)NFCST_VAROBS_VARfilter_by_var_name)isinrJ)rIrr> obs_var_namesrrrrs   rcCs:|durdS||dt|}t||drdS|S)N INTERP_MTHDfilter_by_interp)rrrrJ)rIrinterprrrrs  rcC6|durdS||dt|}t||drdS|S)Nryfilter_by_obtyperrrJ)rIrr@rrrr  rcCr)NVX_MASKfilter_by_domainr)rIrrArrrrrrcCs<|durdStdd|dD|d<t||drdS|S)NcSsg|] }t|ddqS)N)int)r#leadrrrr%sz%create_lead_hours.. FCST_LEAD LEAD_HOURScreate_lead_hours)r_arrayrJrIrrrrrs  rcCs6|durdStj|ddd|d<t||drdS|S)NFCST_VALID_ENDz %Y%m%d_%H%M%S)formatr create_valid_datetime)ra to_datetimerJrrrrrrrcsLdurdSjdddfddtdDd<t|dr$dSS)NTrUcs,g|]\}}d|tjt|dqS)r r)ra DateOffsetr)r#vrrIrrr%sz(create_init_datetime..rrcreate_init_datetime)rl enumeraterJrrrrrs   rcCsV|durdS|j|t||dk|t||dk@}t||dr)dS|S)NrrKfilter_by_date_range)locrrrJ)rIrrrrrrrs rcsH|durdS|jfdd|t|jjD}t||dr"dS|S)Ncsg|]}|vqSrr)r#xrrrr% r&z"filter_by_hour..filter_by_hour)rrrdtrrJ)rIrrrrrrrs ( rcCst|||| | }t||||||||| | | |||}t||||||||}t|||}t||| |}t|||}t|||}t|||}t||}t ||}t ||}t ||||}t |||| }|S)N) rrFrTr}rrrrrrrrr)rr6r7r8r9r:r;rrr<rrr=r>rr?r@rArrorprrCrIrrrget_preprocessed_data&s.           r)"r-rmrjr*numpyr_pandasradatetimerrenviron SETTINGS_DIRr.insertabspathprune_stat_filesrr\rrFrJrTr}rrrrrrrrrrrrrrs4   ( K