#! /usr/bin/env python

##@namespace scripts.exhwrf_ensda
# Runs one member of the HWRF data assimilation ensemble.  This job
# must be run after the ensda_pre and exhwrf_init jobs.  There is one
# mandatory environment variable that must be set before executing this
# script: $ENSDA_MEMB, which must be set to the integer ENSDA member
# number (generally a number from 1 to 40).

import logging, os, sys, traceback
import produtil.log, produtil.setup
# Imported explicitly because this script calls produtil.cluster.name()
# and produtil.fileop.deliver_file() directly.
import produtil.cluster
import produtil.fileop
from produtil.log import jlogger
import hwrf_wcoss, hwrf_alerts
from hwrf.ensda import reset_ensda_flag_file
import hwrf.exceptions
from produtil.ecflow import set_ecflow_event

def fail(msg):
    """!Write an error message to produtil.log.jlogger and exit with
    status 2.
    @param msg the message to write"""
    jlogger.error(msg)
    sys.exit(2)

def set_vars(logger=None):
    """!Sets MPI tuning variables for the local machine.

    Chooses the hwrf_wcoss setup routine from the cluster name
    reported by produtil.cluster.name().  On any other cluster, only
    an informational message is logged.
    @param logger a logging.Logger passed through to the hwrf_wcoss
      routines.  May be None; in that case the "not on WCOSS" message
      goes to the 'exhwrf_ensda' logger."""
    cluster=produtil.cluster.name()
    if cluster in ('dogwood','cactus','luna','surge'):
        hwrf_wcoss.cray_ensda_vars(logger)
    elif cluster in ('gyre','tide'):
        hwrf_wcoss.set_vars_for_ensda_hwrf(logger)
    else:
        # The default logger=None must not crash the non-WCOSS path.
        if logger is None:
            logger=logging.getLogger('exhwrf_ensda')
        logger.info('Not on WCOSS, so not setting WCOSS-specific vars.')

def main():
    """!Runs one ENSDA member.

    The member to run is specified by the ENSDA_MEMB environment
    variable.  If the variable is unset, the job logs an error and
    exits with status 2 via fail().  A value that is not a base-10
    integer raises ValueError from int().

    After the member runs, each of its products that has a location
    and is available is delivered to the "com" directory.  The job
    exits with status 1 if no compression copier can be obtained for
    a product."""
    logger=logging.getLogger('exhwrf_ensda')
    ENV=os.environ
    memb=ENV.get('ENSDA_MEMB','NOPE').lower()
    if memb=='nope':
        fail('Aborting: you must specify ENSDA_MEMB')
    imemb=int(memb,10)
    jlogger.info('HWRF ensda member %03d starting'%imemb)

    set_vars(logger)

    import hwrf_expt
    hwrf_expt.init_module(make_ensemble_da=True)
    hwrf_expt.conf.add_fallback_callback(hwrf_alerts.fallback_callback)
    conf=hwrf_expt.conf

    omemb=hwrf_expt.ensda.member(hwrf_expt.conf.cycle,imemb)
    failure_key='ensda_mem%03d_failed'%imemb
    try:
        # Do not name this local "fail": that would shadow the
        # module-level fail() throughout main() and make the
        # ENSDA_MEMB check above raise UnboundLocalError.
        failure_test=conf.getstr('failure',failure_key,'none')
        if failure_test=='unexpected_failure':
            raise hwrf.exceptions.UnexpectedFailureTest()
        if failure_test=='expected_failure':
            raise hwrf.exceptions.ExpectedFailureTest()
        omemb.run()
    except(SyntaxError,TypeError,ReferenceError,MemoryError,AttributeError,
           AssertionError,NameError,
           hwrf.exceptions.UnexpectedFailureTest) as ne:
        # These exception types always abort the job; no fallback is
        # attempted for them.
        logger.error('ensda_mem%03d is aborting due to FATAL ERROR: '%imemb
                     +str(ne),exc_info=True)
        sys.exit(2)
    except Exception as e:
        msg='Could not run ensda for member %s; will not run ensda.  Unhandled exception: %s'%(imemb,str(e))
        if conf.fallback(failure_key,msg+
                         '\n\nPython stack information at location of exception:\n\n'
                         +traceback.format_exc()):
            # Fallback accepted: log the failure, reset this member's
            # flag file, signal ecFlow and end without delivering
            # any products.
            logger.error(msg,exc_info=True)
            flag_file='tmpmem%03d.run_ensda'%imemb
            reset_ensda_flag_file(conf,flag_file,False,False,logger)
            set_ecflow_event('canceled',logger)
            return
        # Fallback declined: let the top-level handler abort the job.
        raise

    for prod in omemb.products():
        if not prod.location:
            logger.error('No product: %s'%(prod.did,))
        elif not prod.available:
            logger.error('Product %s not available (location %s)'%(
                    repr(prod.did),repr(prod.location)))
        else:
            dest='%s/%s.ensda_%03d.%s'%(
                hwrf_expt.conf.getdir('com'),
                hwrf_expt.conf.getstr('config','out_prefix'),
                imemb,os.path.basename(prod.location))
            logger.info('%s %s: send to %s'%(
                    str(prod.did),repr(imemb),str(dest)))
            assert(os.path.isabs(dest))
            copier=hwrf_expt.wrfcopier.compression_copier(prod.location)
            if copier is None:
                logger.error('%s %s: not a NetCDF 3 file.'%(
                        str(prod.did),str(prod.location)))
                sys.exit(1)
            produtil.fileop.deliver_file(
                prod.location,dest,logger=logger,
                copier=copier)

    jlogger.info('HWRF ensda member %03d has completed'%imemb)

if __name__=='__main__':
    try:
        produtil.setup.setup()
        main()
    except Exception as e:
        # Top-level boundary: log any uncaught error and exit non-zero.
        jlogger.critical('HWRF ensda is aborting: '+str(e),exc_info=True)
        sys.exit(2)