#! /usr/bin/env python import os, sys, logging, re, collections ##@var USHhwrf # The ush/ subdirectory of the HWRF installation directory USHhwrf=None ##@var HOMEhwrf # The HWRF installation directory HOMEhwrf=None if os.environ.get('USHhwrf',''): USHhwrf=os.environ['USHhwrf'] if os.environ.get('HOMEhwrf',''): HOMEhwrf=os.environ['HOMEhwrf'] if HOMEhwrf is None and USHhwrf is None: HOMEhwrf=os.path.dirname(os.getcwd()) USHguess=os.path.join(HOMEhwrf,'ush') if os.path.isdir(USHguess): if USHhwrf is None: USHhwrf=USHguess if HOMEhwrf is not None: if USHhwrf is None: USHhwrf=os.path.join(HOMEhwrf,'ush') if USHhwrf is None: print>>sys.stderr, "Cannot guess $USHhwrf. Please set $HOMEhwrf or " \ "$USHhwrf in environment." sys.exit(2) sys.path.append(USHhwrf) import hwrf.numerics from hwrf.numerics import to_datetime,to_datetime_rel, to_fraction,to_timedelta import produtil.run, produtil.setup from produtil.run import runstr, batchexe produtil.setup.setup(send_dbn=False) epsilon=to_timedelta(30) # 30 seconds six_hours=to_timedelta(3600*6) nfile=0 for xmlfile in sys.argv[1:]: nfile+=1 #print '%s: SCAN FILE'%(xmlfile,) cycles=set() is_xml=False with open(xmlfile,'rt') as xmlf: for line in xmlf: if re.search('([0-9]+)',line) if m and m.groups(1) and m.groups(1)[0]: #print line cycle=to_datetime(m.groups(1)[0]) cycles.add(cycle.strftime('%Y%m%d%H%M')) else: m=re.search('([0-9]+) ([0-9]+) 0?6:00:00',line) if m and m.groups(1) and m.groups(1)[0] and m.groups(1)[1]: #print line start=to_datetime(m.groups(1)[0]) end=to_datetime(m.groups(1)[1]) now=start stop=end+epsilon while now',line): print 'BAD CYCLEDEF: '+line exit(1) if not is_xml: print '%s: NOT AN XML FILE!!'%(xmlfile,) exit(1) #print 'Cycles: '+( ' '.join(sorted(cycles))) dbfile=xmlfile[:-3]+'db' cmd=batchexe('rocotostat')['-w',xmlfile,'-d',dbfile,'-c','ALL'] #print repr(cmd) stat=runstr(cmd) last=dict() complete=set() queued=set() badjobs=set() badincompletejobs=set() running=set() printme=list() failme=collections.defaultdict(dict) for line in stat.splitlines(): #201408061200 completion 315615 SUCCEEDED m=re.match('^\s*([0-9]{12})\s+(\S+)\s+\S+\s+(\S+)',line) if m and m.groups(1) and m.groups(1)[0] and m.groups(1)[1] and m.groups(1)[1] and m.groups(1)[2]: cycle=m.groups(1)[0] job=m.groups(1)[1] status=m.groups(1)[2] if status=='-': continue # ignore unstarted jobs if status=='SUCCEEDED': if job=='completion': complete.add(m.groups(1)[0]) last[cycle]=job elif status in ('DEAD','UNKNOWN','LOST','UNAVAILABLE'): failme[cycle][job]=status badjobs.add(cycle) elif status=='RUNNING': running.add(cycle) else: queued.add(cycle) lastline='' lastcycle='' prevcycle='' for cycle in sorted(cycles): if cycle not in last: line='unstarted.' elif cycle in complete: line='complete.' else: line='[last job finished: '+last[cycle]+']' if cycle in running: line+=' [RUNNING]' if cycle in queued: line+=' [QUEUED]' if cycle in badjobs: line+=' [FAILURES]' #printme.append('%s: %s(%s-%s): %s'%(xmlfile,cycle,lastcycle,prevcycle,line)) if lastline and lastline != line: if prevcycle and prevcycle!=lastcycle: printme.append('%s-%s: %s'%(lastcycle,prevcycle,lastline)) else: printme.append(' %s: %s'%(lastcycle,lastline)) lastline=line lastcycle=cycle prevcycle=cycle elif lastline and lastline==line: #printme.append('lastline and lastline!=line') prevcycle=cycle lastline=line else: #printme.append('ELSE') prevcycle=cycle lastcycle=cycle lastline=line if prevcycle and prevcycle!=lastcycle: printme.append('%s-%s: %s'%(lastcycle,prevcycle,lastline)) elif lastcycle: printme.append(' %s: %s'%(lastcycle,lastline)) incomplete=cycles-complete sent_heading=False for cycle in sorted(cycles): appendme='' is_incomplete=cycle in complete if is_incomplete: appendme=' (but cycle completed)' for (job,status) in failme[cycle].iteritems(): if not is_incomplete: badincompletejobs.add(cycle) if not sent_heading: printme.append('Failed jobs:') sent_heading=True printme.append('%s %s IS %s%s'%( cycle,job,status,appendme)) if nfile>1: print if not running and not queued and not badjobs and incomplete: print xmlfile+': STALLED.' print 'Nothing queued, failed or running, but workflow is incomplete. You' print 'should check for dependency errors, and ensure your cron job is running.' for line in printme: print line elif not incomplete: print xmlfile+': COMPLETE.' elif badincompletejobs: print xmlfile+': FAILED JOBS!' for line in printme: print line elif running: if badjobs: print xmlfile+': RUNNING (failed jobs for completed cycles).' else: print xmlfile+': RUNNING.' for line in printme: print line elif queued: if badjobs: print xmlfile+': QUEUED (failed jobs for completed cycles).' else: print xmlfile+': QUEUED.' for line in printme: print line else: print xmlfile+':' for line in printme: print line # incomplete=cycles-complete # if incomplete: # print xmlfile+': incomplete cycles: '+(' '.join(sorted(incomplete))) # else: # print xmlfile+': all cycles completed.'