#! /usr/bin/env python3

"""!ATParser is a text parser that replaces strings with variables and
function output."""

import sys, os, re, io, logging

##@var functions
# List of functions recognized
# @protected
functions=dict(lc=lambda x:str(x).lower(),
               uc=lambda x:str(x).upper(),
               len=lambda x:str(len(x)),
               trim=lambda x:str(x).strip())

class ParserSyntaxError(Exception):
    """!Raised when the parser encounters a syntax error."""

class ScriptAssertion(Exception):
    """!Raised when a script @[VARNAME:?message] is encountered, and
    the variable does not exist."""

class ScriptAbort(Exception):
    """!Raised when an "@** abort" directive is reached in a script."""

# NOTE(review): ATParser.parse_line raises ParserLineLimit, but no
# definition of it is visible in the reviewed source, so reaching the
# line limit would have raised NameError.  It is defined here next to
# the other parser exceptions.
class ParserLineLimit(Exception):
    """!Raised when a line number beyond the parser's max_lines is
    parsed."""

class NoSuchVariable(Exception):
    """!Raised when a script requests an unknown variable."""
    def __init__(self,infile,varname,line=None):
        """!NoSuchVariable constructor
        @param infile the input file that caused problems
        @param varname the variable that does not exist
        @param line the line number of the problematic line"""
        self.infile=infile
        self.varname=varname
        if line is None:
            self.line=None
            where='??'
        else:
            self.line=int(line)
            where=str(line)
        super().__init__(
            '%s:%s: undefined variable %s'%(infile,where,varname))
    ##@var infile
    # The file that caused the problem

    ##@var line
    # The line number that caused the problem

    ##@var varname
    # The problematic variable name

# Two-character escapes understood by replace_backslashed.
_BACKSLASH_ESCAPES={'\\n':'\n', '\\t':'\t', '\\r':'\r', '\\b':'\b'}

def replace_backslashed(text):
    """!Turns \\t to tab, \\n to end of line, \\r to carriage return, \\b to
    backspace and \\(octal) to other characters.

    Anything else, including digit sequences that are not valid octal
    (such as \\8 or \\19), is returned unchanged.
    @param text the escape sequence, starting with its backslash
    @returns the replacement character, or text itself if the escape
      is not recognized"""
    if len(text)>=2 and text[1] in '0123456789':
        try:
            return chr(int(text[1:],8))
        except ValueError:
            # Digits 8 and 9 are not octal; leave the text alone
            # instead of aborting the whole parse.
            return text
    return _BACKSLASH_ESCAPES.get(text,text)

# Parser states:

##@var outer
# Parser state for the portion of the file outside @[] and @** blocks
outer=dict(active=True,in_if_block=False,in_ifelse_block=False,used_if=False,ignore=False)

##@var if_unused_if
# Parser state for within @**if blocks that are inactive
if_unused_if=dict(active=False,in_if_block=True,in_ifelse_block=False,used_if=False,ignore=False)

##@var if_active_if
# Parser state for within @** if blocks that are active
if_active_if=dict(active=True,in_if_block=True,in_ifelse_block=False,used_if=True,ignore=False)

##@var if_used_if
# Parser state for after the end of an @** if block
if_used_if=dict(active=False,in_if_block=True,in_ifelse_block=True,used_if=True,ignore=False)

##@var if_active_else
# Parser state for inside an "else" block
if_active_else=dict(active=True,in_if_block=False,in_ifelse_block=True,used_if=True,ignore=False)

##@var if_inactive_else
# Parser state for inside an "else" block that was not used
if_inactive_else=dict(active=False,in_if_block=False,in_ifelse_block=True,used_if=True,ignore=False)

##@var ignore_if_block
# Parser state for an "if" block that was skipped
ignore_if_block=dict(active=False,in_if_block=True,in_ifelse_block=False,used_if=False,ignore=True)

##@var ignore_else_block
# Parser state for an "else" block that was skipped
ignore_else_block=dict(active=False,in_if_block=False,in_ifelse_block=True,used_if=False,ignore=True)

class ATParser:
    """!Takes input files or other data, and replaces certain strings
    with variables or functions.

    The calling convention is quite simple:

    @code{.py}
    ap=ATParser(varhash={"NAME":"Katrina", "STID":"12L"})
    ap.parse_file("input-file.txt")
    lines="line 1\\nline 2\\nline 3 of @[NAME]"
    ap.parse_lines(lines,"(string-data)")
    ap.parse_stream(sys.stdin,"(stdin)")
    @endcode

    Inputs are general strings with @@[...] and @@** escape sequences
    which follow familiar shell syntax (but with @@[...] instead of
    ${...}):

    @code{.unformatted}
    My storm is @[NAME] and the RSMC is @[RSMC:-${center:-unknown}].
    @endcode

    In this case, it would print:

    @code{.unformatted}
    My storm is Katrina and the RSMC is unknown.
    @endcode

    since NAME is set, but RSMC and center are unset.

    There are also block if statements:

    @code{.unformatted}
    @** if NAME==BILLY
    storm is billy
    @** elseif name==KATRINA
    storm is katrina
    @** else
    another storm
    @** endif
    @endcode
    """

    # NOTE(review): the reviewed text of this file is cut off partway
    # through the class docstring (it went on "and a variety of other
    # things: ...") and resumes inside the tail of var_or_command().
    # The constructor and the members used by the methods below
    # (_states, max_lines, _write, warn, replace_vars, optional_var,
    # require_file, var_or_command, ...) are therefore not reproduced
    # here and must be carried over from the original file.

    def require_data(self,data):
        """!Expand text within an @@[...] block.
        @param data the contents of the block, without the @@[ and ]
        @returns the replacement text
        @protected"""
        # startswith() rather than data[0] so an empty @[] block is
        # passed on to var_or_command instead of raising IndexError.
        if data.startswith('<'):
            # This is an instruction to read in a file.
            return self.require_file(data[1:])
        elif data=='@':
            return '@' # @[@] is replaced with @
        elif data.startswith('#'):
            if '@[' in data:
                raise ParserSyntaxError('Found a @[ construct nested within a comment (@[#...])')
            return '' # @[#stuff] is a comment
        else:
            # This is a variable name, command or error:
            return self.var_or_command(data)

    def str_state(self):
        """!Return a string description of the parser stack for debugging.
        @returns a multi-line string with one line per stacked state,
          listed from the bottom of the stack to the top"""
        out=['STATE STACK: \n']
        for state in self._states:
            out.append('state: ')
            if state['ignore']:
                out.append('ignoring block: ')
            out.append('active ' if state['active'] else 'inactive ')
            if state['in_if_block']:
                out.append('in if block, before else ')
            if state['in_ifelse_block']:
                out.append('in if block, after else ')
            if not state['in_if_block'] and not state['in_ifelse_block']:
                out.append('not if or else')
            if state['used_if']:
                out.append('(have activated a past if/elseif/else) ')
            out.append('\n')
        out.append('END\n')
        return ''.join(out)

    @property
    def active(self):
        """!Is the current block active?

        True only if every state on the stack is active; an empty
        stack counts as active.
        @protected"""
        return all(state['active'] for state in self._states)

    def top_state(self,what=None):
        """!Return the top parser state without removing it
        @param what the name of the state flag to look up; it is also
          used in error messages.  If omitted, the whole state
          dictionary is returned.
        @returns the flag as a bool if what is given, otherwise the
          top state dictionary
        @protected"""
        if what:
            if not self._states:
                raise AssertionError('Internal error: no state to search when looking for %s in top state.'%(what,))
            elif what not in self._states[-1]:
                raise AssertionError('Internal error: cannot find %s in top state.'%(what,))
            return bool(self._states[-1][what])
        else:
            return self._states[-1]

    def push_state(self,state):
        """!Push a new state to the top of the parser state stack
        @param state the new parser state
        @protected"""
        self._states.append(state)

    def pop_state(self):
        """!Remove and return the top parser state
        @protected"""
        return self._states.pop()

    def replace_state(self,state):
        """!Replace the top parser state.
        @protected
        @param state the new parser state"""
        self._states[-1]=state

    def parse_lines(self,lines,filename):
        """!Given a multi-line string, parse the contents line-by-line
        @param lines the multi-line string
        @param filename the name of the file it was from, for error messages"""
        for lineno,line in enumerate(lines.splitlines(),1):
            self.parse_line(line,filename,lineno)

    def parse_line(self,line,filename,lineno):
        """!Parses one line of text.

        Lines that start with "@**" are directives (if, elseif, else,
        endif, abort, warn, insert, include) and only update the
        parser state or pull in other files.  Any other line is
        written out, with its @@[...] blocks expanded, unless it lies
        inside an inactive block.
        @param line the line of text.
        @param filename the name of the source file, for error messages
        @param lineno the line number within the source file, for error messages"""
        m=re.match(r'^\s*\@\*\*\s*if\s+([A-Za-z_][A-Za-z_0-9.]*)\s*([!=])=\s*(.*?)\s*$',line)
        if m:
            # This is the beginning of an IF block
            if not self.active:
                # This IF lies within an inactive block, so we skip
                # this whole if, elseif, else, endif block.
                self.push_state(ignore_if_block)
                return
            (left,comp,right)=m.groups()
            left=self.optional_var(left)
            right=self.replace_vars(right)
            # "==" is satisfied by equal operands, "!=" by unequal ones.
            if (left==right)==(comp=='='):
                self.push_state(if_active_if)
            else:
                self.push_state(if_unused_if)
            return

        m=re.match(r'^\s*\@\*\*\s*abort\s+(.*)$',line)
        if m:
            if self.active:
                raise ScriptAbort('Found an abort directive on line %d: %s'%(
                        lineno, m.group(1)))
            return

        m=re.match(r'^\s*\@\*\*\s*warn\s+(.*)$',line)
        if m:
            if self.active:
                self.warn(self.replace_vars(m.group(1)))
            return

        m=re.match(r'^\s*\@\*\*\s*else\s*if\s+([A-Za-z_][A-Za-z_0-9.]*)\s*([!=])=\s*(.*?)\s*\Z',line)
        if m:
            # Check for an empty stack before any top_state() call, so
            # the user gets a syntax error and not an internal
            # AssertionError.
            if not self._states:
                raise ParserSyntaxError(
                    'Found an elseif without a matching if at line %d'%lineno)
            if self.top_state('ignore'):
                return
            (left,comp,right)=m.groups()
            left=self.optional_var(left)
            right=self.replace_vars(right)
            if not self.top_state('in_if_block'):
                if self.top_state('in_ifelse_block'):
                    raise ParserSyntaxError(
                        'Unexpected elseif after an else at line %d'%lineno)
                else:
                    raise ParserSyntaxError(
                        'Unexpected elseif at line %d'%lineno)
            elif self.top_state('used_if'):
                # the "if" or a prior elseif matched, so we ignore
                # this elseif and deactivate the block so all future
                # if/else/elseif will be unused.
                self.replace_state(if_used_if)
            elif not self.top_state('active'):
                if (left==right)==(comp=='='):
                    self.replace_state(if_active_if)
            return

        m=re.match(r'^\s*\@\*\*\s*else\s*(?:\#.*)?$',line)
        if m:
            if not self._states:
                raise ParserSyntaxError('Found an else outside an if at line %d'%lineno)
            # The structural checks come first: testing used_if first
            # would silently accept a second else after an if/elseif
            # branch had already run.
            if not self.top_state('in_if_block'):
                if self.top_state('in_ifelse_block'):
                    raise ParserSyntaxError('Found an extra else at line %d'%lineno)
                raise ParserSyntaxError('Found an else outside an if at line %d'%lineno)
            if self.top_state('ignore'):
                # We're ignoring a whole if/elseif/else/endif block
                # because it lies within an inactive block.
                self.replace_state(ignore_else_block)
            elif self.top_state('used_if'):
                self.replace_state(if_inactive_else)
            else:
                self.replace_state(if_active_else)
            return

        m=re.match(r'^\s*\@\*\*\s*endif\s*(?:\#.*)?$',line)
        if m:
            if self._states and (self.top_state('in_if_block')
                                 or self.top_state('in_ifelse_block')):
                self.pop_state()
            else:
                raise ParserSyntaxError('Found an endif without matching if at line %d'%lineno)
            return

        m=re.match(r'^\s*\@\*\*\s*insert\s*(\S.*?)\s*$',line)
        if m:
            if self.active:
                # The file is written out as-is, without parsing.
                contents=self.require_file(m.group(1))
                self._write(contents)
            return

        m=re.match(r'^\s*\@\*\*\s*include\s*(\S.*?)\s*$',line)
        if m:
            if self.active:
                # The file is parsed as if it were part of this one.
                ffilename=m.group(1)
                contents=self.require_file(ffilename)
                self.parse_lines(contents,ffilename)
            return

        m=re.match(r'^\s*\@\*\*.*',line)
        if m:
            raise ParserSyntaxError('Invalid \\@** directive in line "%s". Ignoring line.\n'%(line,))

        if self._states and not self.active:
            return # inside a disabled block

        # Replace text of the form @[VARNAME] with the contents of the
        # respective environment variable:
        (outline,_)=re.subn(r'\@\[((?:\n|[^\]])*)\]',
                            lambda x: self.require_data(x.group(1)),
                            line)
        if not isinstance(outline,str):
            raise TypeError('The re.subn returned a %s %s instead of a basestring.'%(type(outline).__name__,repr(outline)))
        self._write(outline)
        if lineno>self.max_lines:
            raise ParserLineLimit('Read past max_lines=%d lines from input file. Something is probably wrong.'%self.max_lines)