0001"""
0002Iterator based sre token scanner
0003"""
0004import sre_parse, sre_compile, sre_constants
0005from sre_constants import BRANCH, SUBPATTERN
0006from re import VERBOSE, MULTILINE, DOTALL
0007import re
0008
# Public names exported by ``from <module> import *``.
__all__ = ['Scanner', 'pattern']
0010
# Default regex flags applied when none are given explicitly.
FLAGS = (VERBOSE | MULTILINE | DOTALL)


class Scanner(object):
    """
    Token scanner that combines a lexicon into one compiled regex.

    Each entry of the lexicon (a "token") must expose a ``pattern``
    attribute holding its regular expression source and must be callable
    as ``token(match, context)``, returning a ``(rval, next_pos)`` pair.
    All patterns are joined into a single alternation in which pattern
    number ``i`` (0-based) is wrapped in capturing group ``i + 1``, so
    ``match.lastindex`` identifies which token matched.
    """

    def __init__(self, lexicon, flags=FLAGS):
        """
        Compile ``lexicon`` into a single scanner pattern.

        ``lexicon`` is an iterable of tokens (see the class docstring) and
        ``flags`` are the regex flags used to parse every token pattern.
        Errors raised while parsing a pattern propagate to the caller.
        """
        # Slot 0 is a placeholder so that ``actions[match.lastindex]`` can
        # be indexed directly with 1-based group numbers.
        self.actions = [None]

        # Newer interpreters name the parser state class ``State`` and
        # expect groups to be registered through opengroup()/closegroup()
        # with 4-tuple SUBPATTERN nodes; older ones name it ``Pattern`` and
        # accept hand-numbered 2-tuple nodes.
        state_cls = getattr(sre_parse, 'State', None)
        modern = state_cls is not None
        if modern:
            # Flag enums are accepted but the parser state stores plain ints.
            flags = int(flags)
        else:
            state_cls = sre_parse.Pattern
        state = state_cls()
        state.flags = flags

        branches = []
        for idx, token in enumerate(lexicon):
            parsed = sre_parse.parse(token.pattern, flags)
            if modern:
                # On a fresh state the returned group id equals idx + 1.
                group = state.opengroup()
                branch = sre_parse.SubPattern(
                    state, [(SUBPATTERN, (group, 0, 0, parsed))])
                state.closegroup(group, branch)
            else:
                branch = sre_parse.SubPattern(
                    state, [(SUBPATTERN, (idx + 1, parsed))])
            branches.append(branch)
            self.actions.append(token)

        combined = sre_parse.SubPattern(state, [(BRANCH, (None, branches))])
        self.scanner = sre_compile.compile(combined)

    def iterscan(self, string, idx=0, context=None):
        """
        Yield ``(rval, end_idx)`` for each token matched in ``string``.

        Scanning starts at ``idx`` and proceeds with consecutive anchored
        matches. For every match the corresponding token is called as
        ``token(match, context)`` and must return ``(rval, next_pos)``.
        ``end_idx`` is the end of the match, or ``next_pos`` when the token
        returns a position that is not ``None`` and differs from the match
        end. Iteration stops when nothing matches at the current position
        or when a match ends where the previous one ended.
        """
        match = self.scanner.scanner(string, idx).match
        actions = self.actions
        lastend = idx
        while True:
            m = match()
            if m is None:
                break
            matchend = m.end()
            if lastend == matchend:
                # The match made no progress past the previous end; stop
                # instead of looping on the same position.
                break
            action = actions[m.lastindex]
            if action is not None:
                rval, next_pos = action(m, context)
                if next_pos is not None and next_pos != matchend:
                    # The token consumed a different amount of input than
                    # the regex did: restart scanning from its position.
                    matchend = next_pos
                    match = self.scanner.scanner(string, matchend).match
                yield rval, matchend
            lastend = matchend
0057
def pattern(pattern, flags=FLAGS):
    """
    Return a decorator that tags a function with a regular expression.

    The decorated function receives two attributes: ``pattern``, the
    regex source string given here, and ``regex``, that source compiled
    with ``flags``. The function itself is returned unchanged otherwise.
    """
    def attach(target):
        # Store the raw source first, then the compiled form.
        setattr(target, 'pattern', pattern)
        setattr(target, 'regex', re.compile(pattern, flags))
        return target

    return attach