Much improved inlinefunc regex; resolving #1498

This commit is contained in:
Griatch 2018-06-17 01:22:24 +02:00
parent a9e0ee3540
commit 266971567b

View file

@ -188,17 +188,20 @@ except AttributeError:
# regex definitions
_RE_STARTTOKEN = re.compile(r"(?<!\\)\$(\w+)\(") # unescaped $funcname{ (start of function call)
_RE_STARTTOKEN = re.compile(r"(?<!\\)\$(\w+)\(") # unescaped $funcname( (start of function call)
# note: this regex can be experimented with at https://regex101.com/r/kGR3vE/1
_RE_TOKEN = re.compile(r"""
(?<!\\)\'\'\'(?P<singlequote>.*?)(?<!\\)\'\'\'| # unescaped single-triples (escapes all inside them)
(?<!\\)\"\"\"(?P<doublequote>.*?)(?<!\\)\"\"\"| # unescaped normal triple quotes (escapes all inside them)
(?P<comma>(?<!\\)\,)| # unescaped , (argument separator)
(?P<end>(?<!\\)\))| # unescaped ) (end of function call)
(?P<start>(?<!\\)\$\w+\()| # unescaped $funcname( (start of function call)
(?P<escaped>\\'|\\"|\\\)|\\$\w+\()| # escaped tokens should re-appear in text
(?P<rest>[\w\s.-\/#!%\^&\*;:=\-_`~\|\(}{\[\]@\$\\\+\<\>?]+|\"{1}|\'{1}) # everything else """,
re.UNICODE + re.IGNORECASE + re.VERBOSE + re.DOTALL)
(?<!\\)\'\'\'(?P<singlequote>.*?)(?<!\\)\'\'\'| # single-triplets escape all inside
(?<!\\)\"\"\"(?P<doublequote>.*?)(?<!\\)\"\"\"| # double-triplets escape all inside
(?P<comma>(?<!\\)\,)| # , (argument sep)
(?P<end>(?<!\\)\))| # ) (end of func call)
(?P<start>(?<!\\)\$\w+\()| # $funcname (start of func call)
(?P<escaped> # escaped tokens to re-insert sans backslash
\\\'|\\\"|\\\)|\\\$\w+\()|
(?P<rest> # everything else to re-insert verbatim
\$(?!\w+\()|\'{1}|\"{1}|\\{1}|[^),$\'\"\\]+)""",
re.UNICODE | re.IGNORECASE | re.VERBOSE | re.DOTALL)
# Cache for function lookups.
_PARSING_CACHE = utils.LimitedSizeOrderedDict(size_limit=1000)
@ -293,6 +296,7 @@ def parse_inlinefunc(string, strip=False, available_funcs=None, **kwargs):
ncallable = 0
for match in _RE_TOKEN.finditer(string):
gdict = match.groupdict()
print("match: {}".format({key: val for key, val in gdict.items() if val}))
if gdict["singlequote"]:
stack.append(gdict["singlequote"])
elif gdict["doublequote"]: