Merge branch 'text2html_rewrite' of https://github.com/InspectorCaracal/evennia into InspectorCaracal-text2html_rewrite

This commit is contained in:
Griatch 2022-06-04 13:04:30 +02:00
commit 5df72530c6
2 changed files with 202 additions and 232 deletions

View file

@ -7,20 +7,20 @@ import mock
class TestText2Html(TestCase):
def test_re_color(self):
def test_format_styles(self):
parser = text2html.HTML_PARSER
self.assertEqual("foo", parser.re_color("foo"))
self.assertEqual("foo", parser.format_styles("foo"))
self.assertEqual(
'<span class="color-001">red</span>foo',
parser.re_color(ansi.ANSI_UNHILITE + ansi.ANSI_RED + "red" + ansi.ANSI_NORMAL + "foo"),
parser.format_styles(ansi.ANSI_UNHILITE + ansi.ANSI_RED + "red" + ansi.ANSI_NORMAL + "foo"),
)
self.assertEqual(
'<span class="bgcolor-001">red</span>foo',
parser.re_color(ansi.ANSI_BACK_RED + "red" + ansi.ANSI_NORMAL + "foo"),
parser.format_styles(ansi.ANSI_BACK_RED + "red" + ansi.ANSI_NORMAL + "foo"),
)
self.assertEqual(
'<span class="bgcolor-001"><span class="color-002">red</span></span>foo',
parser.re_color(
'<span class="bgcolor-001 color-002">red</span>foo',
parser.format_styles(
ansi.ANSI_BACK_RED
+ ansi.ANSI_UNHILITE
+ ansi.ANSI_GREEN
@ -29,63 +29,37 @@ class TestText2Html(TestCase):
+ "foo"
),
)
@unittest.skip("parser issues")
def test_re_bold(self):
parser = text2html.HTML_PARSER
self.assertEqual("foo", parser.re_bold("foo"))
self.assertEqual(
# "a <strong>red</strong>foo", # TODO: why not?
"a <strong>redfoo</strong>",
parser.re_bold("a " + ansi.ANSI_HILITE + "red" + ansi.ANSI_UNHILITE + "foo"),
)
@unittest.skip("parser issues")
def test_re_underline(self):
parser = text2html.HTML_PARSER
self.assertEqual("foo", parser.re_underline("foo"))
self.assertEqual(
'a <span class="underline">red</span>' + ansi.ANSI_NORMAL + "foo",
parser.re_underline(
'a <span class="underline">red</span>foo',
parser.format_styles(
"a "
+ ansi.ANSI_UNDERLINE
+ "red"
+ ansi.ANSI_NORMAL # TODO: why does it keep it?
+ ansi.ANSI_NORMAL
+ "foo"
),
)
@unittest.skip("parser issues")
def test_re_blinking(self):
parser = text2html.HTML_PARSER
self.assertEqual("foo", parser.re_blinking("foo"))
self.assertEqual(
'a <span class="blink">red</span>' + ansi.ANSI_NORMAL + "foo",
parser.re_blinking(
'a <span class="blink">red</span>foo',
parser.format_styles(
"a "
+ ansi.ANSI_BLINK
+ "red"
+ ansi.ANSI_NORMAL # TODO: why does it keep it?
+ ansi.ANSI_NORMAL
+ "foo"
),
)
@unittest.skip("parser issues")
def test_re_inversing(self):
parser = text2html.HTML_PARSER
self.assertEqual("foo", parser.re_inversing("foo"))
self.assertEqual(
'a <span class="inverse">red</span>' + ansi.ANSI_NORMAL + "foo",
parser.re_inversing(
'a <span class="bgcolor-007 color-000">red</span>foo',
parser.format_styles(
"a "
+ ansi.ANSI_INVERSE
+ "red"
+ ansi.ANSI_NORMAL # TODO: why does it keep it?
+ ansi.ANSI_NORMAL
+ "foo"
),
)
@unittest.skip("parser issues")
def test_remove_bells(self):
parser = text2html.HTML_PARSER
self.assertEqual("foo", parser.remove_bells("foo"))
@ -95,7 +69,7 @@ class TestText2Html(TestCase):
"a "
+ ansi.ANSI_BEEP
+ "red"
+ ansi.ANSI_NORMAL # TODO: why does it keep it?
+ ansi.ANSI_NORMAL
+ "foo"
),
)
@ -110,7 +84,6 @@ class TestText2Html(TestCase):
self.assertEqual("foo", parser.convert_linebreaks("foo"))
self.assertEqual("a<br> redfoo<br>", parser.convert_linebreaks("a\n redfoo\n"))
@unittest.skip("parser issues")
def test_convert_urls(self):
parser = text2html.HTML_PARSER
self.assertEqual("foo", parser.convert_urls("foo"))
@ -118,7 +91,6 @@ class TestText2Html(TestCase):
'a <a href="http://redfoo" target="_blank">http://redfoo</a> runs',
parser.convert_urls("a http://redfoo runs"),
)
# TODO: doesn't URL encode correctly
def test_sub_mxp_links(self):
parser = text2html.HTML_PARSER
@ -186,22 +158,22 @@ class TestText2Html(TestCase):
self.assertEqual("foo", text2html.parse_html("foo"))
self.maxDiff = None
self.assertEqual(
# TODO: note that the blink is currently *not* correctly aborted
# with |n here! This is probably not possible to correctly handle
# with regex - a stateful parser may be needed.
# blink back-cyan normal underline red green yellow blue magenta cyan back-green
text2html.parse_html("|^|[CHello|n|u|rW|go|yr|bl|md|c!|[G!"),
'<span class="blink">'
'<span class="bgcolor-006">Hello</span>' # noqa
'<span class="underline">'
'<span class="color-009">W</span>' # noqa
'<span class="color-010">o</span>'
'<span class="color-011">r</span>'
'<span class="color-012">l</span>'
'<span class="color-013">d</span>'
'<span class="color-014">!'
'<span class="bgcolor-002">!</span>' # noqa
"</span>"
"</span>"
"</span>",
'<span class="blink bgcolor-006">'
'Hello'
'</span><span class="underline color-009">'
'W'
'</span><span class="underline color-010">'
'o'
'</span><span class="underline color-011">'
'r'
'</span><span class="underline color-012">'
'l'
'</span><span class="underline color-013">'
'd'
'</span><span class="underline color-014">'
'!'
'</span><span class="underline bgcolor-002 color-014">'
'!'
'</span>',
)

View file

@ -12,11 +12,10 @@ import re
from html import escape as html_escape
from .ansi import *
# All xterm256 RGB equivalents
XTERM256_FG = "\033[38;5;%sm"
XTERM256_BG = "\033[48;5;%sm"
XTERM256_FG = "\033[38;5;{}m"
XTERM256_BG = "\033[48;5;{}m"
class TextToHTMLparser(object):
@ -25,77 +24,55 @@ class TextToHTMLparser(object):
"""
tabstop = 4
# mapping html color name <-> ansi code.
hilite = ANSI_HILITE
unhilite = ANSI_UNHILITE # this will be stripped - there is no css equivalent.
normal = ANSI_NORMAL # "
underline = ANSI_UNDERLINE
blink = ANSI_BLINK
inverse = ANSI_INVERSE # this will produce an outline; no obvious css equivalent?
colorcodes = [
("color-000", unhilite + ANSI_BLACK), # pure black
("color-001", unhilite + ANSI_RED),
("color-002", unhilite + ANSI_GREEN),
("color-003", unhilite + ANSI_YELLOW),
("color-004", unhilite + ANSI_BLUE),
("color-005", unhilite + ANSI_MAGENTA),
("color-006", unhilite + ANSI_CYAN),
("color-007", unhilite + ANSI_WHITE), # light grey
("color-008", hilite + ANSI_BLACK), # dark grey
("color-009", hilite + ANSI_RED),
("color-010", hilite + ANSI_GREEN),
("color-011", hilite + ANSI_YELLOW),
("color-012", hilite + ANSI_BLUE),
("color-013", hilite + ANSI_MAGENTA),
("color-014", hilite + ANSI_CYAN),
("color-015", hilite + ANSI_WHITE), # pure white
] + [("color-%03i" % (i + 16), XTERM256_FG % ("%i" % (i + 16))) for i in range(240)]
colorback = [
("bgcolor-000", ANSI_BACK_BLACK), # pure black
("bgcolor-001", ANSI_BACK_RED),
("bgcolor-002", ANSI_BACK_GREEN),
("bgcolor-003", ANSI_BACK_YELLOW),
("bgcolor-004", ANSI_BACK_BLUE),
("bgcolor-005", ANSI_BACK_MAGENTA),
("bgcolor-006", ANSI_BACK_CYAN),
("bgcolor-007", ANSI_BACK_WHITE), # light grey
("bgcolor-008", hilite + ANSI_BACK_BLACK), # dark grey
("bgcolor-009", hilite + ANSI_BACK_RED),
("bgcolor-010", hilite + ANSI_BACK_GREEN),
("bgcolor-011", hilite + ANSI_BACK_YELLOW),
("bgcolor-012", hilite + ANSI_BACK_BLUE),
("bgcolor-013", hilite + ANSI_BACK_MAGENTA),
("bgcolor-014", hilite + ANSI_BACK_CYAN),
("bgcolor-015", hilite + ANSI_BACK_WHITE), # pure white
] + [("bgcolor-%03i" % (i + 16), XTERM256_BG % ("%i" % (i + 16))) for i in range(240)]
style_codes = [
# non-color style markers
ANSI_NORMAL,
ANSI_UNDERLINE,
ANSI_HILITE,
ANSI_UNHILITE,
ANSI_INVERSE,
ANSI_BLINK,
ANSI_INV_HILITE,
ANSI_BLINK_HILITE,
ANSI_INV_BLINK,
ANSI_INV_BLINK_HILITE,
]
ansi_color_codes = [
# Foreground colors
ANSI_BLACK,
ANSI_RED,
ANSI_GREEN,
ANSI_YELLOW,
ANSI_BLUE,
ANSI_MAGENTA,
ANSI_CYAN,
ANSI_WHITE,
]
xterm_fg_codes = [ XTERM256_FG.format(i + 16) for i in range(240) ]
# make sure to escape [
# colorcodes = [(c, code.replace("[", r"\[")) for c, code in colorcodes]
# colorback = [(c, code.replace("[", r"\[")) for c, code in colorback]
fg_colormap = dict((code, clr) for clr, code in colorcodes)
bg_colormap = dict((code, clr) for clr, code in colorback)
ansi_bg_codes = [
# Background colors
ANSI_BACK_BLACK,
ANSI_BACK_RED,
ANSI_BACK_GREEN,
ANSI_BACK_YELLOW,
ANSI_BACK_BLUE,
ANSI_BACK_MAGENTA,
ANSI_BACK_CYAN,
ANSI_BACK_WHITE,
]
xterm_bg_codes = [ XTERM256_BG.format(i + 16) for i in range(240) ]
re_style = re.compile(r"({})".format('|'.join(style_codes + ansi_color_codes + xterm_fg_codes + ansi_bg_codes + xterm_bg_codes).replace("[",r"\[")))
# create stop markers
fgstop = "(?:\033\[1m|\033\[22m){0,1}\033\[3[0-8].*?m|\033\[0m|$"
bgstop = "(?:\033\[1m|\033\[22m){0,1}\033\[4[0-8].*?m|\033\[0m|$"
bgfgstop = bgstop[:-2] + fgstop
colorlist = [ ANSI_UNHILITE + code for code in ansi_color_codes ] + [ ANSI_HILITE + code for code in ansi_color_codes ] + xterm_fg_codes
fgstart = "((?:\033\[1m|\033\[22m){0,1}\033\[3[0-8].*?m)"
bgstart = "((?:\033\[1m|\033\[22m){0,1}\033\[4[0-8].*?m)"
bgfgstart = bgstart + r"((?:\033\[1m|\033\[22m){0,1}\033\[[3-4][0-8].*?m){0,1}"
bglist = ansi_bg_codes + [ ANSI_HILITE + code for code in ansi_bg_codes ] + xterm_bg_codes
# extract color markers, tagging the start marker and the text marked
re_fgs = re.compile(fgstart + "(.*?)(?=" + fgstop + ")")
re_bgs = re.compile(bgstart + "(.*?)(?=" + bgstop + ")")
re_bgfg = re.compile(bgfgstart + "(.*?)(?=" + bgfgstop + ")")
re_normal = re.compile(normal.replace("[", r"\["))
re_hilite = re.compile("(?:%s)(.*)(?=%s|%s)" % (hilite.replace("[", r"\["), fgstop, bgstop))
re_unhilite = re.compile("(?:%s)(.*)(?=%s|%s)" % (unhilite.replace("[", r"\["), fgstop, bgstop))
re_uline = re.compile("(?:%s)(.*?)(?=%s|%s)" % (underline.replace("[", r"\["), fgstop, bgstop))
re_blink = re.compile("(?:%s)(.*?)(?=%s|%s)" % (blink.replace("[", r"\["), fgstop, bgstop))
re_inverse = re.compile("(?:%s)(.*?)(?=%s|%s)" % (inverse.replace("[", r"\["), fgstop, bgstop))
re_string = re.compile(
r"(?P<htmlchars>[<&>])|(?P<tab>[\t]+)|(?P<lineend>\r\n|\r|\n)",
re.S | re.M | re.I,
@ -106,100 +83,6 @@ class TextToHTMLparser(object):
re_mxplink = re.compile(r"\|lc(.*?)\|lt(.*?)\|le", re.DOTALL)
re_mxpurl = re.compile(r"\|lu(.*?)\|lt(.*?)\|le", re.DOTALL)
def _sub_bgfg(self, colormatch):
# print("colormatch.groups()", colormatch.groups())
bgcode, fgcode, text = colormatch.groups()
if not fgcode:
ret = r"""<span class="%s">%s</span>""" % (
self.bg_colormap.get(bgcode, self.fg_colormap.get(bgcode, "err")),
text,
)
else:
ret = r"""<span class="%s"><span class="%s">%s</span></span>""" % (
self.bg_colormap.get(bgcode, self.fg_colormap.get(bgcode, "err")),
self.fg_colormap.get(fgcode, self.bg_colormap.get(fgcode, "err")),
text,
)
return ret
def _sub_fg(self, colormatch):
code, text = colormatch.groups()
return r"""<span class="%s">%s</span>""" % (self.fg_colormap.get(code, "err"), text)
def _sub_bg(self, colormatch):
code, text = colormatch.groups()
return r"""<span class="%s">%s</span>""" % (self.bg_colormap.get(code, "err"), text)
def re_color(self, text):
"""
Replace ansi colors with html color class names. Let the
client choose how it will display colors, if it wishes to.
Args:
text (str): the string with color to replace.
Returns:
text (str): Re-colored text.
"""
text = self.re_bgfg.sub(self._sub_bgfg, text)
text = self.re_fgs.sub(self._sub_fg, text)
text = self.re_bgs.sub(self._sub_bg, text)
text = self.re_normal.sub("", text)
return text
def re_bold(self, text):
"""
Clean out superfluous hilights rather than set <strong>to make
it match the look of telnet.
Args:
text (str): Text to process.
Returns:
text (str): Processed text.
"""
text = self.re_hilite.sub(r"<strong>\1</strong>", text)
return self.re_unhilite.sub(r"\1", text) # strip unhilite - there is no equivalent in css.
def re_underline(self, text):
"""
Replace ansi underline with html underline class name.
Args:
text (str): Text to process.
Returns:
text (str): Processed text.
"""
return self.re_uline.sub(r'<span class="underline">\1</span>', text)
def re_blinking(self, text):
"""
Replace ansi blink with custom blink css class
Args:
text (str): Text to process.
Returns:
text (str): Processed text.
"""
return self.re_blink.sub(r'<span class="blink">\1</span>', text)
def re_inversing(self, text):
"""
Replace ansi inverse with custom inverse css class
Args:
text (str): Text to process.
Returns:
text (str): Processed text.
"""
return self.re_inverse.sub(r'<span class="inverse">\1</span>', text)
def remove_bells(self, text):
"""
Remove ansi specials
@ -211,7 +94,7 @@ class TextToHTMLparser(object):
text (str): Processed text.
"""
return text.replace("\07", "")
return text.replace(ANSI_BEEP, "")
def remove_backspaces(self, text):
"""
@ -292,7 +175,7 @@ class TextToHTMLparser(object):
url=url, text=text
)
return val
def sub_text(self, match):
"""
Helper method to be passed to re.sub,
@ -314,6 +197,126 @@ class TextToHTMLparser(object):
text = cdict["tab"].replace("\t", " " * (self.tabstop))
return text
return None
def format_styles(self, text):
    """
    Takes a string with parsed ANSI codes and replaces them with
    HTML spans and CSS classes.

    The text is split on every known ANSI code. Codes only update the
    tracked style state (foreground, background, hilite, inverse and the
    `blink`/`underline` classes); a `<span>` is written lazily, right
    before the next piece of plain text, so that several consecutive
    codes collapse into one span with a combined class list.

    Args:
        text (str): The string to process.

    Returns:
        text (str): Processed text.

    Notes:
        - `ANSI_NORMAL` always resets the tracked style, also when no span
          is currently open (a colour code immediately followed by a reset
          must not leak onto the text after it).
        - Text that ends up with the default style (light grey on black,
          no extra classes) is emitted without a wrapping span instead of
          a span with an empty `class` attribute.

    """
    # split out the ANSI codes and clean out any empty items
    tokens = [token for token in self.re_style.split(text) if token]

    # build the lookup tables once rather than once per token
    fg_codes = set(self.ansi_color_codes + self.xterm_fg_codes)
    bg_codes = set(self.ansi_bg_codes + self.xterm_bg_codes)
    hilite_codes = (ANSI_HILITE, ANSI_UNHILITE, ANSI_INV_HILITE, ANSI_INV_BLINK_HILITE)
    inverse_codes = (ANSI_INVERSE, ANSI_INV_HILITE, ANSI_INV_BLINK_HILITE)
    blink_codes = (ANSI_BLINK, ANSI_BLINK_HILITE, ANSI_INV_BLINK_HILITE)

    output = []
    # non-color classes that persist until the next reset
    classes = []
    # True while a <span> has been opened and not yet closed
    span_open = False
    # True when a code was seen since the last text, i.e. the style changed
    style_changed = False
    inverse = False
    # default color is light grey - unhilite + white
    hilight = ANSI_UNHILITE
    fg = ANSI_WHITE
    # default bg is black
    bg = ANSI_BACK_BLACK

    for token in tokens:
        if token == ANSI_NORMAL:
            # reset all current styling, closing the open span if any
            if span_open:
                output.append("</span>")
            classes = []
            span_open = False
            style_changed = False
            inverse = False
            hilight = ANSI_UNHILITE
            fg = ANSI_WHITE
            bg = ANSI_BACK_BLACK

        elif token in fg_codes:
            # change color; the code itself is dropped from the output
            fg = token
            style_changed = True

        elif token in bg_codes:
            # change bg color; the code itself is dropped from the output
            bg = token
            style_changed = True

        elif token in self.style_codes:
            # non-color codes; combined codes may trigger several branches
            style_changed = True
            if token in hilite_codes:
                hilight = ANSI_UNHILITE if token == ANSI_UNHILITE else ANSI_HILITE
            if token in inverse_codes:
                inverse = True
            if token in blink_codes and "blink" not in classes:
                classes.append("blink")
            if token == ANSI_UNDERLINE and "underline" not in classes:
                classes.append("underline")

        else:
            # normal text
            if style_changed:
                style_changed = False
                # get indices for the fg and bg codes
                bg_index = self.bglist.index(bg)
                try:
                    color_index = self.colorlist.index(hilight + fg)
                except ValueError:
                    # xterm256 colors don't have the hilight codes
                    color_index = self.colorlist.index(fg)

                if inverse:
                    # inverse means swap fg and bg indices
                    bg_index, color_index = color_index, bg_index

                bg_class = "bgcolor-{:03d}".format(bg_index)
                color_class = "color-{:03d}".format(color_index)

                span_classes = list(classes)
                # black bg is the default, don't explicitly style
                if bg_class != "bgcolor-000":
                    span_classes.append(bg_class)
                # light grey text is the default, don't explicitly style
                if color_class != "color-007":
                    span_classes.append(color_class)

                # close any prior span before switching style
                if span_open:
                    output.append("</span>")
                    span_open = False
                # only open a new span if there is something to style
                if span_classes:
                    output.append('<span class="{}">'.format(" ".join(span_classes)))
                    span_open = True

            output.append(token)

    # close span if necessary
    if span_open:
        output.append("</span>")

    # recombine back into string
    return "".join(output)
def parse(self, text, strip_ansi=False):
"""
@ -328,19 +331,14 @@ class TextToHTMLparser(object):
text (str): Parsed text.
"""
# print(f"incoming text:\n{text}")
# parse everything to ansi first
text = parse_ansi(text, strip_ansi=strip_ansi, xterm256=True, mxp=True)
# convert all ansi to html
result = re.sub(self.re_string, self.sub_text, text)
result = re.sub(self.re_mxplink, self.sub_mxp_links, result)
result = re.sub(self.re_mxpurl, self.sub_mxp_urls, result)
result = self.re_color(result)
result = self.re_bold(result)
result = self.re_underline(result)
result = self.re_blinking(result)
result = self.re_inversing(result)
result = self.remove_bells(result)
result = self.format_styles(result)
result = self.convert_linebreaks(result)
result = self.remove_backspaces(result)
result = self.convert_urls(result)