# evennia/evennia/utils/text2html.py


"""
ANSI -> html converter

Credit for original idea and implementation
goes to Muhammad Alkarouri and his
snippet #577349 on http://code.activestate.com.

(extensively modified by Griatch 2010)
"""
from __future__ import absolute_import
from builtins import object

import re
import cgi
from .ansi import *

# Templates for xterm256 escape sequences; the %s slot is filled with a
# colour index by the parser's lookup tables.

XTERM256_FG = "\033[38;5;%sm"  # foreground colour
XTERM256_BG = "\033[48;5;%sm"  # background colour

class TextToHTMLparser(object):
    """
    This class describes a parser for converting from ANSI to html.

    All lookup tables and regular expressions are built once, as class
    attributes, when the class body is executed. `parse` is the main
    entry point; the remaining methods are the individual conversion
    passes that `parse` applies in sequence.
    """

    # number of spaces a tab is expanded to
    tabstop = 4
    # mapping html color name <-> ansi code.
    hilite = ANSI_HILITE
    unhilite = ANSI_UNHILITE
    normal = ANSI_NORMAL
    underline = ANSI_UNDERLINE
    # (css class name, ansi sequence) pairs for foreground colours: the 16
    # basic colours followed by xterm256 indices 16-255.
    colorcodes = [
        ('color-000', unhilite + ANSI_BLACK),  # pure black
        ('color-001', unhilite + ANSI_RED),
        ('color-002', unhilite + ANSI_GREEN),
        ('color-003', unhilite + ANSI_YELLOW),
        ('color-004', unhilite + ANSI_BLUE),
        ('color-005', unhilite + ANSI_MAGENTA),
        ('color-006', unhilite + ANSI_CYAN),
        ('color-007', unhilite + ANSI_WHITE),  # light grey
        ('color-008', hilite + ANSI_BLACK),  # dark grey
        ('color-009', hilite + ANSI_RED),
        ('color-010', hilite + ANSI_GREEN),
        ('color-011', hilite + ANSI_YELLOW),
        ('color-012', hilite + ANSI_BLUE),
        ('color-013', hilite + ANSI_MAGENTA),
        ('color-014', hilite + ANSI_CYAN),
        ('color-015', hilite + ANSI_WHITE),  # pure white
    ] + [("color-%03i" % (i + 16), XTERM256_FG % ("%03i" % (i + 16))) for i in range(240)]

    # (css class name, ansi sequence) pairs for background colours.
    # NOTE(review): the second group below maps hilite/unhilite *foreground*
    # sequences onto bgcolor classes - kept as-is; confirm this is intended.
    colorback = [
        ('bgcolor-000', ANSI_BACK_BLACK),  # pure black
        ('bgcolor-001', ANSI_BACK_RED),
        ('bgcolor-002', ANSI_BACK_GREEN),
        ('bgcolor-003', ANSI_BACK_YELLOW),
        ('bgcolor-004', ANSI_BACK_BLUE),
        ('bgcolor-005', ANSI_BACK_MAGENTA),
        ('bgcolor-006', ANSI_BACK_CYAN),
        ('bgcolor-007', ANSI_BACK_WHITE),  # light grey
        ('bgcolor-000', unhilite + ANSI_BLACK),  # pure black
        ('bgcolor-001', unhilite + ANSI_RED),
        ('bgcolor-002', unhilite + ANSI_GREEN),
        ('bgcolor-003', unhilite + ANSI_YELLOW),
        ('bgcolor-004', unhilite + ANSI_BLUE),
        ('bgcolor-005', unhilite + ANSI_MAGENTA),
        ('bgcolor-006', unhilite + ANSI_CYAN),
        ('bgcolor-007', unhilite + ANSI_WHITE),  # light grey
        ('bgcolor-008', hilite + ANSI_BLACK),  # dark grey
        ('bgcolor-009', hilite + ANSI_RED),
        ('bgcolor-010', hilite + ANSI_GREEN),
        ('bgcolor-011', hilite + ANSI_YELLOW),
        ('bgcolor-012', hilite + ANSI_BLUE),
        ('bgcolor-013', hilite + ANSI_MAGENTA),
        ('bgcolor-014', hilite + ANSI_CYAN),
        ('bgcolor-015', hilite + ANSI_WHITE),  # pure white
    ] + [("bgcolor-%03i" % (i + 16), XTERM256_BG % ("%03i" % (i + 16))) for i in range(240)]

    # make sure to escape [ so the sequences can be embedded in regexes
    colorcodes = [(c, code.replace("[", r"\[")) for c, code in colorcodes]
    colorback = [(c, code.replace("[", r"\[")) for c, code in colorback]
    # create stop markers: a coloured span ends at the next colour code, at
    # one of the codes listed here, or at the end of the string ("$").
    fgstop = [("", c.replace("[", r"\[")) for c in (normal, hilite, underline)]
    bgstop = [("", c.replace("[", r"\[")) for c in (normal,)]
    fgstop = "|".join(co[1] for co in colorcodes + fgstop + [("", "$")])
    bgstop = "|".join(co[1] for co in colorback + bgstop + [("", "$")])

    # pre-compile regexes
    def _compile_spans(codes, stop):
        # Build (class name, compiled regex) pairs. This is a helper function
        # rather than a comprehension written directly in the class body
        # because, on Python 3, the element expression of a comprehension
        # cannot see class-level names such as fgstop/bgstop.
        return [(cname, re.compile("(?:%s)(.*?)(?=%s)" % (code, stop)))
                for cname, code in codes]

    re_fgs = _compile_spans(colorcodes, fgstop)
    re_bgs = _compile_spans(colorback, bgstop)
    # the helper is only needed while the class body runs; don't expose it
    del _compile_spans

    re_normal = re.compile(normal.replace("[", r"\["))
    # NOTE(review): unlike the other span regexes this one is greedy (.*), so
    # a match extends to the last stop marker it can reach, not the first.
    re_hilite = re.compile("(?:%s)(.*)(?=%s)" % (hilite.replace("[", r"\["), fgstop))
    re_uline = re.compile("(?:%s)(.*?)(?=%s)" % (ANSI_UNDERLINE.replace("[", r"\["), fgstop))
    re_string = re.compile(r'(?P<htmlchars>[<&>])|(?P<space> [ \t]+)|(?P<lineend>\r\n|\r|\n)', re.S|re.M|re.I)
    re_url = re.compile(r'((?:ftp|www|https?)\W+(?:(?!\.(?:\s|$)|&\w+;)[^"\',;$*^\\(){}<>\[\]\s])+)(\.(?:\s|$)|&\w+;|)')
    re_mxplink = re.compile(r'\|lc(.*?)\|lt(.*?)\|le', re.DOTALL)

    def re_color(self, text):
        """
        Replace ansi colors with html color class names.  Let the
        client choose how it will display colors, if it wishes to.

        Foreground codes are processed first, then background codes;
        finally every remaining "normal" (reset) code is removed.

        Args:
            text (str): the string with color to replace.

        Returns:
            text (str): Re-colored text.

        """
        for colorname, regex in self.re_fgs:
            text = regex.sub(r'''<span class="%s">\1</span>''' % colorname, text)
        for bgname, regex in self.re_bgs:
            text = regex.sub(r'''<span class="%s">\1</span>''' % bgname, text)
        text = self.re_normal.sub("", text)
        return text

    def re_bold(self, text):
        """
        Wrap text following an ansi hilite code in <strong> tags.

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        return self.re_hilite.sub(r'<strong>\1</strong>', text)

    def re_underline(self, text):
        """
        Replace ansi underline with html underline class name.

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        return self.re_uline.sub(r'<span class="underline">\1</span>', text)

    def remove_bells(self, text):
        """
        Remove ansi bell characters (octal 07).

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        return text.replace('\07', '')

    def remove_backspaces(self, text):
        """
        Remove backspaces (together with the character in front of
        each one) and the escape sequence "ESC[K".

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        backspace_or_eol = r'(.\010)|(\033\[K)'
        # Substitute one occurrence at a time and rescan from the start, so
        # that a run of backspaces erases one preceding character each.
        n = 1
        while n > 0:
            text, n = re.subn(backspace_or_eol, '', text, 1)
        return text

    def convert_linebreaks(self, text):
        """
        Extra method for cleaning linebreaks. This replaces the
        two-character sequence backslash + "n"; actual line-end
        characters are converted by `do_sub`.

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        return text.replace(r'\n', r'<br>')

    def convert_urls(self, text):
        """
        Replace urls (http://...) by valid HTML.

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        # -> added target to output prevent the web browser from attempting to
        # change pages (and losing our webclient session).
        return self.re_url.sub(r'<a href="\1" target="_blank">\1</a>\2', text)

    def convert_links(self, text):
        """
        Replaces "|lc<command>|lt<text>|le" links with HTML code.

        Args:
            text (str): Text to process.

        Returns:
            text (str): Processed text.

        """
        # NOTE(review): the command part is inserted into inline JavaScript
        # without escaping quote characters - confirm the input is trusted.
        return self.re_mxplink.sub(r"""<a id='mxplink' href='#' onclick='Evennia.msg("text",["\1"],{}); return false;'>\2</a>""", text)

    def do_sub(self, match):
        """
        Helper method to be passed to re.sub,
        for handling all substitutions.

        Args:
            match (re.Matchobject): Match for substitution, produced
                by `re_string`.

        Returns:
            text (str): Processed text.

        """
        cdict = match.groupdict()
        if cdict['htmlchars']:
            # Escape by hand; this gives the same result as the former
            # cgi.escape() call, which was removed in Python 3.8.
            # "&" must be replaced first so the other entities stay intact.
            escaped = cdict['htmlchars'].replace("&", "&amp;")
            escaped = escaped.replace("<", "&lt;")
            return escaped.replace(">", "&gt;")
        if cdict['lineend']:
            return '<br>'
        elif cdict['space'] == '\t':
            return ' ' * self.tabstop
        elif cdict['space']:
            # keep runs of whitespace visible in html
            text = match.group().replace('\t', '&nbsp;' * self.tabstop)
            text = text.replace(' ', '&nbsp;')
            return text

    def parse(self, text, strip_ansi=False):
        """
        Main access function, converts a text containing ANSI codes
        into html statements.

        Args:
            text (str): Text to process.
            strip_ansi (bool, optional): Passed on to `parse_ansi`.

        Returns:
            text (str): Parsed text.
        """
        # parse everything to ansi first
        text = parse_ansi(text, strip_ansi=strip_ansi, xterm256=True, mxp=True)
        # convert all ansi to html; html-escaping must happen before any
        # tags are inserted by the passes below.
        result = self.re_string.sub(self.do_sub, text)
        result = self.re_color(result)
        result = self.re_bold(result)
        result = self.re_underline(result)
        result = self.remove_bells(result)
        result = self.convert_linebreaks(result)
        result = self.remove_backspaces(result)
        result = self.convert_urls(result)
        result = self.convert_links(result)
        # NOTE: ansi sequences not handled by the passes above are left
        # in the result.

        return result

# Module-level parser instance, used as the default `parser` of parse_html.
HTML_PARSER = TextToHTMLparser()


#
# Access function
#

def parse_html(string, strip_ansi=False, parser=HTML_PARSER):
    """
    Convert a string with ANSI markup into html.

    Args:
        string (str): Text to convert.
        strip_ansi (bool, optional): Forwarded to the parser's
            `parse` method.
        parser (TextToHTMLparser, optional): Parser to use; defaults
            to the module-level `HTML_PARSER` instance.

    Returns:
        html (str): Whatever `parser.parse` returns for `string`.

    """
    html = parser.parse(string, strip_ansi=strip_ansi)
    return html