evennia/src/utils/text2html.py


"""
ANSI -> html converter

Credit for original idea and implementation 
goes to Muhammad Alkarouri and his 
snippet #577349 on http://code.activestate.com.

(extensively modified by Griatch 2010)
"""

import re
import cgi
from src.utils import ansi

class TextToHTMLparser(object):
    """
    This class describes a parser for converting from ansi to html.

    The conversion is a pipeline of small text filters, run in order
    by parse(). Each filter takes a string and returns a new string,
    so the individual steps can also be used on their own.
    """

    # mapping html color name <-> ansi code.
    # Obs order matters - longer ansi codes are replaced first.
    colorcodes = [('white', '\033[1m\033[37m'),
                  ('cyan', '\033[1m\033[36m'),
                  ('blue', '\033[1m\033[34m'),
                  ('red', '\033[1m\033[31m'),
                  ('magenta', '\033[1m\033[35m'),
                  ('lime', '\033[1m\033[32m'),
                  ('yellow', '\033[1m\033[33m'),
                  ('gray', '\033[37m'),
                  ('teal', '\033[36m'),
                  ('navy', '\033[34m'),
                  ('maroon', '\033[31m'),
                  ('purple', '\033[35m'),
                  ('green', '\033[32m'),
                  ('olive', '\033[33m')]
    normalcode = '\033[0m'
    tabstop = 4

    # One pass over the raw text: html special characters, leading
    # indentation, line endings and whitespace-delimited http/ftp urls.
    # The last (unnamed) group is the delimiter following a url; '\r\n'
    # is tried first so a CRLF after a url is consumed as ONE line break.
    re_string = re.compile(
        r'(?P<htmlchars>[<&>])|(?P<space>^[ \t]+)|(?P<lineend>\r\n|\r|\n)|'
        r'(?P<protocol>(^|\s)((http|ftp)://.*?))(\r\n|\s|$)',
        re.S | re.M | re.I)

    # A url in already-escaped html text. It stops at whitespace, at tag
    # delimiters, at a double quote (so it cannot break out of the href
    # attribute) and at an escaped '<' / '>'. The final character may not
    # be sentence punctuation or a closing bracket.
    re_url = re.compile(
        r'\b((?:(?:https?|ftp)://|www\.)'
        r'(?:(?!&(?:lt|gt);)[^\s<>"])*'
        r'(?!&(?:lt|gt);)[^\s<>").,:;?\]}])',
        re.I)

    # A character followed by a backspace, or the ansi "erase to end of
    # line" sequence.
    re_erase = re.compile(r'(.\010)|(\033\[K)')

    def _escape_html(self, text):
        "Escape &, < and > ('&' first, so entities are not double-escaped)"
        return (text.replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;'))

    def _ansi_to_span(self, text, code, style):
        """
        Wrap every stretch of text between the ansi sequence `code` and
        the next normalcode in a <span> carrying the css `style`.
        Sequences without a closing normalcode are left untouched.
        """
        regexp = "%s(.*?)%s" % (re.escape(code), re.escape(self.normalcode))
        return re.sub(regexp, r'<span style="%s">\1</span>' % style, text)

    def re_color(self, text):
        "Replace ansi colors with html color tags"
        for colorname, code in self.colorcodes:
            text = self._ansi_to_span(text, code, "color: %s" % colorname)
        return text

    def re_bold(self, text):
        "Replace ansi hilight with bold text"
        return self._ansi_to_span(text, '\033[1m', "font-weight:bold")

    def re_underline(self, text):
        "Replace ansi underline with html equivalent"
        return self._ansi_to_span(text, '\033[4m',
                                  "text-decoration: underline")

    def remove_bells(self, text):
        "Remove ansi bell characters"
        return text.replace('\07', '')

    def remove_backspaces(self, text):
        """
        Remove backspaces together with the character each one erases,
        and remove ansi erase-to-end-of-line sequences.
        """
        # One substitution per pass: removing "b\b" from "ab\b\b" exposes
        # a new "a\b" pair that a single global pass would not see.
        n = 1
        while n > 0:
            text, n = self.re_erase.subn('', text, count=1)
        return text

    def convert_linebreaks(self, text):
        """
        Replace literal backslash-n sequences (the two characters '\\'
        and 'n') with <br>. Real newline characters are handled by
        do_sub.
        """
        return text.replace(r'\n', r'<br>')

    def convert_urls(self, text):
        """
        Replace urls (http://..., https://..., ftp://..., www....) by
        html links. The text is expected to be html-escaped already.
        """
        def make_link(match):
            url = match.group(1)
            href = url
            if url[:4].lower() == 'www.':
                # without a scheme the browser treats it as a relative link
                href = 'http://' + url
            return '<a href="%s">%s</a>' % (href, url)
        return self.re_url.sub(make_link, text)

    def do_sub(self, m):
        """
        Helper method to be passed to re.sub together with re_string.
        Returns the html replacement for one match.
        """
        c = m.groupdict()
        if c['htmlchars']:
            return self._escape_html(c['htmlchars'])
        if c['lineend']:
            return '<br>'
        if c['space']:
            # leading indentation: expand tabs, then make it non-breaking
            indent = c['space'].replace('\t', ' ' * self.tabstop)
            return indent.replace(' ', '&nbsp;')
        url = c['protocol']
        if url is None:
            # defensive: nothing we know how to convert
            return m.group()
        # The match also consumed the delimiter after the url; put it
        # back, or the url would be glued to the following text.
        trailing = m.groups()[-1] or ''
        if trailing in ('\n', '\r', '\r\n'):
            trailing = '<br>'
        # The url swallowed any html characters before the htmlchars
        # alternative could see them, so escape them here.
        return self._escape_html(url) + trailing

    def parse(self, text):
        """
        Main access function, converts a text containing
        ansi codes into html statements.
        """

        # parse everything to ansi first
        text = ansi.parse_ansi(text)

        # Strip control characters while the text is still raw. Doing it
        # after the html conversion lets a backspace eat part of an
        # entity or tag (e.g. "&lt;" followed by backspace -> "&lt").
        text = self.remove_bells(text)
        text = self.remove_backspaces(text)

        # convert all ansi to html
        result = self.re_string.sub(self.do_sub, text)
        result = self.re_color(result)
        result = self.re_bold(result)
        result = self.re_underline(result)
        result = self.convert_linebreaks(result)
        result = self.convert_urls(result)

        # clean out eventual ansi that was missed
        result = ansi.parse_ansi(result, strip_ansi=True)

        return result

# Shared parser instance, created once when the module is imported and
# used as the default `parser` argument of parse_html().
HTML_PARSER = TextToHTMLparser()

#
# Access function
#

def parse_html(string, parser=HTML_PARSER):
    """
    Convert a string with ansi markup into html.

    string - the text to convert
    parser - object whose parse() method does the conversion; defaults
             to the module-level HTML_PARSER instance

    Returns the result of parser.parse(string).
    """
    html = parser.parse(string)
    return html
Evennia now runs on its own Twisted webserver (no need for testserver or Apache if you don't want to). Evennia now also has an ajax long-polling web client running from Twisted. The web client requires no extra dependencies beyond jQuery which is included. The src/server structure has been cleaned up and rewritten to make it easier to add new protocols in the future - all new protocols need to inherit from server.session.Session, which implements a set of hooks that Evennia uses to communicate. The current web client protocol is functional but does not implement any of rcaskey's suggestions as of yet - it uses a separate data object passed through msg() to communicate between the server and the various protocols. Also the client itself could probably need cleanup and 'prettification'. The fact that the system runs a hybrid of Django and Twisted, getting the best of both worlds should allow for many possibilities in the future. /Griatch 2010-12-07 02:34:59 +00:00
			`"""`
			`ANSI -> html converter`

			`Credit for original idea and implementation`
			`goes to Muhammad Alkarouri and his`
			`snippet #577349 on http://code.activestate.com.`

			`(extensively modified by Griatch 2010)`
			`"""`

			`import re`
			`import cgi`
			`from src.utils import ansi`

Fixed a string error in the webclient that could appear on some server setups. Added URL parsing to the webclient. 2010-12-07 21:46:26 +00:00			`class TextToHTMLparser(object):`
Evennia now runs on its own Twisted webserver (no need for testserver or Apache if you don't want to). Evennia now also has an ajax long-polling web client running from Twisted. The web client requires no extra dependencies beyond jQuery which is included. The src/server structure has been r cleaned up and rewritten to make it easier to add new protocols in the future - all new protocols need to inherit from server.session.Session, whi ch implements a set of hooks that Evennia uses to communicate. The current web client protocol is functional but does not implement any of rcaskey 's suggestions as of yet - it uses a separate data object passed through msg() to communicate between the server and the various protocols. Also the client itself could probably need cleanup and 'prettification'. The fact that the system runs a hybrid of Django and Twisted, getting the best of both worlds should allow for many possibilities in the future. /Griatch 2010-12-07 02:34:59 +00:00			`"""`
			`This class describes a parser for converting from ansi to html.`
			`"""`

			`# mapping html color name <-> ansi code.`
			`# Obs order matters - longer ansi codes are replaced first.`
			`colorcodes = [('white', '\033[1m\033[37m'),`
			`('cyan', '\033[1m\033[36m'),`
			`('blue', '\033[1m\033[34m'),`
			`('red', '\033[1m\033[31m'),`
			`('magenta', '\033[1m\033[35m'),`
			`('lime', '\033[1m\033[32m'),`
			`('yellow', '\033[1m\033[33m'),`
			`('gray', '\033[37m'),`
			`('teal', '\033[36m'),`
			`('navy', '\033[34m'),`
			`('maroon', '\033[31m'),`
			`('purple', '\033[35m'),`
			`('green', '\033[32m'),`
			`('olive', '\033[33m')]`
			`normalcode = '\033[0m'`
			`tabstop = 4`

Cleaned up the webclient and changed how it handles sessions and identifies with the server. Fixed some reported bugs caused by the changed layout of sessionhandler. 2010-12-11 13:37:26 +00:00			`re_string = re.compile(r'(?P<htmlchars>[<&>])\|(?P<space>^[ \t]+)\|(?P<lineend>\r\n\|\r\|\n)\|(?P<protocol>(^\|\s)((http\|ftp)://.*?))(\s\|$)',`
Evennia now runs on its own Twisted webserver (no need for testserver or Apache if you don't want to). Evennia now also has an ajax long-polling web client running from Twisted. The web client requires no extra dependencies beyond jQuery which is included. The src/server structure has been r cleaned up and rewritten to make it easier to add new protocols in the future - all new protocols need to inherit from server.session.Session, whi ch implements a set of hooks that Evennia uses to communicate. The current web client protocol is functional but does not implement any of rcaskey 's suggestions as of yet - it uses a separate data object passed through msg() to communicate between the server and the various protocols. Also the client itself could probably need cleanup and 'prettification'. The fact that the system runs a hybrid of Django and Twisted, getting the best of both worlds should allow for many possibilities in the future. /Griatch 2010-12-07 02:34:59 +00:00			`re.S\|re.M\|re.I)`

			`def re_color(self, text):`
			`"Replace ansi colors with html color tags"`
			`for colorname, code in self.colorcodes:`
			`regexp = "(?:%s)(.*?)(?:%s)" % (code, self.normalcode)`
			`regexp = regexp.replace('[', r'\[')`
			`text = re.sub(regexp, r'''<span style="color: %s">\1</span>''' % colorname, text)`
			`return text`

			`def re_bold(self, text):`
			`"Replace ansi hilight with bold text"`
			`regexp = "(?:%s)(.*?)(?:%s)" % ('\033[1m', self.normalcode)`
			`regexp = regexp.replace('[', r'\[')`
			`return re.sub(regexp, r'<span style="font-weight:bold">\1</span>', text)`

			`def re_underline(self, text):`
			`"Replace ansi underline with html equivalent"`
			`regexp = "(?:%s)(.*?)(?:%s)" % ('\033[4m', self.normalcode)`
			`regexp = regexp.replace('[', r'\[')`
			`return re.sub(regexp, r'<span style="text-decoration: underline">\1</span>', text)`

			`def remove_bells(self, text):`
			`"Remove ansi specials"`
			`return text.replace('\07', '')`

			`def remove_backspaces(self, text):`
			`"Removes special escape sequences"`
			`backspace_or_eol = r'(.\010)\|(\033\[K)'`
			`n = 1`
			`while n > 0:`
			`text, n = re.subn(backspace_or_eol, '', text, 1)`
			`return text`

			`def convert_linebreaks(self, text):`
			`"Extra method for cleaning linebreaks"`
			`return text.replace(r'\n', r'<br>')`

A file was not properly added in the last commit. Added now. 2010-12-07 22:27:50 +00:00			`def convert_urls(self, text):`
			`"Replace urls (http://...) by valid HTML"`
			`regexp = r"((ftp\|www\|http)(\W+\S+[^).,:;?\]\}(\<span\>) \r\n$]+))"`
			`return re.sub(regexp, r'<a href="\1">\1</a>', text)`

Evennia now runs on its own Twisted webserver (no need for testserver or Apache if you don't want to). Evennia now also has an ajax long-polling web client running from Twisted. The web client requires no extra dependencies beyond jQuery which is included. The src/server structure has been r cleaned up and rewritten to make it easier to add new protocols in the future - all new protocols need to inherit from server.session.Session, whi ch implements a set of hooks that Evennia uses to communicate. The current web client protocol is functional but does not implement any of rcaskey 's suggestions as of yet - it uses a separate data object passed through msg() to communicate between the server and the various protocols. Also the client itself could probably need cleanup and 'prettification'. The fact that the system runs a hybrid of Django and Twisted, getting the best of both worlds should allow for many possibilities in the future. /Griatch 2010-12-07 02:34:59 +00:00			`def do_sub(self, m):`
			`"Helper method to be passed to re.sub."`
			`c = m.groupdict()`
			`if c['htmlchars']:`
			`return cgi.escape(c['htmlchars'])`
			`if c['lineend']:`
			`return '<br>'`
			`elif c['space']:`
			`t = m.group().replace('\t', ' '*self.tabstop)`
			`t = t.replace(' ', ' ')`
			`return t`
			`elif c['space'] == '\t':`
			`return ' '*self.tabstop`
			`else:`
Cleaned up the webclient and changed how it handles sessions and identifies with the server. Fixed some reported bugs caused by the changed layout of sessionhandler. 2010-12-11 13:37:26 +00:00			`url = m.group('protocol')`
Evennia now runs on its own Twisted webserver (no need for testserver or Apache if you don't want to). Evennia now also has an ajax long-polling web client running from Twisted. The web client requires no extra dependencies beyond jQuery which is included. The src/server structure has been r cleaned up and rewritten to make it easier to add new protocols in the future - all new protocols need to inherit from server.session.Session, whi ch implements a set of hooks that Evennia uses to communicate. The current web client protocol is functional but does not implement any of rcaskey 's suggestions as of yet - it uses a separate data object passed through msg() to communicate between the server and the various protocols. Also the client itself could probably need cleanup and 'prettification'. The fact that the system runs a hybrid of Django and Twisted, getting the best of both worlds should allow for many possibilities in the future. /Griatch 2010-12-07 02:34:59 +00:00			`if url.startswith(' '):`
			`prefix = ' '`
			`url = url[1:]`
			`else:`
			`prefix = ''`
			`last = m.groups()[-1]`
			`if last in ['\n', '\r', '\r\n']:`
			`last = '<br>'`
			`return '%s%s' % (prefix, url)`

			`def parse(self, text):`
			`"""`
			`Main access function, converts a text containing`
			`ansi codes into html statements.`
			`"""`

			`# parse everything to ansi first`
			`text = ansi.parse_ansi(text)`

			`# convert all ansi to html`
			`result = re.sub(self.re_string, self.do_sub, text)`
			`result = self.re_color(result)`
			`result = self.re_bold(result)`
			`result = self.re_underline(result)`
			`result = self.remove_bells(result)`
			`result = self.convert_linebreaks(result)`
			`result = self.remove_backspaces(result)`
A file was not properly added in the last commit. Added now. 2010-12-07 22:27:50 +00:00			`result = self.convert_urls(result)`
Evennia now runs on its own Twisted webserver (no need for testserver or Apache if you don't want to). Evennia now also has an ajax long-polling web client running from Twisted. The web client requires no extra dependencies beyond jQuery which is included. The src/server structure has been cleaned up and rewritten to make it easier to add new protocols in the future - all new protocols need to inherit from server.session.Session, which implements a set of hooks that Evennia uses to communicate. The current web client protocol is functional but does not implement any of rcaskey's suggestions as of yet - it uses a separate data object passed through msg() to communicate between the server and the various protocols. Also the client itself could probably need cleanup and 'prettification'. The fact that the system runs a hybrid of Django and Twisted, getting the best of both worlds should allow for many possibilities in the future. /Griatch 2010-12-07 02:34:59 +00:00
			`# clean out eventual ansi that was missed`
			`result = ansi.parse_ansi(result, strip_ansi=True)`

			`return result`

A file was not properly added in the last commit. Added now. 2010-12-07 22:27:50 +00:00			`HTML_PARSER = TextToHTMLparser()`
Evennia now runs on its own Twisted webserver (no need for testserver or Apache if you don't want to). Evennia now also has an ajax long-polling web client running from Twisted. The web client requires no extra dependencies beyond jQuery which is included. The src/server structure has been cleaned up and rewritten to make it easier to add new protocols in the future - all new protocols need to inherit from server.session.Session, which implements a set of hooks that Evennia uses to communicate. The current web client protocol is functional but does not implement any of rcaskey's suggestions as of yet - it uses a separate data object passed through msg() to communicate between the server and the various protocols. Also the client itself could probably need cleanup and 'prettification'. The fact that the system runs a hybrid of Django and Twisted, getting the best of both worlds should allow for many possibilities in the future. /Griatch 2010-12-07 02:34:59 +00:00
			`#`
			`# Access function`
			`#`

			`def parse_html(string, parser=HTML_PARSER):`
			`"""`
			`Parses a string, replace ansi markup with html`
			`"""`
			`return parser.parse(string)`