"""
ANSI -> html converter
Credit for original idea and implementation
goes to Muhammad Alkarouri and his
snippet #577349 on http://code.activestate.com.
(extensively modified by Griatch 2010)
"""
import re
import cgi
from ansi import *
class TextToHTMLparser(object):
"""
This class describes a parser for converting from ansi to html.
"""
tabstop = 4
# mapping html color name <-> ansi code.
hilite = ANSI_HILITE
normal = ANSI_NORMAL
underline = ANSI_UNDERLINE
colorcodes = [
('red', hilite + ANSI_RED),
('maroon', ANSI_RED),
('lime', hilite + ANSI_GREEN),
('green', ANSI_GREEN),
('yellow', hilite + ANSI_YELLOW),
('olive', ANSI_YELLOW),
('blue', hilite + ANSI_BLUE),
('navy', ANSI_BLUE),
('magenta', hilite + ANSI_MAGENTA),
('purple', ANSI_MAGENTA),
('cyan', hilite + ANSI_CYAN),
('teal', ANSI_CYAN),
('white', hilite + ANSI_WHITE), # pure white
('gray', ANSI_WHITE), #light grey
('dimgray', hilite + ANSI_BLACK), #dark grey
('black', ANSI_BLACK), #pure black
]
colorback = [
('bgred', hilite + ANSI_BACK_RED),
('bgmaroon', ANSI_BACK_RED),
('bglime', hilite + ANSI_BACK_GREEN),
('bggreen', ANSI_BACK_GREEN),
('bgyellow', hilite + ANSI_BACK_YELLOW),
('bgolive', ANSI_BACK_YELLOW),
('bgblue', hilite + ANSI_BACK_BLUE),
('bgnavy', ANSI_BACK_BLUE),
('bgmagenta', hilite + ANSI_BACK_MAGENTA),
('bgpurple', ANSI_BACK_MAGENTA),
('bgcyan', hilite + ANSI_BACK_CYAN),
('bgteal', ANSI_BACK_CYAN),
('bgwhite', hilite + ANSI_BACK_WHITE),
('bggray', ANSI_BACK_WHITE),
('bgdimgray', hilite + ANSI_BACK_BLACK),
('bgblack', ANSI_BACK_BLACK),
]
# make sure to escape [
colorcodes = [(c, code.replace("[",r"\[")) for c, code in colorcodes]
colorback = [(c, code.replace("[",r"\[")) for c, code in colorback]
# create stop markers
fgstop = [("", c.replace("[", r"\[")) for c in (normal, hilite, underline)]
bgstop = [("", c.replace("[", r"\[")) for c in (normal,)]
fgstop = "|".join(co[1] for co in colorcodes + fgstop + [("", "$")])
bgstop = "|".join(co[1] for co in colorback + bgstop + [("", "$")])
# pre-compile regexes
re_fgs = [(cname, re.compile("(?:%s)(.*?)(?=%s)" % (code, fgstop))) for cname, code in colorcodes]
re_bgs = [(cname, re.compile("(?:%s)(.*?)(?=%s)" % (code, bgstop))) for cname, code in colorback]
re_normal = re.compile(normal.replace("[", r"\["))
re_hilite = re.compile("(?:%s)(.*)(?=%s)" % (hilite.replace("[", r"\["), fgstop))
re_uline = re.compile("(?:%s)(.*?)(?=%s)" % (ANSI_UNDERLINE.replace("[",r"\["), fgstop))
re_string = re.compile(r'(?P[<&>])|(?P^[ \t]+)|(?P\r\n|\r|\n)', re.S|re.M|re.I)
def re_color(self, text):
"""
Replace ansi colors with html color class names.
Let the client choose how it will display colors, if it wishes to. """
for colorname, regex in self.re_fgs:
text = regex.sub(r'''\1''' % colorname, text)
for bgname, regex in self.re_bgs:
text = regex.sub(r'''\1''' % bgname, text)
return self.re_normal.sub("", text)
def re_bold(self, text):
"Clean out superfluous hilights rather than set to make it match the look of telnet."
return self.re_hilite.sub(r'\1', text)
def re_underline(self, text):
"Replace ansi underline with html underline class name."
return self.re_uline.sub(r'\1', text)
def remove_bells(self, text):
"Remove ansi specials"
return text.replace('\07', '')
def remove_backspaces(self, text):
"Removes special escape sequences"
backspace_or_eol = r'(.\010)|(\033\[K)'
n = 1
while n > 0:
text, n = re.subn(backspace_or_eol, '', text, 1)
return text
def convert_linebreaks(self, text):
"Extra method for cleaning linebreaks"
return text.replace(r'\n', r'
')
def convert_urls(self, text):
"Replace urls (http://...) by valid HTML"
regexp = r"((ftp|www|http)(\W+\S+[^).,:;?\]\}(\) \r\n$]+))"
# -> added target to output prevent the web browser from attempting to
# change pages (and losing our webclient session).
return re.sub(regexp, r'\1', text)
def do_sub(self, m):
"Helper method to be passed to re.sub."
c = m.groupdict()
if c['htmlchars']:
return cgi.escape(c['htmlchars'])
if c['lineend']:
return '
'
elif c['space'] == '\t':
return ' '*self.tabstop
elif c['space']:
t = m.group().replace('\t', ' '*self.tabstop)
t = t.replace(' ', ' ')
return t
def parse(self, text, strip_ansi=False):
"""
Main access function, converts a text containing
ansi codes into html statements.
"""
# parse everything to ansi first
text = parse_ansi(text, strip_ansi=strip_ansi, xterm256=False)
# convert all ansi to html
result = re.sub(self.re_string, self.do_sub, text)
result = self.re_color(result)
result = self.re_bold(result)
result = self.re_underline(result)
result = self.remove_bells(result)
result = self.convert_linebreaks(result)
result = self.remove_backspaces(result)
result = self.convert_urls(result)
# clean out eventual ansi that was missed
#result = parse_ansi(result, strip_ansi=True)
return result
HTML_PARSER = TextToHTMLparser()
#
# Access function
#
def parse_html(string, strip_ansi=False, parser=HTML_PARSER):
"""
Parses a string, replace ansi markup with html
"""
return parser.parse(string, strip_ansi=strip_ansi)