From ccd1451a02231078fa6ebe2960847f93c7b62097 Mon Sep 17 00:00:00 2001
From: Dan Feeney
Date: Sun, 8 Nov 2015 22:35:20 -0600
Subject: [PATCH] wrote tests for the convert_urls function of TextToHTMLparser

improved regex to better handle edge cases, particularly a period following the URL
---
NOTE(review): this copy of the patch was recovered from a whitespace-collapsed
paste. Line breaks were rebuilt so that every hunk matches its @@ header and
the diffstat below; the indentation inside the hunks is reconstructed and
should be confirmed against the original commit. Markup-like substrings appear
to have been stripped before the paste (author address, the group names in the
re_string context line, and possibly HTML anchors in the expected test values
and in the re_url.sub() templates); they are left exactly as found -- verify
before applying.

 evennia/utils/tests.py     | 79 ++++++++++++++++++++++++++++++++++++++
 evennia/utils/text2html.py |  4 +-
 2 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/evennia/utils/tests.py b/evennia/utils/tests.py
index 6644711064..4cf0481ac6 100644
--- a/evennia/utils/tests.py
+++ b/evennia/utils/tests.py
@@ -230,3 +230,82 @@ class TestMLen(TestCase):
 
     def test_dict(self):
         self.assertEqual(utils.m_len({'hello': True, 'Goodbye': False}), 2)
+
+
+from .text2html import TextToHTMLparser
+
+class TestTextToHTMLparser(TestCase):
+    def setUp(self):
+        self.parser = TextToHTMLparser()
+
+    def tearDown(self):
+        del self.parser
+
+    def test_url_scheme_ftp(self):
+        self.assertEqual(self.parser.convert_urls('ftp.example.com'),
+            'ftp.example.com')
+
+    def test_url_scheme_www(self):
+        self.assertEqual(self.parser.convert_urls('www.example.com'),
+            'www.example.com')
+
+    def test_url_scheme_ftpproto(self):
+        self.assertEqual(self.parser.convert_urls('ftp://ftp.example.com'),
+            'ftp://ftp.example.com')
+
+    def test_url_scheme_http(self):
+        self.assertEqual(self.parser.convert_urls('http://example.com'),
+            'http://example.com')
+
+    def test_url_scheme_https(self):
+        self.assertEqual(self.parser.convert_urls('https://example.com'),
+            'https://example.com')
+
+    def test_url_chars_slash(self):
+        self.assertEqual(self.parser.convert_urls('www.example.com/homedir'),
+            'www.example.com/homedir')
+
+    def test_url_chars_colon(self):
+        self.assertEqual(self.parser.convert_urls('https://example.com:8000/login/'),
+            'https://example.com:8000/login/')
+
+    def test_url_chars_querystring(self):
+        self.assertEqual(self.parser.convert_urls('https://example.com/submitform?field1=val1+val3&field2=val2'),
+            'https://example.com/submitform?field1=val1+val3&field2=val2')
+
+    def test_url_chars_anchor(self):
+        self.assertEqual(self.parser.convert_urls('http://www.example.com/menu#section_1'),
+            'http://www.example.com/menu#section_1')
+
+    def test_url_chars_exclam(self):
+        self.assertEqual(self.parser.convert_urls('https://groups.google.com/forum/?fromgroups#!categories/evennia/ainneve'),
+            'https://groups.google.com/forum/?fromgroups#!categories/evennia/ainneve')
+
+    def test_url_edge_leadingw(self):
+        self.assertEqual(self.parser.convert_urls('wwww.example.com'),
+            'wwww.example.com')
+
+    def test_url_edge_following_period_eol(self):
+        self.assertEqual(self.parser.convert_urls('www.example.com.'),
+            'www.example.com.')
+
+    def test_url_edge_following_period(self):
+        self.assertEqual(self.parser.convert_urls('see www.example.com. '),
+            'see www.example.com. ')
+
+    def test_url_edge_brackets(self):
+        self.assertEqual(self.parser.convert_urls('[http://example.com/]'),
+            '[http://example.com/]')
+
+    def test_url_edge_multiline(self):
+        self.assertEqual(self.parser.convert_urls(' * http://example.com/info\n * bullet'),
+            ' * http://example.com/info\n * bullet')
+
+    def test_url_edge_following_htmlentity(self):
+        self.assertEqual(self.parser.convert_urls('http://example.com/info<span>'),
+            'http://example.com/info<span>')
+
+    def test_url_edge_surrounded_spans(self):
+        self.assertEqual(self.parser.convert_urls('http://example.com/'),
+            'http://example.com/')
+
diff --git a/evennia/utils/text2html.py b/evennia/utils/text2html.py
index e49cbdca24..f8fc64c202 100644
--- a/evennia/utils/text2html.py
+++ b/evennia/utils/text2html.py
@@ -81,7 +81,7 @@ class TextToHTMLparser(object):
     re_uline = re.compile("(?:%s)(.*?)(?=%s)" % (ANSI_UNDERLINE.replace("[", r"\["), fgstop))
     re_string = re.compile(r'(?P[<&>])|(?P [ \t]+)|(?P\r\n|\r|\n)', re.S|re.M|re.I)
     re_link = re.compile(r'\{lc(.*?)\{lt(.*?)\{le', re.DOTALL)
-    re_url = re.compile(r'((ftp|www|https|http)\W+[^"\',;$*^\\()[\]{}<>\s]+)')
+    re_url = re.compile(r'((?:ftp|www|https?)\W+(?:(?!\.(?:\s|$)|&\w+;)[^"\',;$*^\\(){}<>\[\]\s])+)(\.(?:\s|$)|&\w+;|)')
 
     def re_color(self, text):
         """
@@ -184,7 +184,7 @@ class TextToHTMLparser(object):
         """
         # -> added target to output prevent the web browser from attempting to
         # change pages (and losing our webclient session).
-        return re_url.sub(r'\1', text)
+        return self.re_url.sub(r'\1\2', text)
 
     def convert_links(self, text):
         """