fix: improve convert_urls

This commit is contained in:
selberhad 2022-10-03 14:50:18 -04:00
parent f9ba117680
commit 80878fb124
2 changed files with 26 additions and 15 deletions

View file

@@ -250,13 +250,13 @@ class TestTextToHTMLparser(TestCase):
def test_url_scheme_ftp(self):
    """A bare ftp. host is linked with a protocol-relative // href."""
    # Collapsed diff had left the stale pre-change expected string here as a
    # third (msg) argument to assertEqual; only the // form is correct now.
    self.assertEqual(
        self.parser.convert_urls("ftp.example.com"),
        '<a href="//ftp.example.com" target="_blank">ftp.example.com</a>',
    )
def test_url_scheme_www(self):
    """A bare www. host is linked with a protocol-relative // href."""
    # Dropped the stale pre-change expected string (diff residue) that was
    # being passed as assertEqual's msg argument.
    self.assertEqual(
        self.parser.convert_urls("www.example.com"),
        '<a href="//www.example.com" target="_blank">www.example.com</a>',
    )
def test_url_scheme_ftpproto(self):
@@ -280,7 +280,7 @@ class TestTextToHTMLparser(TestCase):
def test_url_chars_slash(self):
    """A path after the host is kept inside both href and link text."""
    # Dropped the stale pre-change expected string (diff residue) that was
    # being passed as assertEqual's msg argument.
    self.assertEqual(
        self.parser.convert_urls("www.example.com/homedir"),
        '<a href="//www.example.com/homedir" target="_blank">www.example.com/homedir</a>',
    )
def test_url_chars_colon(self):
@@ -313,22 +313,16 @@ class TestTextToHTMLparser(TestCase):
' target="_blank">https://groups.google.com/forum/?fromgroups#!categories/evennia/ainneve</a>',
)
def test_url_edge_leadingw(self):
    """With the \\b boundary in re_url, 'wwww.' cannot match mid-word,
    so the text must pass through unchanged (consistent with
    test_non_url_with_www)."""
    self.assertEqual(
        self.parser.convert_urls("wwww.example.com"),
        "wwww.example.com",
    )
def test_url_edge_following_period_eol(self):
    """A trailing period at end-of-string stays outside the link."""
    # Dropped the stale pre-change expected string (diff residue) that was
    # being passed as assertEqual's msg argument.
    self.assertEqual(
        self.parser.convert_urls("www.example.com."),
        '<a href="//www.example.com" target="_blank">www.example.com</a>.',
    )
def test_url_edge_following_period(self):
    """A period followed by whitespace stays outside the link."""
    # Dropped the stale pre-change expected string (diff residue) that was
    # being passed as assertEqual's msg argument.
    self.assertEqual(
        self.parser.convert_urls("see www.example.com. "),
        'see <a href="//www.example.com" target="_blank">www.example.com</a>. ',
    )
def test_url_edge_brackets(self):
@@ -356,3 +350,9 @@ class TestTextToHTMLparser(TestCase):
'</span><a href="http://example.com/" target="_blank">'
'http://example.com/</a><span class="red">',
)
def test_non_url_with_www(self):
    """A 'www' buried mid-word (no word boundary) must not be linkified."""
    sample = "Awwww.this should not be highlighted"
    self.assertEqual(self.parser.convert_urls(sample), sample)

View file

@@ -88,8 +88,9 @@ class TextToHTMLparser(object):
re.S | re.M | re.I,
)
# Match url-like spans. Group 1 is the url itself (\b stops a run like
# "Awwww." from matching mid-word); group 2 captures a trailing
# sentence-ending period or HTML entity that must stay outside the link.
# NOTE: the collapsed diff had left the old pattern literal adjacent to
# this one (implicit string concatenation -> broken regex); only the
# new \b-anchored pattern is kept.
re_url = re.compile(
    r'(?<!=")(\b(?:ftp|www|https?)\W+(?:(?!\.(?:\s|$)|&\w+;)[^"\',;$*^\\(){}<>\[\]\s])+)(\.(?:\s|$)|&\w+;|)'
)
# Detects an explicit protocol, so protocol-less urls can get a "//" prefix.
re_protocol = re.compile(r'^(?:ftp|https?)://')
# MXP clickable-link and url markup: |lc<cmd>|lt<text>|le and |lu<url>|lt<text>|le.
re_mxplink = re.compile(r"\|lc(.*?)\|lt(.*?)\|le", re.DOTALL)
re_mxpurl = re.compile(r"\|lu(.*?)\|lt(.*?)\|le", re.DOTALL)
@@ -147,9 +148,19 @@ class TextToHTMLparser(object):
text (str): Processed text.
"""
# -> added target to output prevent the web browser from attempting to
# change pages (and losing our webclient session).
return self.re_url.sub(r'<a href="\1" target="_blank">\1</a>\2', text)
m = self.re_url.search(text)
if m:
href = m.group(1)
label = href
# if there is no protocol (i.e. starts with www) prefix with // so the link isn't treated as relative
if not self.re_protocol.match(href):
href = "//" + href
rest = m.group(2)
# -> added target to output prevent the web browser from attempting to
# change pages (and losing our webclient session).
return text[:m.start()] + f'<a href="{href}" target="_blank">{label}</a>{rest}' + text[m.end():]
else:
return text
def sub_mxp_links(self, match):
"""