From 80878fb12466c5a52bf5c8f5271c8118a781fe87 Mon Sep 17 00:00:00 2001 From: selberhad Date: Mon, 3 Oct 2022 14:50:18 -0400 Subject: [PATCH 1/3] fix: improve convert_urls --- evennia/utils/tests/test_tagparsing.py | 22 +++++++++++----------- evennia/utils/text2html.py | 19 +++++++++++++++---- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/evennia/utils/tests/test_tagparsing.py b/evennia/utils/tests/test_tagparsing.py index c684ab9e3b..44d1cf9681 100644 --- a/evennia/utils/tests/test_tagparsing.py +++ b/evennia/utils/tests/test_tagparsing.py @@ -250,13 +250,13 @@ class TestTextToHTMLparser(TestCase): def test_url_scheme_ftp(self): self.assertEqual( self.parser.convert_urls("ftp.example.com"), - 'ftp.example.com', + 'ftp.example.com', ) def test_url_scheme_www(self): self.assertEqual( self.parser.convert_urls("www.example.com"), - 'www.example.com', + 'www.example.com', ) def test_url_scheme_ftpproto(self): @@ -280,7 +280,7 @@ class TestTextToHTMLparser(TestCase): def test_url_chars_slash(self): self.assertEqual( self.parser.convert_urls("www.example.com/homedir"), - 'www.example.com/homedir', + 'www.example.com/homedir', ) def test_url_chars_colon(self): @@ -313,22 +313,16 @@ class TestTextToHTMLparser(TestCase): ' target="_blank">https://groups.google.com/forum/?fromgroups#!categories/evennia/ainneve', ) - def test_url_edge_leadingw(self): - self.assertEqual( - self.parser.convert_urls("wwww.example.com"), - 'wwww.example.com', - ) - def test_url_edge_following_period_eol(self): self.assertEqual( self.parser.convert_urls("www.example.com."), - 'www.example.com.', + 'www.example.com.', ) def test_url_edge_following_period(self): self.assertEqual( self.parser.convert_urls("see www.example.com. "), - 'see www.example.com. ', + 'see www.example.com. ', ) def test_url_edge_brackets(self): @@ -356,3 +350,9 @@ class TestTextToHTMLparser(TestCase): '' 'http://example.com/', ) + + def test_non_url_with_www(self): + self.assertEqual( + self.parser.convert_urls('Awwww.this should not be highlighted'), + 'Awwww.this should not be highlighted' + ) diff --git a/evennia/utils/text2html.py b/evennia/utils/text2html.py index 1a0b2bf6cc..e2fedc835f 100644 --- a/evennia/utils/text2html.py +++ b/evennia/utils/text2html.py @@ -88,8 +88,9 @@ class TextToHTMLparser(object): re.S | re.M | re.I, ) re_url = re.compile( - r'(?\[\]\s])+)(\.(?:\s|$)|&\w+;|)' + r'(?\[\]\s])+)(\.(?:\s|$)|&\w+;|)' ) + re_protocol = re.compile(r'^(?:ftp|https?)://') re_mxplink = re.compile(r"\|lc(.*?)\|lt(.*?)\|le", re.DOTALL) re_mxpurl = re.compile(r"\|lu(.*?)\|lt(.*?)\|le", re.DOTALL) @@ -147,9 +148,19 @@ class TextToHTMLparser(object): text (str): Processed text. """ - # -> added target to output prevent the web browser from attempting to - # change pages (and losing our webclient session). - return self.re_url.sub(r'\1\2', text) + m = self.re_url.search(text) + if m: + href = m.group(1) + label = href + # if there is no protocol (i.e. starts with www) prefix with // so the link isn't treated as relative + if not self.re_protocol.match(href): + href = "//" + href + rest = m.group(2) + # -> added target to output prevent the web browser from attempting to + # change pages (and losing our webclient session). + return text[:m.start()] + f'{label}{rest}' + text[m.end():] + else: + return text def sub_mxp_links(self, match): """ From dfb623ce90b67042cffb6fe4afc152bdcfae0a4c Mon Sep 17 00:00:00 2001 From: selberhad Date: Mon, 3 Oct 2022 20:49:44 -0400 Subject: [PATCH 2/3] prevent urls with no protocol that are too short from being highlighted --- evennia/utils/tests/test_tagparsing.py | 6 ++++++ evennia/utils/text2html.py | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/evennia/utils/tests/test_tagparsing.py b/evennia/utils/tests/test_tagparsing.py index 44d1cf9681..933d38d3f2 100644 --- a/evennia/utils/tests/test_tagparsing.py +++ b/evennia/utils/tests/test_tagparsing.py @@ -356,3 +356,9 @@ class TestTextToHTMLparser(TestCase): self.parser.convert_urls('Awwww.this should not be highlighted'), 'Awwww.this should not be highlighted' ) + + def test_invalid_www_url(self): + self.assertEqual( + self.parser.convert_urls('www.t'), + 'www.t' + ) diff --git a/evennia/utils/text2html.py b/evennia/utils/text2html.py index e2fedc835f..d10c1c5d39 100644 --- a/evennia/utils/text2html.py +++ b/evennia/utils/text2html.py @@ -91,6 +91,7 @@ class TextToHTMLparser(object): r'(?\[\]\s])+)(\.(?:\s|$)|&\w+;|)' ) re_protocol = re.compile(r'^(?:ftp|https?)://') + re_valid_no_protocol = re.compile(r'^(?:www|ftp)\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b[-a-zA-Z0-9@:%_\+.~#?&//=]*') re_mxplink = re.compile(r"\|lc(.*?)\|lt(.*?)\|le", re.DOTALL) re_mxpurl = re.compile(r"\|lu(.*?)\|lt(.*?)\|le", re.DOTALL) @@ -152,8 +153,11 @@ class TextToHTMLparser(object): if m: href = m.group(1) label = href - # if there is no protocol (i.e. starts with www) prefix with // so the link isn't treated as relative + # if there is no protocol (i.e. starts with www or ftp) + # prefix with // so the link isn't treated as relative if not self.re_protocol.match(href): + if not self.re_valid_no_protocol.match(href): + return text href = "//" + href rest = m.group(2) # -> added target to output prevent the web browser from attempting to From d6074c13863fd283ab71cacecd6fd4b8b0478838 Mon Sep 17 00:00:00 2001 From: selberhad Date: Fri, 7 Oct 2022 22:19:58 -0400 Subject: [PATCH 3/3] change protocol-relative urls to http --- evennia/utils/tests/test_tagparsing.py | 10 +++++----- evennia/utils/text2html.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/evennia/utils/tests/test_tagparsing.py b/evennia/utils/tests/test_tagparsing.py index 933d38d3f2..76cc9dd801 100644 --- a/evennia/utils/tests/test_tagparsing.py +++ b/evennia/utils/tests/test_tagparsing.py @@ -250,13 +250,13 @@ class TestTextToHTMLparser(TestCase): def test_url_scheme_ftp(self): self.assertEqual( self.parser.convert_urls("ftp.example.com"), - 'ftp.example.com', + 'ftp.example.com', ) def test_url_scheme_www(self): self.assertEqual( self.parser.convert_urls("www.example.com"), - 'www.example.com', + 'www.example.com', ) def test_url_scheme_ftpproto(self): @@ -280,7 +280,7 @@ class TestTextToHTMLparser(TestCase): def test_url_chars_slash(self): self.assertEqual( self.parser.convert_urls("www.example.com/homedir"), - 'www.example.com/homedir', + 'www.example.com/homedir', ) def test_url_chars_colon(self): @@ -316,13 +316,13 @@ class TestTextToHTMLparser(TestCase): def test_url_edge_following_period_eol(self): self.assertEqual( self.parser.convert_urls("www.example.com."), - 'www.example.com.', + 'www.example.com.', ) def test_url_edge_following_period(self): self.assertEqual( self.parser.convert_urls("see www.example.com. "), - 'see www.example.com. ', + 'see www.example.com. ', ) def test_url_edge_brackets(self): diff --git a/evennia/utils/text2html.py b/evennia/utils/text2html.py index d10c1c5d39..9ea74bf269 100644 --- a/evennia/utils/text2html.py +++ b/evennia/utils/text2html.py @@ -154,11 +154,11 @@ class TextToHTMLparser(object): href = m.group(1) label = href # if there is no protocol (i.e. starts with www or ftp) - # prefix with // so the link isn't treated as relative + # prefix with http:// so the link isn't treated as relative if not self.re_protocol.match(href): if not self.re_valid_no_protocol.match(href): return text - href = "//" + href + href = "http://" + href rest = m.group(2) # -> added target to output prevent the web browser from attempting to # change pages (and losing our webclient session).