Update utils.latinify to support Python 3

2026-03-31 21:17:17 +02:00 · 2019-10-01 12:11:11 -04:00 · 2019-10-01 12:11:11 -04:00 · 96d5734e51
commit 96d5734e51
parent 1115a941c1
2 changed files with 40 additions and 1 deletion
--- a/evennia/utils/tests/test_utils.py
+++ b/evennia/utils/tests/test_utils.py
@@ -223,3 +223,36 @@ class TestImportFunctions(TestCase):
        test_path = self._t_dir_file("invalid_filename.py")
        loaded_mod = utils.mod_import_from_path(test_path)
        self.assertIsNone(loaded_mod)
+
+
+class LatinifyTest(TestCase):
+    """
+    These tests rely on the following utils._UNICODE_MAP entries:
+
+        LEFT DOUBLE QUOTATION MARK: "
+        RIGHT DOUBLE QUOTATION MARK: "
+
+    """
+    def setUp(self):
+        super().setUp()
+
+        self.example_str = 'It says, “plugh.”'
+        self.example_ustr = u'It says, “plugh.”'
+
+        self.expected_output = 'It says, "plugh."'
+
+    def test_plain_string(self):
+        result = utils.latinify(self.example_str)
+        self.assertEqual(result, self.expected_output)
+
+    def test_unicode_string(self):
+        result = utils.latinify(self.example_ustr)
+        self.assertEqual(result, self.expected_output)
+
+    def test_encoded_string(self):
+        result = utils.latinify(self.example_str.encode('utf8'))
+        self.assertEqual(result, self.expected_output)
+
+    def test_byte_string(self):
+        result = utils.latinify(utils.to_bytes(self.example_str))
+        self.assertEqual(result, self.expected_output)
--- a/evennia/utils/utils.py
+++ b/evennia/utils/utils.py
@@ -761,7 +761,10 @@ _UNICODE_MAP = {
    "EN DASH": "-",
    "HORIZONTAL BAR": "-",
    "HORIZONTAL ELLIPSIS": "...",
+    "LEFT SINGLE QUOTATION MARK": "'",
    "RIGHT SINGLE QUOTATION MARK": "'",
+    "LEFT DOUBLE QUOTATION MARK": '"',
+    "RIGHT DOUBLE QUOTATION MARK": '"',
 }


@@ -788,10 +791,13 @@ def latinify(string, default="?", pure_ascii=False):

    from unicodedata import name

+    if isinstance(string, bytes):
+        string = string.decode("utf8")
+
    converted = []
    for unich in iter(string):
        try:
-            ch = unich.decode("ascii")
+            ch = unich.encode("utf8").decode("ascii")
        except UnicodeDecodeError:
            # deduce a latin letter equivalent from the Unicode data
            # point name; e.g., since `name(u'á') == 'LATIN SMALL