Merge pull request #1960 from dbarbuzzi/bugfix/1952-utils-latinify

Update utils.latinify to support Python 3
This commit is contained in:
Griatch 2019-10-01 21:31:36 +02:00 committed by GitHub
commit 76fd00a2cc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 24 additions and 1 deletions

View file

@ -223,3 +223,20 @@ class TestImportFunctions(TestCase):
test_path = self._t_dir_file("invalid_filename.py")
loaded_mod = utils.mod_import_from_path(test_path)
self.assertIsNone(loaded_mod)
class LatinifyTest(TestCase):
    """Checks that ``utils.latinify`` reduces non-ASCII text to plain ASCII."""

    def setUp(self):
        """Prepare the shared input text and the ASCII text expected back."""
        super().setUp()
        # The input contains a diaeresis ("ï") and curly double quotes; the
        # expected output uses the plain "i" and straight double quotes.
        self.example_str = 'It naïvely says, “plugh.”'
        self.expected_output = 'It naively says, "plugh."'

    def test_plain_string(self):
        """A ``str`` argument is converted to the expected ASCII text."""
        self.assertEqual(utils.latinify(self.example_str), self.expected_output)

    def test_byte_string(self):
        """The bytes produced by ``utils.to_bytes`` convert to the same text."""
        encoded = utils.to_bytes(self.example_str)
        self.assertEqual(utils.latinify(encoded), self.expected_output)

View file

@ -761,7 +761,10 @@ _UNICODE_MAP = {
"EN DASH": "-",
"HORIZONTAL BAR": "-",
"HORIZONTAL ELLIPSIS": "...",
"LEFT SINGLE QUOTATION MARK": "'",
"RIGHT SINGLE QUOTATION MARK": "'",
"LEFT DOUBLE QUOTATION MARK": '"',
"RIGHT DOUBLE QUOTATION MARK": '"',
}
@ -788,10 +791,13 @@ def latinify(string, default="?", pure_ascii=False):
from unicodedata import name
if isinstance(string, bytes):
string = string.decode("utf8")
converted = []
for unich in iter(string):
try:
ch = unich.decode("ascii")
ch = unich.encode("utf8").decode("ascii")
except UnicodeDecodeError:
# deduce a latin letter equivalent from the Unicode data
# point name; e.g., since `name(u'á') == 'LATIN SMALL