Add some more tests to catch faulty language definitions

This commit is contained in:
Griatch 2017-12-05 22:03:34 +01:00
parent d4b2ed6b8a
commit a1dee8d1a1
2 changed files with 35 additions and 16 deletions

View file

@ -110,7 +110,7 @@ _PHONEMES = "ea oh ae aa eh ah ao aw ai er ey ow ia ih iy oy ua uh uw a e i u y
"s z sh zh ch jh k ng g m n l r w"
_VOWELS = "eaoiuy"
# these must be able to be constructed from phonemes (so for example,
# if you have v here, there must exixt at least one single-character
# if you have v here, there must exist at least one single-character
# vowel phoneme defined above)
_GRAMMAR = "v cv vc cvv vcc vcv cvcc vccv cvccv cvcvcc cvccvcv vccvccvc cvcvccvv cvcvcvcvv"
@ -120,12 +120,12 @@ _RE_WORD = re.compile(r'\w+', _RE_FLAGS)
_RE_EXTRA_CHARS = re.compile(r'\s+(?=\W)|[,.?;](?=[,.?;]|\s+[,.?;])', _RE_FLAGS)
class LanguageExistsError(Exception):
message = "Language is already created. Re-adding it will re-build" \
" its dictionary map. Use 'force=True' keyword if you are sure."
class LanguageError(RuntimeError):
pass
def __str__(self):
return self.message
class LanguageExistsError(LanguageError):
pass
class LanguageHandler(DefaultScript):
@ -229,21 +229,28 @@ class LanguageHandler(DefaultScript):
"""
if key in self.db.language_storage and not force:
raise LanguageExistsError
# allowed grammar are grouped by length
gramdict = defaultdict(list)
for gram in grammar.split():
gramdict[len(gram)].append(gram)
grammar = dict(gramdict)
raise LanguageExistsError(
"Language is already created. Re-adding it will re-build"
" its dictionary map. Use 'force=True' keyword if you are sure.")
# Build the grammar_component -> phoneme mapping, e.g.
# {"vv": ["ea", "oh", ...], ...}
grammar2phonemes = defaultdict(list)
for phoneme in phonemes.split():
    # Phonemes are whitespace-separated; a stray separator such as the
    # comma in "ee," would otherwise silently become part of a phoneme.
    if re.search(r"\W", phoneme):
        raise LanguageError("The phoneme '%s' contains an invalid character" % phoneme)
    gram = "".join(["v" if char in vowels else "c" for char in phoneme])
    grammar2phonemes[gram].append(phoneme)

# Group the allowed grammar components by their length.
gramdict = defaultdict(list)
for gram in grammar.split():
    # A grammar component may consist solely of 'c' and 'v'. The negated
    # character class rejects every other character (punctuation, digits,
    # other letters), which is what the error message promises.
    if re.search(r"[^cv]", gram):
        raise LanguageError("The grammar '%s' is invalid (only 'c' and 'v' are allowed)" % gram)
    gramdict[len(gram)].append(gram)
grammar = dict(gramdict)
# create automatic translation
translation = {}

View file

@ -34,9 +34,8 @@ class TestLanguage(EvenniaTest):
auto_translations=atrans,
force=True)
rplanguage.add_language(key="binary",
phonemes="oo ii ck w b d t",
phonemes="oo ii a ck w b d t",
grammar="cvvv cvv cvvcv cvvcvv cvvvc cvvvcvv cvvc",
vowels="oei",
noun_prefix='beep-',
word_length_variance=4)
@ -50,13 +49,26 @@ class TestLanguage(EvenniaTest):
self.assertEqual(result0, text)
result1 = rplanguage.obfuscate_language(text, level=1.0, language="testlang")
result2 = rplanguage.obfuscate_language(text, level=1.0, language="testlang")
result3 = rplanguage.obfuscate_language(text, level=1.0, language='binary')
self.assertNotEqual(result1, text)
self.assertNotEqual(result3, text)
result1, result2 = result1.split(), result2.split()
self.assertEqual(result1[:4], result2[:4])
self.assertEqual(result1[1], "1")
self.assertEqual(result1[2], "2")
self.assertEqual(result2[-1], result2[-1])
print(rplanguage.obfuscate_language(text, level=1.0, language='binary'))
def test_faulty_language(self):
    # Registering a malformed language definition must raise LanguageError.
    faulty_definition = dict(
        key='binary2',
        # the comma after "ee" is the deliberate error
        phonemes="w b d t oe ee, oo e o a wh dw bw",
        grammar="cvvv cvv cvvcv cvvcvvo cvvvc cvvvcvv cvvc c v cc vv ccvvc ccvvccvv ",
        vowels="oea",
        word_length_variance=4,
    )
    with self.assertRaises(rplanguage.LanguageError):
        rplanguage.add_language(**faulty_definition)
def test_available_languages(self):
self.assertEqual(rplanguage.available_languages(), ["testlang", "binary"])