mirror of
https://github.com/evennia/evennia.git
synced 2026-03-26 09:46:32 +01:00
Fix and cleanup the rplanguage contrib a bit
This commit is contained in:
parent
5986f99424
commit
d4b2ed6b8a
2 changed files with 83 additions and 28 deletions
|
|
@ -96,6 +96,7 @@ import re
|
|||
from random import choice, randint
|
||||
from collections import defaultdict
|
||||
from evennia import DefaultScript
|
||||
from evennia.utils import logger
|
||||
|
||||
|
||||
#------------------------------------------------------------
|
||||
|
|
@ -105,7 +106,8 @@ from evennia import DefaultScript
|
|||
#------------------------------------------------------------
|
||||
|
||||
# default language grammar
|
||||
_PHONEMES = "ea oh ae aa eh ah ao aw ai er ey ow ia ih iy oy ua uh uw a e i u y p b t d f v t dh s z sh zh ch jh k ng g m n l r w"
|
||||
_PHONEMES = "ea oh ae aa eh ah ao aw ai er ey ow ia ih iy oy ua uh uw a e i u y p b t d f v t dh " \
|
||||
"s z sh zh ch jh k ng g m n l r w"
|
||||
_VOWELS = "eaoiuy"
|
||||
# these must be able to be constructed from phonemes (so for example,
|
||||
# if you have v here, there must exixt at least one single-character
|
||||
|
|
@ -115,12 +117,16 @@ _GRAMMAR = "v cv vc cvv vcc vcv cvcc vccv cvccv cvcvcc cvccvcv vccvccvc cvcvccvv
|
|||
_RE_FLAGS = re.MULTILINE + re.IGNORECASE + re.UNICODE
|
||||
_RE_GRAMMAR = re.compile(r"vv|cc|v|c", _RE_FLAGS)
|
||||
_RE_WORD = re.compile(r'\w+', _RE_FLAGS)
|
||||
_RE_EXTRA_CHARS = re.compile(r'\s+(?=\W)|[,.?;](?=[,.?;]|\s+[,.?;])', _RE_FLAGS)
|
||||
|
||||
|
||||
class LanguageExistsError(Exception):
|
||||
message = "Language is already created. Re-adding it will re-build" \
|
||||
" its dictionary map. Use 'force=True' keyword if you are sure."
|
||||
|
||||
def __str__(self):
|
||||
return self.message
|
||||
|
||||
|
||||
class LanguageHandler(DefaultScript):
|
||||
"""
|
||||
|
|
@ -156,8 +162,11 @@ class LanguageHandler(DefaultScript):
|
|||
self.db.language_storage = {}
|
||||
|
||||
def add(self, key="default", phonemes=_PHONEMES,
|
||||
grammar=_GRAMMAR, word_length_variance=0, noun_prefix="",
|
||||
noun_postfix="", vowels=_VOWELS, manual_translations=None,
|
||||
grammar=_GRAMMAR, word_length_variance=0,
|
||||
noun_translate=False,
|
||||
noun_prefix="",
|
||||
noun_postfix="",
|
||||
vowels=_VOWELS, manual_translations=None,
|
||||
auto_translations=None, force=False):
|
||||
"""
|
||||
Add a new language. Note that you generally only need to do
|
||||
|
|
@ -170,14 +179,21 @@ class LanguageHandler(DefaultScript):
|
|||
will be used as an identifier for the language so it
|
||||
should be short and unique.
|
||||
phonemes (str, optional): Space-separated string of all allowed
|
||||
phonemes in this language.
|
||||
phonemes in this language. If either of the base phonemes
|
||||
(c, v, cc, vv) are present in the grammar, the phoneme list must
|
||||
at least include one example of each.
|
||||
grammar (str): All allowed consonant (c) and vowel (v) combinations
|
||||
allowed to build up words. For example cvv would be a consonant
|
||||
followed by two vowels (would allow for a word like 'die').
|
||||
allowed to build up words. Grammars are broken into the base phonemes
|
||||
(c, v, cc, vv) prioritizing the longer bases. So cvv would be a
|
||||
the c + vv (would allow for a word like 'die' whereas
|
||||
cvcvccc would be c+v+c+v+cc+c (a word like 'galosch').
|
||||
word_length_variance (real): The variation of length of words.
|
||||
0 means a minimal variance, higher variance may mean words
|
||||
have wildly varying length; this strongly affects how the
|
||||
language "looks".
|
||||
noun_translate (bool, optional): If a proper noun, identified as a
|
||||
capitalized word, should be translated or not. By default they
|
||||
will not, allowing for e.g. the names of characters to be understandable.
|
||||
noun_prefix (str, optional): A prefix to go before every noun
|
||||
in this language (if any).
|
||||
noun_postfix (str, optuonal): A postfix to go after every noun
|
||||
|
|
@ -261,6 +277,7 @@ class LanguageHandler(DefaultScript):
|
|||
"grammar": grammar,
|
||||
"grammar2phonemes": dict(grammar2phonemes),
|
||||
"word_length_variance": word_length_variance,
|
||||
"noun_translate": noun_translate,
|
||||
"noun_prefix": noun_prefix,
|
||||
"noun_postfix": noun_postfix}
|
||||
self.db.language_storage[key] = storage
|
||||
|
|
@ -282,34 +299,63 @@ class LanguageHandler(DefaultScript):
|
|||
"""
|
||||
word = match.group()
|
||||
lword = len(word)
|
||||
|
||||
if len(word) <= self.level:
|
||||
# below level. Don't translate
|
||||
new_word = word
|
||||
else:
|
||||
# translate the word
|
||||
# try to translate the word from dictionary
|
||||
new_word = self.language["translation"].get(word.lower(), "")
|
||||
if not new_word:
|
||||
if word.istitle():
|
||||
# capitalized word we don't have a translation for -
|
||||
# treat as a name (don't translate)
|
||||
new_word = "%s%s%s" % (self.language["noun_prefix"], word, self.language["noun_postfix"])
|
||||
else:
|
||||
# make up translation on the fly. Length can
|
||||
# vary from un-translated word.
|
||||
wlen = max(0, lword + sum(randint(-1, 1) for i
|
||||
in range(self.language["word_length_variance"])))
|
||||
grammar = self.language["grammar"]
|
||||
if wlen not in grammar:
|
||||
# no dictionary translation. Generate one
|
||||
|
||||
# find out what preceeded this word
|
||||
wpos = match.start()
|
||||
preceeding = match.string[:wpos].strip()
|
||||
start_sentence = preceeding.endswith(".") or not preceeding
|
||||
|
||||
# make up translation on the fly. Length can
|
||||
# vary from un-translated word.
|
||||
wlen = max(0, lword + sum(randint(-1, 1) for i
|
||||
in range(self.language["word_length_variance"])))
|
||||
grammar = self.language["grammar"]
|
||||
if wlen not in grammar:
|
||||
if randint(0, 1) == 0:
|
||||
# this word has no direct translation!
|
||||
return ""
|
||||
wlen = 0
|
||||
new_word = ''
|
||||
else:
|
||||
# use random word length
|
||||
wlen = choice(grammar.keys())
|
||||
|
||||
if wlen:
|
||||
structure = choice(grammar[wlen])
|
||||
grammar2phonemes = self.language["grammar2phonemes"]
|
||||
for match in _RE_GRAMMAR.finditer(structure):
|
||||
# there are only four combinations: vv,cc,c,v
|
||||
new_word += choice(grammar2phonemes[match.group()])
|
||||
if word.istitle():
|
||||
# capitalize words the same way
|
||||
new_word = new_word.capitalize()
|
||||
try:
|
||||
new_word += choice(grammar2phonemes[match.group()])
|
||||
except KeyError:
|
||||
logger.log_trace("You need to supply at least one example of each of "
|
||||
"the four base phonemes (c, v, cc, vv)")
|
||||
# abort translation here
|
||||
new_word = ''
|
||||
break
|
||||
|
||||
if word.istitle():
|
||||
title_word = ''
|
||||
if not start_sentence and not self.language.get("noun_translate", False):
|
||||
# don't translate what we identify as proper nouns (names)
|
||||
title_word = word
|
||||
elif new_word:
|
||||
title_word = new_word
|
||||
|
||||
if title_word:
|
||||
# Regardless of if we translate or not, we will add the custom prefix/postfixes
|
||||
new_word = "%s%s%s" % (self.language["noun_prefix"],
|
||||
title_word.capitalize(),
|
||||
self.language["noun_postfix"])
|
||||
|
||||
if len(word) > 1 and word.isupper():
|
||||
# keep LOUD words loud also when translated
|
||||
new_word = new_word.upper()
|
||||
|
|
@ -341,7 +387,9 @@ class LanguageHandler(DefaultScript):
|
|||
|
||||
# configuring the translation
|
||||
self.level = int(10 * (1.0 - max(0, min(level, 1.0))))
|
||||
return _RE_WORD.sub(self._translate_sub, text)
|
||||
translation = _RE_WORD.sub(self._translate_sub, text)
|
||||
# the substitution may create too long empty spaces, remove those
|
||||
return _RE_EXTRA_CHARS.sub("", translation)
|
||||
|
||||
|
||||
# Language access functions
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ from evennia.contrib import rplanguage
|
|||
mtrans = {"testing": "1", "is": "2", "a": "3", "human": "4"}
|
||||
atrans = ["An", "automated", "advantageous", "repeatable", "faster"]
|
||||
|
||||
text = "Automated testing is advantageous for a number of reasons:" \
|
||||
text = "Automated testing is advantageous for a number of reasons: " \
|
||||
"tests may be executed Continuously without the need for human " \
|
||||
"intervention, They are easily repeatable, and often faster."
|
||||
|
||||
|
|
@ -33,6 +33,12 @@ class TestLanguage(EvenniaTest):
|
|||
manual_translations=mtrans,
|
||||
auto_translations=atrans,
|
||||
force=True)
|
||||
rplanguage.add_language(key="binary",
|
||||
phonemes="oo ii ck w b d t",
|
||||
grammar="cvvv cvv cvvcv cvvcvv cvvvc cvvvcvv cvvc",
|
||||
vowels="oei",
|
||||
noun_prefix='beep-',
|
||||
word_length_variance=4)
|
||||
|
||||
def tearDown(self):
|
||||
super(TestLanguage, self).tearDown()
|
||||
|
|
@ -50,16 +56,17 @@ class TestLanguage(EvenniaTest):
|
|||
self.assertEqual(result1[1], "1")
|
||||
self.assertEqual(result1[2], "2")
|
||||
self.assertEqual(result2[-1], result2[-1])
|
||||
print(rplanguage.obfuscate_language(text, level=1.0, language='binary'))
|
||||
|
||||
def test_available_languages(self):
|
||||
self.assertEqual(rplanguage.available_languages(), ["testlang"])
|
||||
self.assertEqual(rplanguage.available_languages(), ["testlang", "binary"])
|
||||
|
||||
def test_obfuscate_whisper(self):
|
||||
self.assertEqual(rplanguage.obfuscate_whisper(text, level=0.0), text)
|
||||
assert (rplanguage.obfuscate_whisper(text, level=0.1).startswith(
|
||||
'-utom-t-d t-sting is -dv-nt-g-ous for - numb-r of r--sons:t-sts m-y b- -x-cut-d Continuously'))
|
||||
'-utom-t-d t-sting is -dv-nt-g-ous for - numb-r of r--sons: t-sts m-y b- -x-cut-d Continuously'))
|
||||
assert(rplanguage.obfuscate_whisper(text, level=0.5).startswith(
|
||||
'--------- --s---- -s -----------s f-- - ------ -f ---s--s:--s-s '))
|
||||
'--------- --s---- -s -----------s f-- - ------ -f ---s--s: --s-s '))
|
||||
self.assertEqual(rplanguage.obfuscate_whisper(text, level=1.0), "...")
|
||||
|
||||
# Testing of emoting / sdesc / recog system
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue