Fix and cleanup the rplanguage contrib a bit

2026-03-26 09:46:32 +01:00 · 2017-12-05 19:54:40 +01:00 · 2017-12-05 19:54:40 +01:00 · d4b2ed6b8a
commit d4b2ed6b8a
parent 5986f99424
2 changed files with 83 additions and 28 deletions
--- a/evennia/contrib/rplanguage.py
+++ b/evennia/contrib/rplanguage.py
@ -96,6 +96,7 @@ import re
 from random import choice, randint
 from collections import defaultdict
 from evennia import DefaultScript
+from evennia.utils import logger


 #------------------------------------------------------------
@ -105,7 +106,8 @@ from evennia import DefaultScript
 #------------------------------------------------------------

 # default language grammar
-_PHONEMES = "ea oh ae aa eh ah ao aw ai er ey ow ia ih iy oy ua uh uw a e i u y p b t d f v t dh s z sh zh ch jh k ng g m n l r w"
+_PHONEMES = "ea oh ae aa eh ah ao aw ai er ey ow ia ih iy oy ua uh uw a e i u y p b t d f v t dh " \
+            "s z sh zh ch jh k ng g m n l r w"
 _VOWELS = "eaoiuy"
 # these must be able to be constructed from phonemes (so for example,
 # if you have v here, there must exixt at least one single-character
@ -115,12 +117,16 @@ _GRAMMAR = "v cv vc cvv vcc vcv cvcc vccv cvccv cvcvcc cvccvcv vccvccvc cvcvccvv
 _RE_FLAGS = re.MULTILINE + re.IGNORECASE + re.UNICODE
 _RE_GRAMMAR = re.compile(r"vv|cc|v|c", _RE_FLAGS)
 _RE_WORD = re.compile(r'\w+', _RE_FLAGS)
+_RE_EXTRA_CHARS = re.compile(r'\s+(?=\W)|[,.?;](?=[,.?;]|\s+[,.?;])', _RE_FLAGS)


 class LanguageExistsError(Exception):
    message = "Language is already created. Re-adding it will re-build" \
              " its dictionary map. Use 'force=True' keyword if you are sure."

+    def __str__(self):
+        return self.message
+

 class LanguageHandler(DefaultScript):
    """
@ -156,8 +162,11 @@ class LanguageHandler(DefaultScript):
        self.db.language_storage = {}

    def add(self, key="default", phonemes=_PHONEMES,
-            grammar=_GRAMMAR, word_length_variance=0, noun_prefix="",
-            noun_postfix="", vowels=_VOWELS, manual_translations=None,
+            grammar=_GRAMMAR, word_length_variance=0,
+            noun_translate=False,
+            noun_prefix="",
+            noun_postfix="",
+            vowels=_VOWELS, manual_translations=None,
            auto_translations=None, force=False):
        """
        Add a new language. Note that you generally only need to do
@ -170,14 +179,21 @@ class LanguageHandler(DefaultScript):
                will be used as an identifier for the language so it
                should be short and unique.
            phonemes (str, optional): Space-separated string of all allowed
-                phonemes in this language.
+                phonemes in this language. For every base phoneme
+                (c, v, cc, vv) that appears in the grammar, the phoneme list must
+                include at least one example.
            grammar (str): All allowed consonant (c) and vowel (v) combinations
-                allowed to build up words. For example cvv would be a consonant
-                followed by two vowels (would allow for a word like 'die').
+                allowed to build up words. Grammars are broken into the base phonemes
+                (c, v, cc, vv), prioritizing the longer bases. So cvv would be
+                c + vv (would allow for a word like 'die'), whereas
+                cvcvccc would be c+v+c+v+cc+c (a word like 'galosch').
            word_length_variance (real): The variation of length of words.
                0 means a minimal variance, higher variance may mean words
                have wildly varying length; this strongly affects how the
                language "looks".
+            noun_translate (bool, optional): Whether proper nouns, identified as
+                capitalized words, should be translated. By default they are
+                not, so that e.g. the names of characters remain understandable.
            noun_prefix (str, optional): A prefix to go before every noun
                in this language (if any).
            noun_postfix (str, optuonal): A postfix to go after every noun
@ -261,6 +277,7 @@ class LanguageHandler(DefaultScript):
                   "grammar": grammar,
                   "grammar2phonemes": dict(grammar2phonemes),
                   "word_length_variance": word_length_variance,
+                   "noun_translate": noun_translate,
                   "noun_prefix": noun_prefix,
                   "noun_postfix": noun_postfix}
        self.db.language_storage[key] = storage
@ -282,34 +299,63 @@ class LanguageHandler(DefaultScript):
        """
        word = match.group()
        lword = len(word)
+
        if len(word) <= self.level:
            # below level. Don't translate
            new_word = word
        else:
-            # translate the word
+            # try to translate the word from dictionary
            new_word = self.language["translation"].get(word.lower(), "")
            if not new_word:
-                if word.istitle():
-                    # capitalized word we don't have a translation for -
-                    # treat as a name (don't translate)
-                    new_word = "%s%s%s" % (self.language["noun_prefix"], word, self.language["noun_postfix"])
-                else:
-                    # make up translation on the fly. Length can
-                    # vary from un-translated word.
-                    wlen = max(0, lword + sum(randint(-1, 1) for i
-                                              in range(self.language["word_length_variance"])))
-                    grammar = self.language["grammar"]
-                    if wlen not in grammar:
+                # no dictionary translation. Generate one
+
+                # find out what preceded this word
+                wpos = match.start()
+                preceeding = match.string[:wpos].strip()
+                start_sentence = preceeding.endswith(".") or not preceeding
+
+                # make up translation on the fly. Length can
+                # vary from un-translated word.
+                wlen = max(0, lword + sum(randint(-1, 1) for i
+                                          in range(self.language["word_length_variance"])))
+                grammar = self.language["grammar"]
+                if wlen not in grammar:
+                    if randint(0, 1) == 0:
                        # this word has no direct translation!
-                        return ""
+                        wlen = 0
+                        new_word = ''
+                    else:
+                        # use random word length
+                        wlen = choice(grammar.keys())
+
+                if wlen:
                    structure = choice(grammar[wlen])
                    grammar2phonemes = self.language["grammar2phonemes"]
                    for match in _RE_GRAMMAR.finditer(structure):
                        # there are only four combinations: vv,cc,c,v
-                        new_word += choice(grammar2phonemes[match.group()])
-            if word.istitle():
-                # capitalize words the same way
-                new_word = new_word.capitalize()
+                        try:
+                            new_word += choice(grammar2phonemes[match.group()])
+                        except KeyError:
+                            logger.log_trace("You need to supply at least one example of each of "
+                                             "the four base phonemes (c, v, cc, vv)")
+                            # abort translation here
+                            new_word = ''
+                            break
+
+                if word.istitle():
+                    title_word = ''
+                    if not start_sentence and not self.language.get("noun_translate", False):
+                        # don't translate what we identify as proper nouns (names)
+                        title_word = word
+                    elif new_word:
+                        title_word = new_word
+
+                    if title_word:
+                        # Whether or not we translated, always add the custom prefix/postfix
+                        new_word = "%s%s%s" % (self.language["noun_prefix"],
+                                               title_word.capitalize(),
+                                               self.language["noun_postfix"])
+
            if len(word) > 1 and word.isupper():
                # keep LOUD words loud also when translated
                new_word = new_word.upper()
@ -341,7 +387,9 @@ class LanguageHandler(DefaultScript):

        # configuring the translation
        self.level = int(10 * (1.0 - max(0, min(level, 1.0))))
-        return _RE_WORD.sub(self._translate_sub, text)
+        translation = _RE_WORD.sub(self._translate_sub, text)
+        # the substitution may leave overly long runs of whitespace; remove those
+        return _RE_EXTRA_CHARS.sub("", translation)


 # Language access functions
--- a/evennia/contrib/tests.py
+++ b/evennia/contrib/tests.py
@ -18,7 +18,7 @@ from evennia.contrib import rplanguage
 mtrans = {"testing": "1", "is": "2", "a": "3", "human": "4"}
 atrans = ["An", "automated", "advantageous", "repeatable", "faster"]

-text = "Automated testing is advantageous for a number of reasons:" \
+text = "Automated testing is advantageous for a number of reasons: " \
       "tests may be executed Continuously without the need for human " \
       "intervention, They are easily repeatable, and often faster."

@ -33,6 +33,12 @@ class TestLanguage(EvenniaTest):
                                manual_translations=mtrans,
                                auto_translations=atrans,
                                force=True)
+        rplanguage.add_language(key="binary",
+                                phonemes="oo ii ck w b d t",
+                                grammar="cvvv cvv cvvcv cvvcvv cvvvc cvvvcvv cvvc",
+                                vowels="oei",
+                                noun_prefix='beep-',
+                                word_length_variance=4)

    def tearDown(self):
        super(TestLanguage, self).tearDown()
@ -50,16 +56,17 @@ class TestLanguage(EvenniaTest):
        self.assertEqual(result1[1], "1")
        self.assertEqual(result1[2], "2")
        self.assertEqual(result2[-1], result2[-1])
+        print(rplanguage.obfuscate_language(text, level=1.0, language='binary'))

    def test_available_languages(self):
-        self.assertEqual(rplanguage.available_languages(), ["testlang"])
+        self.assertEqual(rplanguage.available_languages(), ["testlang", "binary"])

    def test_obfuscate_whisper(self):
        self.assertEqual(rplanguage.obfuscate_whisper(text, level=0.0), text)
        assert (rplanguage.obfuscate_whisper(text, level=0.1).startswith(
-            '-utom-t-d t-sting is -dv-nt-g-ous for - numb-r of r--sons:t-sts m-y b- -x-cut-d Continuously'))
+            '-utom-t-d t-sting is -dv-nt-g-ous for - numb-r of r--sons: t-sts m-y b- -x-cut-d Continuously'))
        assert(rplanguage.obfuscate_whisper(text, level=0.5).startswith(
-            '--------- --s---- -s -----------s f-- - ------ -f ---s--s:--s-s '))
+            '--------- --s---- -s -----------s f-- - ------ -f ---s--s: --s-s '))
        self.assertEqual(rplanguage.obfuscate_whisper(text, level=1.0), "...")

 # Testing of emoting / sdesc / recog system