diff --git a/CHANGELOG.md b/CHANGELOG.md index 925f37a022..b25f94f981 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ - Renamed Tutorial classes "Weapon" and "WeaponRack" to "TutorialWeapon" and "TutorialWeaponRack" to prevent collisions with classes in mygame - New `crafting` contrib, adding a full crafting subsystem (Griatch 2020) +- The `rplanguage` contrib now auto-capitalizes sentences and retains ellipsis (...). This + change means that proper nouns at the start of sentences will not be treated as nouns. ### Evennia 0.9.5 (2019-2020) diff --git a/evennia/contrib/rplanguage.py b/evennia/contrib/rplanguage.py index d6f2c59b4d..f40781550f 100644 --- a/evennia/contrib/rplanguage.py +++ b/evennia/contrib/rplanguage.py @@ -60,19 +60,44 @@ Usage: Below is an example of "elvish", using "rounder" vowels and sounds: ```python + # vowel/consonant grammar possibilities + grammar = ("v vv vvc vcc vvcc cvvc vccv vvccv vcvccv vcvcvcc vvccvvcc " + "vcvvccvvc cvcvvcvvcc vcvcvvccvcvv") + + # all not in this group is considered a consonant + vowels = "eaoiuy" + + # you need a representative of all of the minimal grammars here, so if a + # grammar v exists, there must be atleast one phoneme available with only + # one vowel in it phonemes = ("oi oh ee ae aa eh ah ao aw ay er ey ow ia ih iy " "oy ua uh uw y p b t d f v t dh s z sh zh ch jh k " "ng g m n l r w") - vowels = "eaoiuy" - grammar = ("v vv vvc vcc vvcc cvvc vccv vvccv vcvccv vcvcvcc vvccvvcc " - "vcvvccvvc cvcvvcvvcc vcvcvvccvcvv") + + # how much the translation varies in length compared to the original. 0 is + # smallest, higher values give ever bigger randomness (including removing + # short words entirely) word_length_variance = 1 + + # if a proper noun (word starting with capitalized letter) should be + # translated or not. If not (default) it means e.g. names will remain + # unchanged across languages. + noun_translate = False + + # all proper nouns (words starting with a capital letter not at the beginning + # of a sentence) can have either a postfix or -prefix added at all times noun_postfix = "'la" + + # words in dict will always be translated this way. The 'auto_translations' + # is instead a list or filename to file with words to use to help build a + # bigger dictionary by creating random translations of each word in the + # list *once* and saving the result for subsequent use. manual_translations = {"the":"y'e", "we":"uyi", "she":"semi", "he":"emi", "you": "do", 'me':'mi','i':'me', 'be':"hy'e", 'and':'y'} rplanguage.add_language(key="elvish", phonemes=phonemes, grammar=grammar, word_length_variance=word_length_variance, + noun_translate=noun_translate, noun_postfix=noun_postfix, vowels=vowels, manual_translations=manual_translations, auto_translations="my_word_file.txt") @@ -117,7 +142,8 @@ _GRAMMAR = "v cv vc cvv vcc vcv cvcc vccv cvccv cvcvcc cvccvcv vccvccvc cvcvccvv _RE_FLAGS = re.MULTILINE + re.IGNORECASE + re.DOTALL + re.UNICODE _RE_GRAMMAR = re.compile(r"vv|cc|v|c", _RE_FLAGS) _RE_WORD = re.compile(r"\w+", _RE_FLAGS) -_RE_EXTRA_CHARS = re.compile(r"\s+(?=\W)|[,.?;](?=[,.?;]|\s+[,.?;])", _RE_FLAGS) +# superfluous chars, except ` ... ` +_RE_EXTRA_CHARS = re.compile(r"\s+(?!... )(?=\W)|[,.?;](?!.. )(?=[,?;]|\s+[,.?;])", _RE_FLAGS) class LanguageError(RuntimeError): @@ -198,9 +224,13 @@ class LanguageHandler(DefaultScript): 0 means a minimal variance, higher variance may mean words have wildly varying length; this strongly affects how the language "looks". - noun_translate (bool, optional): If a proper noun, identified as a - capitalized word, should be translated or not. By default they - will not, allowing for e.g. the names of characters to be understandable. + noun_translate (bool, optional): If a proper noun should be translated or + not. By default they will not, allowing for e.g. the names of characters + to be understandable. A 'noun' is identified as a capitalized word + *not at the start of a sentence*. This simple metric means that names + starting a sentence always will be translated (- but hey, maybe + the fantasy language just never uses a noun at the beginning of + sentences, who knows?) noun_prefix (str, optional): A prefix to go before every noun in this language (if any). noun_postfix (str, optuonal): A postfix to go after every noun @@ -245,7 +275,7 @@ class LanguageHandler(DefaultScript): # {"vv": ["ea", "oh", ...], ...} grammar2phonemes = defaultdict(list) for phoneme in phonemes.split(): - if re.search("\W", phoneme): + if re.search(r"\W", phoneme): raise LanguageError("The phoneme '%s' contains an invalid character" % phoneme) gram = "".join(["v" if char in vowels else "c" for char in phoneme]) grammar2phonemes[gram].append(phoneme) @@ -253,7 +283,7 @@ class LanguageHandler(DefaultScript): # allowed grammar are grouped by length gramdict = defaultdict(list) for gram in grammar.split(): - if re.search("\W|(!=[cv])", gram): + if re.search(r"\W|(!=[cv])", gram): raise LanguageError( "The grammar '%s' is invalid (only 'c' and 'v' are allowed)" % gram ) @@ -325,6 +355,11 @@ class LanguageHandler(DefaultScript): word = match.group() lword = len(word) + # find out what preceeded this word + wpos = match.start() + preceeding = match.string[:wpos].strip() + start_sentence = preceeding.endswith((".", "!", "?")) or not preceeding + if len(word) <= self.level: # below level. Don't translate new_word = word @@ -334,11 +369,6 @@ class LanguageHandler(DefaultScript): if not new_word: # no dictionary translation. Generate one - # find out what preceeded this word - wpos = match.start() - preceeding = match.string[:wpos].strip() - start_sentence = preceeding.endswith((".", "!", "?")) or not preceeding - # make up translation on the fly. Length can # vary from un-translated word. wlen = max( @@ -373,24 +403,30 @@ class LanguageHandler(DefaultScript): break if word.istitle(): - title_word = "" - if not start_sentence and not self.language.get("noun_translate", False): - # don't translate what we identify as proper nouns (names) - title_word = word - elif new_word: - title_word = new_word + if not start_sentence: + # this is a noun. We miss nouns at the start of + # sentences this way, but it's as good as we can get + # with this simple analysis. Maybe the fantasy language + # just don't consider nouns at the beginning of + # sentences, who knows? + if not self.language.get("noun_translate", False): + # don't translate what we identify as proper nouns (names) + new_word = word - if title_word: - # Regardless of if we translate or not, we will add the custom prefix/postfixes - new_word = "%s%s%s" % ( - self.language["noun_prefix"], - title_word.capitalize(), - self.language["noun_postfix"], + # add noun prefix and/or postfix + new_word = "{prefix}{word}{postfix}".format( + prefix=self.language["noun_prefix"], + word=new_word.capitalize(), + postfix=self.language["noun_postfix"], ) if len(word) > 1 and word.isupper(): # keep LOUD words loud also when translated new_word = new_word.upper() + + if start_sentence: + new_word = new_word.capitalize() + return new_word def translate(self, text, level=0.0, language="default"): @@ -497,19 +533,18 @@ def available_languages(): return list(_LANGUAGE_HANDLER.attributes.get("language_storage", {})) -# ------------------------------------------------------------ +# ----------------------------------------------------------------------------- # # Whisper obscuration # -# This obsucration table is designed by obscuring certain -# vowels first, following by consonants that tend to be -# more audible over long distances, like s. Finally it -# does non-auditory replacements, like exclamation marks -# and capitalized letters (assumed to be spoken louder) that may still -# give a user some idea of the sentence structure. Then the word -# lengths are also obfuscated and finally the whisper # length itself. +# This obsucration table is designed by obscuring certain vowels first, +# following by consonants that tend to be more audible over long distances, +# like s. Finally it does non-auditory replacements, like exclamation marks and +# capitalized letters (assumed to be spoken louder) that may still give a user +# some idea of the sentence structure. Then the word lengths are also +# obfuscated and finally the whisper length itself. # -# ------------------------------------------------------------ +# ------------------------------------------------------------------------------ _RE_WHISPER_OBSCURE = [