Added the rplanguages module for obfuscating languages and whispers, taken from my own rp development code.

This commit is contained in:
Griatch 2015-09-24 01:55:37 +02:00
parent 23e63111cc
commit dc5d8f8130
2 changed files with 473 additions and 13 deletions

View file

@ -0,0 +1,444 @@
"""
Language and whisper obfuscation system
Evennia contrib - Griatch 2015
This module is intended to be used with an emoting system
(such as contrib/rpsystem.py). It offers the ability to
obfuscate spoken words in the game in various ways:
- Language: The language functionality defines a
pseudo-language map to any number of languages.
The string will be obfuscated depending on a scaling
that (most likely) will be input as a weighted average of
the language skill of the speaker and listener.
- Whisper: The whisper functionality will gradually
"fade out" a whisper along a scale of 0-1, where the
fading is based on gradually removing the sections of the
whisper that are (supposedly) harder to overhear first (for
example "s" sounds tend to be audible even when no
other meaning can be determined, so they are kept the longest).
Usage:
```python
from evennia.contrib import rplanguages
# need to be done once, here we create the "default" lang
rplanguages.add_language()
say = "This is me talking."
whisper = "This is me whispering."
print rplanguages.obfuscate_language(say, level=0.0)
<<< "This is me talking."
print rplanguages.obfuscate_language(say, level=0.5)
<<< "This is me byngyry."
print rplanguages.obfuscate_language(say, level=1.0)
<<< "Daly ly sy byngyry."
result = rplanguages.obfuscate_whisper(whisper, level=0.0)
<<< "This is me whispering"
result = rplanguages.obfuscate_whisper(whisper, level=0.2)
<<< "This is m- whisp-ring"
result = rplanguages.obfuscate_whisper(whisper, level=0.5)
<<< "---s -s -- ---s------"
result = rplanguages.obfuscate_whisper(whisper, level=0.7)
<<< "---- -- -- ----------"
result = rplanguages.obfuscate_whisper(whisper, level=1.0)
<<< "..."
```
To set up new languages, import and use the `add_language()`
helper method in this module. This allows you to customize the
"feel" of the semi-random language you are creating. Especially
the `word_length_variance` helps vary the length of translated
words compared to the original and can help change the "feel" for
the language you are creating. You can also add your own
dictionary and "fix" random words for a list of input words.
Below is an example of "elvish", using "rounder" vowels and sounds:
```python
phonemes = "oi oh ee ae aa eh ah ao aw ay er ey ow ia ih iy " \
"oy ua uh uw y p b t d f v t dh s z sh zh ch jh k " \
"ng g m n l r w"
vowels = "eaoiuy"
grammar = "v vv vvc vcc vvcc cvvc vccv vvccv vcvccv vcvcvcc vvccvvcc " \
"vcvvccvvc cvcvvcvvcc vcvcvvccvcvv"
word_length_variance = 1
noun_postfix = "'la"
manual_translations = {"the":"y'e", "we":"uyi", "she":"semi", "he":"emi",
"you": "do", 'me':'mi','i':'me', 'be':"hy'e", 'and':'y'}
rplanguages.add_language(key="elvish", phonemes=phonemes, grammar=grammar,
word_length_variance=word_length_variance,
noun_postfix=noun_postfix, vowels=vowels,
manual_translation=manual_translations,
auto_translations="my_word_file.txt")
```
This will produce a decidedly more "rounded" and "soft" language
than the default one. The few manual translations also make sure to
make it at least look superficially "reasonable".
The `auto_translations` keyword is useful: it accepts either a
list or a path to a file of words (one per line) for which fixed
translations are automatically created according to the grammatical
rules. This allows you to quickly build a large corpus of translated
words that never change (if this is desired).
"""
import re
from random import choice, randint
from collections import defaultdict
from evennia import DefaultScript
#------------------------------------------------------------
#
# Obfuscate language
#
#------------------------------------------------------------
# default language grammar
# Space-separated phonemes that words of the default language are built from.
_PHONEMES = "ea oh ae aa eh ah ao aw ai er ey ow ia ih iy oy ua uh uw y p b t d f v t dh s z sh zh ch jh k ng g m n l r w"
# Characters counted as vowels when a phoneme is classified into its
# consonant/vowel pattern; every other character counts as a consonant.
_VOWELS = "eaoiuy"
# Space-separated word structures, where "c" is a consonant and "v" a vowel.
_GRAMMAR = "v cv vc cvv vcc vcv cvcc vccv cvccv cvcvcc cvccvcv vccvccvc cvcvccvv cvcvcvcvv"
# Flags shared by the regular expressions compiled in this module.
_RE_FLAGS = re.MULTILINE + re.IGNORECASE + re.UNICODE
# Splits a word structure into its components ("vv", "cc", "v" or "c").
_RE_GRAMMAR = re.compile(r"vv|cc|v|c", _RE_FLAGS)
# Matches one word (a run of word characters) in a text.
_RE_WORD = re.compile(r'\w+', _RE_FLAGS)
class LanguageExistsError(Exception):
    """
    Raised when adding a language whose key is already in use.

    The explanatory text is available as the class attribute `message`
    and is also used as the exception's default argument, so that a
    bare `raise LanguageExistsError` still produces a readable
    traceback and a non-empty `str(exc)`.

    Args:
        message (str, optional): Custom error text. Defaults to the
            class-level `message`.

    """
    message = "Language is already created. Re-adding it will re-build" \
              " its dictionary map. Use 'force=True' keyword if you are sure."

    def __init__(self, message=None):
        if message is None:
            message = self.message
        super(LanguageExistsError, self).__init__(message)
        # keep `.message` in sync with what str() reports
        self.message = message
class LanguageHandler(DefaultScript):
    """
    Storage script holding every defined pseudo-language.

    This class should usually not be created on its own. It is
    created automatically by a call to `obfuscate_language` or
    `add_language` below.

    Languages are implemented as a "logical", pseudo-consistent
    language algorithm. The idea is that a language is built up from
    phonemes. These are joined together according to a "grammar" of
    possible phoneme combinations and allowed characters. It may
    sound simplistic, but this makes it easy to create
    "similar-sounding" languages. One can also custom-define a
    dictionary of some common words to give further consistency.

    Optionally, the system also allows an input list of common words
    to be loaded and given random translations. These are stored with
    the language and will thus not change. This gives a decent
    "stability" of the language, but if the goal is to obfuscate, it
    may allow players to eventually learn to understand the gist of a
    sentence even if their characters can not. Any number of
    languages can be created this way.

    This nonsense language will partially replace the actual spoken
    language when so desired (usually because the speaker/listener
    don't know the language well enough).

    """

    def at_script_creation(self):
        "Called when script is first started"
        self.key = "language_handler"
        self.db.language_storage = {}

    @staticmethod
    def _vary_length(length, variance):
        """
        Randomly vary a word length.

        Args:
            length (int): Length of the original word.
            variance (int): Number of random -1/0/+1 steps to add.

        Returns:
            length (int): The varied length, never below 0.

        """
        return max(0, length + sum(randint(-1, 1) for _ in range(variance)))

    @staticmethod
    def _build_word(structure, grammar2phonemes):
        """
        Build a random word following a consonant/vowel structure.

        Args:
            structure (str): A structure such as "cvvc".
            grammar2phonemes (dict): Maps each component ("vv", "cc",
                "v" or "c") to the list of phonemes of that shape.

        Returns:
            word (str): One randomly chosen phoneme per component,
                joined together.

        """
        return "".join(choice(grammar2phonemes[part.group()])
                       for part in _RE_GRAMMAR.finditer(structure))

    def add(self, key="default", phonemes=_PHONEMES,
            grammar=_GRAMMAR, word_length_variance=0, noun_prefix="",
            noun_postfix="", vowels=_VOWELS, manual_translation=None,
            auto_translations="word_frequency.txt", force=False,
            manual_translations=None):
        """
        Add a new language. Note that you generally only need to do
        this once per language and that adding an existing language
        will re-initialize all the random components to new permanent
        values.

        Args:
            key (str, optional): The name of the language. This
                will be used as an identifier for the language so it
                should be short and unique.
            phonemes (str, optional): Space-separated string of all allowed
                phonemes in this language.
            grammar (str): All allowed consonant (c) and vowel (v) combinations
                allowed to build up words. For example cvv would be a consonant
                followed by two vowels (would allow for a word like 'die').
            word_length_variance (int): The variation of length of words.
                0 means a minimal variance, higher variance may mean words
                have wildly varying length; this strongly affects how the
                language "looks".
            noun_prefix (str, optional): A prefix to go before every noun
                in this language (if any).
            noun_postfix (str, optional): A postfix to go after every noun
                in this language (if any, usually best to avoid combining
                with `noun_prefix` or language becomes very wordy).
            vowels (str, optional): Every vowel allowed in this language.
            manual_translation (dict, optional): This allows for custom-setting
                certain words in the language to mean the same thing. It is
                on the form `{real_word: fictional_word}`, for example
                `{"the": "y'e"}`.
            auto_translations (str or list, optional): Words that should
                be auto-translated with a random, but fixed, translation.
                If a path to a file, this file should contain a list of
                words to produce translations for, one word per line. If
                a list, the list's elements should be the words to
                translate. Manual translations always override overlapping
                translations created automatically.
            force (bool, optional): Unless true, will not allow the addition
                of a language that is already created.
            manual_translations (dict, optional): Alias of
                `manual_translation`, accepted since the module
                documentation uses this spelling. If both are given they
                are merged, with `manual_translation` taking precedence.

        Raises:
            LanguageExistsError: Raised if trying to add a language
                with a key that already exists, without `force` being set.
            IOError: If `auto_translations` is a path that cannot be
                opened. NOTE(review): the default value is the relative
                path "word_frequency.txt", so this also applies when the
                argument is left out and no such file exists in the
                working directory.

        Notes:
            The `auto_translations` word list is for example a
            word-frequency list for the N most common words in the
            host language. The translations will be random, but will
            be stored persistently to always be the same. This allows
            for building a quick, decently-sounding fictive language
            that tends to produce the same "translation" (mostly)
            with the same input sentence.

        """
        if key in self.db.language_storage and not force:
            raise LanguageExistsError

        # allowed grammar are grouped by length: {2: ["cv", "vc"], ...}
        gramdict = defaultdict(list)
        for gram in grammar.split():
            gramdict[len(gram)].append(gram)
        grammar = dict(gramdict)

        # create grammar_component->phoneme mapping
        # {"vv": ["ea", "oh", ...], ...}
        grammar2phonemes = defaultdict(list)
        for phoneme in phonemes.split():
            gram = "".join("v" if char in vowels else "c" for char in phoneme)
            grammar2phonemes[gram].append(phoneme)

        # create automatic translation
        translation = {}
        if auto_translations:
            if isinstance(auto_translations, basestring):
                # path to a file rather than a list
                with open(auto_translations, 'r') as wordfile:
                    auto_translations = wordfile.readlines()
            for word in auto_translations:
                word = word.strip()
                if not word:
                    # blank line/entry - nothing to translate
                    continue
                wlen = self._vary_length(len(word), word_length_variance)
                if wlen not in grammar:
                    # always create a translation, use random length
                    structure = choice(grammar[choice(list(grammar))])
                else:
                    # use the corresponding length
                    structure = choice(grammar[wlen])
                new_word = self._build_word(structure, grammar2phonemes)
                translation[word.lower()] = new_word.lower()

        # manual translations override the automatic ones; the original
        # keyword is applied last so that it wins over its alias.
        for manual in (manual_translations, manual_translation):
            if manual:
                translation.update((real.lower(), fictive.lower())
                                   for real, fictive in manual.items())

        # store data
        storage = {"translation": translation,
                   "grammar": grammar,
                   "grammar2phonemes": dict(grammar2phonemes),
                   "word_length_variance": word_length_variance,
                   "noun_prefix": noun_prefix,
                   "noun_postfix": noun_postfix}
        self.db.language_storage[key] = storage

    def _translate_sub(self, match):
        """
        Replacer method called by re.sub when
        traversing the language string.

        Args:
            match (re.matchobj): Match object from regex.

        Returns:
            converted word.

        Notes:
            Assumes self.language, self.lastword and self.level are
            available on the object (they are set by `translate`).

        """
        word = match.group()
        lword = len(word)
        language = self.language

        if lword <= self.level:
            # below level. Don't translate
            self.lastword = word
            return word

        if word.istitle() and not self.lastword.istitle():
            # capitalized word inside text - treat as a
            # name (don't translate) but don't allow several in a row.
            # NOTE(review): since lastword starts out empty, the very
            # first word of a text is also handled here if capitalized.
            self.lastword = word
            return "%s%s%s" % (language["noun_prefix"], word,
                               language["noun_postfix"])

        # remember this word too, so that the name check above always
        # compares against the immediately preceding word.
        self.lastword = word

        # translate the word
        new_word = language["translation"].get(word.lower(), "")
        if not new_word:
            # make up translation on the fly. Length can
            # vary from un-translated word.
            wlen = self._vary_length(lword, language["word_length_variance"])
            grammar = language["grammar"]
            if wlen not in grammar:
                # this word has no direct translation!
                return ""
            # there are only four component types: vv, cc, c, v
            new_word = self._build_word(choice(grammar[wlen]),
                                        language["grammar2phonemes"])
        if word.istitle():
            # capitalize words correctly
            new_word = new_word.capitalize()
        if len(word) > 1 and word.isupper():
            # keep LOUD words loud also when translated
            new_word = new_word.upper()
        return new_word

    def translate(self, text, level=0.0, language="default"):
        """
        Translate the text according to the given level.

        Args:
            text (str): The text to translate
            level (real): Value between 0.0 and 1.0, where
                0.0 means no obfuscation (text returned unchanged) and
                1.0 means full conversion of every word. The closer to
                1, the shorter words will be translated.
            language (str): The language key identifier.

        Returns:
            text (str): A translated string. The text is returned
                unchanged if `level` is 0 or the language is unknown.

        """
        if level == 0.0:
            # no translation
            return text
        language = self.db.language_storage.get(language, None)
        if not language:
            return text
        self.language = language

        # configuring the translation: words of at most `self.level`
        # characters are left untranslated.
        self.level = int(10 * (1.0 - max(0, min(level, 1.0))))
        self.lastword = ""

        return _RE_WORD.sub(self._translate_sub, text)
# Language access functions

# cached handler script, looked up or created on first use
_LANGUAGE_HANDLER = None


def _get_language_handler():
    """
    Return the cached language handler, initializing it if needed.

    On first use the handler script with the key "language_handler"
    is fetched; if no such script exists, a new one is created.

    Returns:
        handler (LanguageHandler): The shared language handler.

    """
    global _LANGUAGE_HANDLER
    if not _LANGUAGE_HANDLER:
        try:
            _LANGUAGE_HANDLER = LanguageHandler.objects.get(db_key="language_handler")
        except LanguageHandler.DoesNotExist:
            # imported here, as in the original code, rather than at
            # module level
            from evennia import create_script
            _LANGUAGE_HANDLER = create_script(LanguageHandler)
    return _LANGUAGE_HANDLER


def obfuscate_language(text, level=0.0, language="default"):
    """
    Main access method for the language parser.

    Args:
        text (str): Text to obfuscate.
        level (real, optional): A value from 0.0-1.0 determining
            the level of obfuscation where 0 means no obfuscation
            (string returned unchanged) and 1.0 means the entire
            string is obfuscated.
        language (str, optional): The identifier of a language
            the system understands.

    Returns:
        translated (str): The translated text.

    """
    return _get_language_handler().translate(text, level=level, language=language)


def add_language(**kwargs):
    """
    Access function to creating a new language. See the docstring of
    `LanguageHandler.add` for list of keyword arguments.

    """
    _get_language_handler().add(**kwargs)
#------------------------------------------------------------
#
# Whisper obscuration
#
# This obscuration table is designed by obscuring certain
# vowels first, followed by consonants that tend to be
# more audible over long distances, like s. Finally it
# does non-auditory replacements, like exclamation marks
# and capitalized letters (assumed to be spoken louder) that may still
# give a user some idea of the sentence structure. Then the word
# lengths are also obfuscated and finally the whisper length itself.
#
#------------------------------------------------------------
# One compiled pattern per obscuration step, ordered from least (index 0)
# to most (index 13) obscured. Every pattern is compiled with _RE_FLAGS;
# as long as that includes re.IGNORECASE, letter classes match both cases.
# (A generator is used so the loop variable does not end up in the
# module namespace on Python 2.)
_RE_WHISPER_OBSCURE = list(
    re.compile(pattern, _RE_FLAGS) for pattern in (
        r"^$",                    # 0: only empty lines match; text is kept
        r"[ae]",                  # 1: the vowels a and e
        r"[aeuy]",                # 2: adds u and y
        r"[aeiouy]",              # 3: all vowels
        r"[aeiouybdhjlmnpqrv]",   # 4: adds the consonants bdhjlmnpqrv
        r"[a-eg-rt-z]",           # 5: every letter range except f and s
        r"[A-EG-RT-Za-eg-rt-z]",  # 6: same ranges, upper case spelled out
        r"[A-EG-RT-Za-rt-z]",     # 7: adds lower-case f
        r"[A-EG-RT-Za-z]",        # 8: adds lower-case s
        r"[A-RT-Za-z]",           # 9: adds upper-case F
        r"[\w]",                  # 10: every word character
        r"[\S]",                  # 11: every non-whitespace character
        r"[\w\W]",                # 12: every character, whitespace included
        r".*",                    # 13: a whole line at once
    )
)
def obfuscate_whisper(whisper, level=0.0):
    """
    Obfuscate whisper depending on a pre-calculated level
    (that may depend on distance, listening skill etc)

    Args:
        whisper (str): The whisper string to obscure. The
            entire string will be considered in the obscuration.
        level (real, optional): This is a value 0-1, where 0
            means not obscured (whisper returned unchanged) and 1
            means fully obscured. Values outside this range are
            clamped to it.

    Returns:
        obscured (str): The whisper with the characters selected by
            the obscuration step replaced by "-", or exactly "..."
            when fully obscured.

    """
    level = min(max(0.0, level), 1.0)
    olevel = int(13.0 * level)
    if olevel >= 13:
        # Fully obscured: always the same text regardless of input.
        # Returned directly rather than via a ".*" substitution, which
        # would also replace the trailing empty match (Python 3.7+) and
        # would repeat the replacement once per line of a multi-line
        # whisper.
        return "..."
    return _RE_WHISPER_OBSCURE[olevel].sub("-", whisper)

View file

@ -1,7 +1,7 @@
"""
RP base system for Evennia
Contrib by Griatch, 2015
Contribution - Griatch, 2015
This RP base system introduces the following features to a game,
@ -67,6 +67,20 @@ from evennia.utils.utils import lazy_property
# Emote parser
#------------------------------------------------------------
# Settings
# The prefix is the (single-character) symbol used to find the start
# of a object reference, such as /tall (note that
# the system will understand multi-word references).
_PREFIX = "/"
# The num_sep is the (single-character) symbol used to separate the
# sdesc from the number when trying to separate identical sdescs from
# one another. This is the same syntax used in the rest of Evennia, so
# by default, multiple "tall" can be separated by entering 1-tall,
# 2-tall etc.
_NUM_SEP = "-"
# Texts
_EMOTE_NOMATCH_ERROR = \
@ -81,11 +95,7 @@ _LANGUAGE_NOMATCH_ERROR = \
_RE_FLAGS = re.MULTILINE + re.IGNORECASE + re.UNICODE
# The prefix is the (single-character) symbol used to find the start
# of a object reference, such as /tall (note that
# the system will understand multi-word references).
_PREFIX = "/"
_RE_PREFIX = re.compile(r"^/", re.UNICODE)
_RE_PREFIX = re.compile(r"^%s" % _PREFIX, re.UNICODE)
# The num_sep is the (single-character) symbol used to separate the
# sdesc from the number when trying to separate identical sdescs from
@ -121,16 +131,20 @@ _RE_LANGUAGE = re.compile(r"(?:(\w+))*(\".+?\")")
#TODO
# make this into a pluggable language module for handling
# language errors and translations.
_LANGUAGE_MODULE = None # load code here
#TODO function determining if a given langname exists. Note that
# langname can be None if not specified explicitly.
_LANGUAGE_AVAILABLE = lambda langname: True
#TODO function to translate a string in a given language
_LANGUAGE_TRANSLATE = lambda speaker, listener, language, text: "%s%s" % ("(%s" % language if language else "", text)
_LANGUAGE_TRANSLATE = lambda speaker, listener, language, text: "%s%s" % ("(%s)" % language if language else "", text)
#TODO list available languages
_LANGUAGE_LIST = lambda: []
# color markup to use for coloring sdescs/recog strings
# in emotes and spoken language quotes.
_LANGUAGE_COLOR = lambda obj: "{w"
_RECOG_COLOR = lambda obj: "{b"
# the emote parser works in two steps:
# 1) convert the incoming emote into an intermediary
@ -378,7 +392,7 @@ def parse_sdescs_and_recogs(sender, candidates, string, search_mode=False):
elif nmatches == 1:
key = "#%i" % obj.id
string = string[:istart0] + "{%s}" % key + string[istart + maxscore:]
mapping[key] = obj if search_mode else (obj.db.sdesc or obj.key)
mapping[key] = obj if search_mode else (obj.sdesc.get() or obj.key)
else:
refname = marker_match.group()
reflist = ["%s%s%s (%s%s)" % (inum+1, _NUM_SEP,
@ -452,15 +466,17 @@ def send_emote(sender, receivers, emote, anonymous_add="first"):
pass
# handle the language mapping, which always produce different keys ##nn
for key, (langname, saytext) in language_mapping.iteritems():
# color say's white
mapping[key] = "{w%s{n" % _LANGUAGE_TRANSLATE(sender, receiver, langname, saytext)
# color says
mapping[key] = "%s%s{n" % (_LANGUAGE_COLOR(receiver),
_LANGUAGE_TRANSLATE(sender, receiver, langname, saytext))
# make sure receiver always sees their real name
rkey = "#%i" % receiver.id
if rkey in mapping:
mapping[rkey] = receiver.key
#TODO - color handling
mapping = dict((key, "%s" % val) for key, val in mapping.iteritems())
# add color to recog strings
mapping = dict((key, "%s%s{n" % (_RECOG_COLOR(receiver), val))
for key, val in mapping.iteritems())
# do the template replacement
receiver.msg(emote.format(**mapping))