# Reconstructed from the patch d1dfc1062da2eeb04d7e7802a2ecfdd657dde9f2
# Author:  Vincent Le Goff
# Date:    Mon, 17 Jul 2017 19:05:53 +0200
# Subject: [PATCH] Add the generator contrib to generate pseudo-random strings
# File:    evennia/contrib/generator.py
r"""
Pseudo-random generator and registry

Evennia contribution - Vincent-lg 2017

This contrib can be used to generate pseudo-random strings of information
with specific criteria.  You could, for instance, use it to generate
phone numbers, license plate numbers, validation codes, non-sensitive
passwords and so on.  The strings generated by the generator will be
stored and won't be available again in order to avoid repetition.
Here's a very simple example:

```python
from evennia.contrib.generator import Generator
# Create a generator for phone numbers
phone_generator = Generator("phone number", r"555-\d{3}-\d{4}")
# Generate a phone number
number = phone_generator.generate()
# `number` will contain something like: "555-981-2207"
# If you call `phone_generator.generate`, it won't give the same anymore.
phone_generator.all()
# Will return a list of all currently-used phone numbers
phone_generator.free("555-981-2207")
# The number can be generated again.
```

To use it, you will need to:

1. Import the `Generator` class from the contrib.
2. Create an instance of this class taking two arguments:
   - The name of the generator (like "phone number", "license plate"...).
   - The regular expression representing the expected results.
3. Use the generator's `all`, `generate` and `free` methods as shown above.

Some examples of regular expressions you could use:

- `r"555-\d{3}-\d{4}"`: 555, a dash, 3 digits, another dash, 4 digits.
- `r"[0-9]{3}[A-Z][0-9]{3}"`: 3 digits, a capital letter, 3 digits.
- `r"[A-Za-z0-9]{8,15}"`: between 8 and 15 letters and digits.
- ...

Behind the scenes, a script is created to store the generated information
for a single generator.  The `Generator` object will also read the regular
expression you give to it to see what information is required (letters,
digits, a more restricted class, simple characters...).  More complex
regular expressions (with branches for instance) might not be available.

"""

from random import choice, randint, seed
import re
import string
import time

try:
    # Python 3.11+: `re` is a package and the parser lives in `re._parser`.
    from re import _parser as sre_parse
except ImportError:
    # Python 2 and Python 3 up to 3.10.
    import sre_parse

from evennia import DefaultScript, ScriptDB
from evennia.utils.create import create_script

# Upper bound the regex parser uses for unbounded repeats (`*`, `+`, `{n,}`).
# `None` when the running parser does not expose it, in which case the
# comparison in `find_elements` simply never matches.
_MAXREPEAT = getattr(sre_parse, "MAXREPEAT", None)


def _op_name(opcode):
    """
    Return the lower-case name of a regex parser opcode.

    Python 2 represents opcodes as lower-case strings ("max_repeat"),
    Python 3 as named integer constants whose name is upper-case
    ("MAX_REPEAT").  Normalizing both lets the rest of the module
    compare against a single spelling.

    """
    return str(getattr(opcode, "name", opcode)).lower()


class RejectedRegex(RuntimeError):

    """The provided regular expression has been rejected.

    More details regarding why this error occurred will be provided in
    the message.  The usual reason is the provided regular expression is
    not specific enough and could lead to inconsistent generating.

    """


class ExhaustedGenerator(RuntimeError):

    """The generator hasn't any available strings to generate anymore."""


class GeneratorScript(DefaultScript):

    """
    The global script to hold all generators.

    It will be automatically created the first time `generate` is called
    on a Generator object.

    """

    def at_script_creation(self):
        """Hook called when the script is created."""
        self.key = "generator_script"
        self.desc = "Global generator script"
        self.persistent = True

        # Permanent data to be stored: {generator name: [generated strings]}
        self.db.generated = {}


class Generator(object):

    """
    A generator class to generate pseudo-random strings with a rule.

    The "rule" defining what the generator should provide in terms of
    string is given as a regular expression when creating instances of
    this class.  You can use the `all` method to get all generated strings,
    the `generate` method to generate a new string, the `free` method
    to remove a generated string, or the `clear` method to remove all
    generated strings.

    """

    # Storage script, looked up once and then shared by every generator.
    script = None

    def __init__(self, name, regex):
        """
        Create a new generator.

        Args:
            name (str): name of the generator to create.
            regex (str): regular expression describing the generator.

        Notes:
            `name` should be an explicit name.  If you use more than one
            generator in your game, be sure to give them different names.
            This name will be used to store the generated information
            in the global script, and in case of errors.

            The regular expression should describe the generator, what
            it should generate: a phone number, a license plate, a password
            or something else.  Regular expressions allow you to use
            pretty advanced criteria, but be aware that some regular
            expressions will be rejected if not specific enough.

        Raises:
            RejectedRegex: the provided regular expression couldn't be
                accepted as a valid generator description.

        """
        self.name = name
        self.elements = []
        self.total = 1

        # Analyze the regex if any
        if regex:
            self.find_elements(regex)

    def __repr__(self):
        return "<evennia.contrib.generator.Generator for {}>".format(self.name)

    def all(self):
        """
        Return all generated strings for this generator.

        Returns:
            strings (list of str): the list of strings that are already
                used.  The strings that were generated first come first
                in the list.

        """
        script = self._get_script()
        return list(script.db.generated.get(self.name, []))

    def generate(self, store=True, keep_trying=True):
        """
        Generate a pseudo-random string according to the regular expression.

        Args:
            store (bool, optional): store the generated string in the script.
            keep_trying (bool, optional): keep on trying if the string
                already exists.

        Returns:
            The newly-generated string.

        Raises:
            ExhaustedGenerator: if there's no available string in
                this generator.

        Note:
            Unless asked explicitly, the returned string can't repeat itself.

        """
        script = self._get_script()
        generated = script.db.generated.get(self.name)
        if generated is None:
            script.db.generated[self.name] = []
            generated = script.db.generated[self.name]

        if len(generated) >= self.total:
            raise ExhaustedGenerator(
                "the generator {} has no available string left".format(self.name))

        # Draw until an unused string comes up.  `self.total` never exceeds
        # the number of distinct strings the rule can produce, so the check
        # above guarantees that at least one unused string exists.  The
        # process-wide random generator is deliberately NOT reseeded here:
        # that would affect every other user of the `random` module.
        result = self._random_string()
        if keep_trying:
            while result in generated:
                result = self._random_string()

        # Never store the same string twice (possible with keep_trying=False)
        if store and result not in generated:
            generated.append(result)

        return result

    def free(self, element):
        """
        Removes a generated string from the list of stored strings.

        Args:
            element (str): the string to remove from the list of
                generated strings.

        Raises:
            ValueError: the specified string hasn't been generated.

        Note:
            The specified string has to be present in the script (so
            has to have been generated).  It will remove this entry
            from the script, so this string could be generated again by
            calling the `generate` method.

        """
        script = self._get_script()
        generated = script.db.generated.get(self.name, [])
        if element not in generated:
            raise ValueError("the string {} isn't stored as generated by the generator {}".format(
                    element, self.name))

        generated.remove(element)

    def clear(self):
        """
        Clear the generator of all generated strings.

        """
        script = self._get_script()
        generated = script.db.generated.get(self.name, [])
        generated[:] = []

    def _random_string(self):
        """Build one candidate string, without checking or storing it."""
        pieces = []
        for element in self.elements:
            length = randint(element["min"], element["max"])
            chars = element["chars"]
            pieces.extend(choice(chars) for _ in range(length))

        return "".join(pieces)

    def _get_script(self):
        """Get or create the script."""
        if type(self).script:
            return type(self).script

        try:
            script = ScriptDB.objects.get(db_key="generator_script")
        except ScriptDB.DoesNotExist:
            script = create_script("contrib.generator.GeneratorScript")

        type(self).script = script
        return script

    def find_elements(self, regex):
        """
        Find the elements described in the regular expression.  This will
        analyze the provided regular expression and try to find elements.

        Each accepted element is stored in `self.elements` as a dictionary
        with the keys "min", "max" (number of characters to draw) and
        "chars" (the characters to draw from).  `self.total` is updated
        to a lower bound of the number of distinct strings available.

        Args:
            regex (str): the regular expression.

        Raises:
            RejectedRegex: the regular expression uses a syntax that
                cannot be turned into a finite set of characters.

        """
        self.total = 1
        self.elements = []
        tree = sre_parse.parse(regex).data
        # `tree` contains a list of elements in the regular expression
        for element in tree:
            # `element` is also a sequence, the first item is the opcode
            name = _op_name(element[0])
            desc = {"min": 1, "max": 1}

            # If `.`, break here
            if name == "any":
                raise RejectedRegex("the . definition is too broad, specify what you need more precisely")
            elif name == "at":
                # Either the beginning or end, we ignore it
                continue
            elif name == "min_repeat":
                raise RejectedRegex("you have to provide a maximum number of this character class")
            elif name == "max_repeat":
                if _MAXREPEAT is not None and element[1][1] == _MAXREPEAT:
                    # `+`, `*` or `{n,}`: no usable upper bound
                    raise RejectedRegex("you have to provide a maximum number of this character class")

                desc["min"] = element[1][0]
                desc["max"] = element[1][1]
                desc["chars"] = self._find_literal(element[1][2][0])
            elif name == "in":
                desc["chars"] = self._find_literal(element)
            elif name == "literal":
                desc["chars"] = self._find_literal(element)
            else:
                raise RejectedRegex("unhandled regex syntax:: {}".format(repr(name)))

            # Drop duplicates (like in `[\d0-9]`), keeping the order: they
            # would inflate `total` and let `generate` look forever for a
            # free string that doesn't exist.
            unique = []
            for char in desc["chars"]:
                if char not in unique:
                    unique.append(char)

            if not unique:
                raise RejectedRegex("this character class doesn't leave any character to choose from")

            desc["chars"] = unique
            self.elements.append(desc)
            self.total *= len(unique) ** desc["max"]

    def _find_literal(self, element):
        """
        Find the literal corresponding to a piece of regular expression.

        Args:
            element (tuple): a parsed piece of regular expression, whose
                first item is the opcode ("literal", "in", "range"
                or "category").

        Returns:
            chars (list of str): the characters this piece can produce.

        Raises:
            RejectedRegex: the piece cannot be converted to characters.

        """
        name = _op_name(element[0])
        chars = []
        if name == "literal":
            chars.append(chr(element[1]))
        elif name == "in":
            parts = element[1]
            negate = bool(parts) and _op_name(parts[0][0]) == "negate"
            if negate:
                # A negated class starts from letters and digits and
                # removes what the class lists.
                chars = list(string.ascii_letters + string.digits)

            for part in parts:
                if _op_name(part[0]) == "negate":
                    continue

                for char in self._find_literal(part):
                    if negate:
                        if char in chars:
                            chars.remove(char)
                    else:
                        chars.append(char)
        elif name == "range":
            chars = [chr(i) for i in range(element[1][0], element[1][1] + 1)]
        elif name == "category":
            category = _op_name(element[1])
            if category == "category_digit":
                chars = list(string.digits)
            elif category == "category_word":
                # Letters only, a deliberate subset of what `\w` matches.
                # `string.letters` is locale-dependent on Python 2 and
                # doesn't exist on Python 3.
                chars = list(string.ascii_letters)
            else:
                raise RejectedRegex("unknown category: {}".format(category))
        else:
            raise RejectedRegex("cannot find the literal: {}".format(element[0]))

        return chars