mirror of
https://github.com/Tiendil/pynames.git
synced 2025-09-22 04:40:47 +02:00
added FromCSVTablesGenerator. added IronKingdoms gobber fullnames generator
This commit is contained in:
parent
30d93facce
commit
33b303f34e
13 changed files with 306 additions and 14 deletions
|
@ -1,5 +1,6 @@
|
|||
# coding: utf-8
|
||||
|
||||
|
||||
class PynamesError(Exception):
|
||||
MSG = None
|
||||
|
||||
|
@ -7,7 +8,6 @@ class PynamesError(Exception):
|
|||
super(PynamesError, self).__init__(self.MSG % kwargs)
|
||||
|
||||
|
||||
|
||||
class NoDefaultNameValue(PynamesError):
|
||||
MSG = u'Name: can not get default value for name with data: %(raw_data)r'
|
||||
|
||||
|
@ -15,6 +15,7 @@ class NoDefaultNameValue(PynamesError):
|
|||
class FromListGeneratorError(PynamesError):
|
||||
pass
|
||||
|
||||
|
||||
class NoNamesLoadedFromListError(FromListGeneratorError):
|
||||
MSG = u'no names loaded from "%(source)s"'
|
||||
|
||||
|
@ -22,9 +23,16 @@ class NoNamesLoadedFromListError(FromListGeneratorError):
|
|||
class FromTablesGeneratorError(PynamesError):
|
||||
pass
|
||||
|
||||
|
||||
class WrongTemplateStructureError(FromTablesGeneratorError):
|
||||
MSG = u'wrong template structure - cannot choose template for genders %(genders)r with template source: "%(source)s"'
|
||||
|
||||
|
||||
class NotEqualFormsLengths(FromTablesGeneratorError):
|
||||
MSG = u'not equal forms lengths: [%(left)r] and [%(right)r]'
|
||||
|
||||
|
||||
class WrongCSVData(FromTablesGeneratorError):
    """Error for malformed csv sources; the message template is supplied by the raiser."""

    def __init__(self, msg, **kwargs):
        # The message template has to be stored on the instance *before*
        # delegating: the base initializer interpolates ``self.MSG % kwargs``.
        self.MSG = msg

        super(WrongCSVData, self).__init__(**kwargs)
|
||||
|
|
|
@ -1,7 +1,14 @@
|
|||
# coding: utf-8
|
||||
|
||||
# python lib:
|
||||
import json
|
||||
import random
|
||||
from collections import Iterable
|
||||
|
||||
# thirdparties:
|
||||
import unicodecsv
|
||||
|
||||
# pynames:
|
||||
from pynames.relations import GENDER, LANGUAGE, LANGUAGE_FORMS_LANGTH
|
||||
from pynames.names import Name
|
||||
from pynames.base import BaseGenerator
|
||||
|
@ -45,15 +52,38 @@ class Template(object):
|
|||
return left + right
|
||||
|
||||
def get_name(self, tables):
|
||||
languages = dict( (lang, u'') for lang in self.languages)
|
||||
languages = dict(
|
||||
(lang, u'') for lang in self.languages
|
||||
)
|
||||
for slug in self.template:
|
||||
record = random.choice(tables[slug])
|
||||
languages = { lang:self.merge_forms(forms, record['languages'][lang]) for lang, forms in languages.iteritems()}
|
||||
languages = {
|
||||
lang: self.merge_forms(forms, record['languages'][lang])
|
||||
for lang, forms in languages.iteritems()
|
||||
}
|
||||
|
||||
genders = dict( (gender, languages) for gender in self.genders)
|
||||
genders = dict(
|
||||
(gender, languages)
|
||||
for gender in self.genders
|
||||
)
|
||||
|
||||
return Name(self.native_language, {'genders': genders})
|
||||
|
||||
def __eq__(self, other):
    """Compare two templates by language settings, probability, genders and slugs.

    Returns ``NotImplemented`` for operands that are not templates, so that
    comparing with an unrelated object evaluates to ``False`` instead of
    raising ``AttributeError``.
    """
    if not isinstance(other, Template):
        return NotImplemented

    return (
        self.native_language == other.native_language
        and self.languages == other.languages
        and self.probability == other.probability
        and self.genders == other.genders
        and self.template == other.template
    )

def __ne__(self, other):
    """Inverse of ``__eq__``; Python 2 does not derive ``!=`` from ``==``."""
    result = self.__eq__(other)
    if result is NotImplemented:
        return result
    return not result
|
||||
|
||||
def __hash__(self):
    """Hash over the same fields that ``__eq__`` compares.

    The generators in this module pass ``languages`` as a set and ``genders``
    as a list; presumably they are stored as given (the initializer is not
    visible here -- confirm). Both are unhashable, so they are frozen first;
    otherwise ``hash(template)`` raises ``TypeError``.
    """
    return hash((
        self.native_language,
        frozenset(self.languages),
        self.probability,
        tuple(self.genders),
        ';'.join(self.template),
    ))
|
||||
|
||||
def __repr__(self):
    """Debug representation showing the template name and its slug sequence."""
    pattern = "<pynames.from_tables_generator.Template: %s=%s>"
    details = (self.name, self.template)
    return pattern % details
|
||||
|
||||
|
||||
class FromTablesGenerator(BaseGenerator):
|
||||
|
||||
|
@ -64,18 +94,22 @@ class FromTablesGenerator(BaseGenerator):
|
|||
self.templates_choices = {}
|
||||
self.templates = []
|
||||
self.tables = {}
|
||||
self.source_loader(self.SOURCE)
|
||||
|
||||
if self.SOURCE is None:
|
||||
def source_loader(self, source):
|
||||
if source is None:
|
||||
error_msg = 'FromTablesGenerator: you must make subclass of FromTablesGenerator and define attribute SOURCE in it.'
|
||||
raise NotImplementedError(error_msg)
|
||||
|
||||
with open(self.SOURCE) as f:
|
||||
with open(source) as f:
|
||||
data = json.load(f)
|
||||
self.native_language = data['native_language']
|
||||
self.languages = set(data['languages'])
|
||||
self.full_forms_for_languages = set(data.get('full_forms_for_languages', set()))
|
||||
self.templates = [ Template(template_name, self.native_language, self.languages, template_data)
|
||||
for template_name, template_data in data['templates'].items() ]
|
||||
self.templates = [
|
||||
Template(template_name, self.native_language, self.languages, template_data)
|
||||
for template_name, template_data in data['templates'].items()
|
||||
]
|
||||
self.tables = data['tables']
|
||||
|
||||
@staticmethod
|
||||
|
@ -94,7 +128,6 @@ class FromTablesGenerator(BaseGenerator):
|
|||
def _get_names_number_for_template(self):
|
||||
pass
|
||||
|
||||
|
||||
def get_names_number(self, genders=GENDER.ALL):
|
||||
templates = self._get_templates_slice(genders)
|
||||
number = sum([template.get_names_number(self.tables) for template in templates])
|
||||
|
@ -129,3 +162,132 @@ class FromTablesGenerator(BaseGenerator):
|
|||
for record in self.tables[last_table]:
|
||||
test.assertTrue(isinstance(record['languages'][language], list))
|
||||
test.assertEqual(len(record['languages'][language]), LANGUAGE_FORMS_LANGTH[language])
|
||||
|
||||
|
||||
class FromCSVTablesGenerator(FromTablesGenerator):

    """Variation of :py:class:`FromTablesGenerator` that accepts paths to 3 csv files as SOURCE.

    Read docs of :py:meth:`source_loader` for more details.

    """

    def source_loader(self, source_paths):
        """Load source from 3 csv files.

        First file should contain global settings:

        * ``native_language,languages`` header on first row
        * appropriate values on following rows

        Example::

            native_language,languages
            ru,ru
            ,en

        Second file should contain templates:

        * ``template_name,probability,genders,template`` header on first row
        * appropriate values on following rows (separate values with semicolon ";" in template column)

        Example::

            template_name,probability,genders,template
            male_1,5,m,prefixes;male_suffixes
            baby_1,1,m;f,prefixes;descriptive

        Third file should contain tables with values for template slugs in all languages:

        * first row should contain slugs with language code after colon for each
        * appropriate values on following rows. Multiple forms may be specified using semicolon as separator

        Example::

            prefixes:ru,prefixes:en,male_suffixes:ru,male_suffixes:en,descriptive:ru,descriptive:en
            Бж,Bzh,пра,pra,быстряк;быстряку,fasty
            дон;дону,don,Иван;Ивану,Ivan,Иванов;Иванову,Ivanov

        Note: you may use slugs without ":lang_code" suffix in csv header of tables file. Such headers will be treated as headers for native language

        If tables are missing for some slug then it is automatically created with values equal to slug itself.
        So you may use some slugs without specifying tables data for them. Example for apostrophe and space::

            male_1,5,m,prefixes;';male_suffixes
            male_full,5,m,first_name; ;last_name

        :param source_paths: sequence whose first three items are the paths of the
            settings, templates and tables csv files, in that order.
        :raises TypeError: if ``source_paths`` is a single string or is not a
            sequence of at least 3 items.
        :raises exceptions.WrongCSVData: if the settings file declares two different
            native languages, or a tables row has values for only some languages.

        """
        # A lone path string is iterable and may be 3+ characters long, so it
        # has to be rejected explicitly or it would be indexed char by char.
        is_single_string = isinstance(source_paths, (type(''), type(u'')))
        if is_single_string or not isinstance(source_paths, Iterable) or len(source_paths) < 3:
            # wrap the argument in a tuple so that tuples are formatted as one value
            raise TypeError('FromCSVTablesGenerator.source_loader accepts list of 3 paths as argument. Got `%s` instead' % (source_paths,))

        self.native_language = ''
        self.languages = []
        self.templates = []
        self.tables = {}

        self._load_settings_csv(source_paths[0])
        template_slugs = self._load_templates_csv(source_paths[1])
        self._load_tables_csv(source_paths[2])
        self._add_missing_tables(template_slugs)

    def _load_settings_csv(self, path):
        """Read ``native_language`` and ``languages`` from the settings csv file."""
        languages = []

        # unicodecsv decodes raw bytes itself, hence binary mode
        with open(path, 'rb') as settings_file:
            for row in unicodecsv.DictReader(settings_file, encoding='utf-8'):
                # short rows are padded with None, so do not rely on the .get default
                new_native_language = (row.get('native_language') or '').strip()

                if new_native_language:
                    if not self.native_language:
                        self.native_language = new_native_language
                    elif self.native_language != new_native_language:
                        raise exceptions.WrongCSVData(
                            'Wrong settings csv file. Native language is already set to "%(native_language)s" but new value "%(new_value)s" is present on some row',
                            native_language=self.native_language,
                            new_value=new_native_language
                        )

                new_language = (row.get('languages') or '').strip()
                if new_language:
                    languages.append(new_language)

        self.languages = set(languages)

    def _load_templates_csv(self, path):
        """Create a Template per row of the templates csv file and return the set of slugs they use."""
        template_slugs = set()

        with open(path, 'rb') as templates_file:
            for row in unicodecsv.DictReader(templates_file, encoding='utf-8'):
                template_data = {
                    'probability': float(row['probability']),
                    'genders': row['genders'].replace(' ', '').split(';'),
                    'template': row['template'].split(';'),
                }
                self.templates.append(
                    Template(row['template_name'], self.native_language, self.languages, template_data)
                )
                template_slugs.update(template_data['template'])

        return template_slugs

    def _load_tables_csv(self, path):
        """Fill ``self.tables`` from the tables csv file, one record per non-empty cell group."""
        with open(path, 'rb') as tables_file:
            reader = unicodecsv.DictReader(tables_file, encoding='utf-8')
            slugs = set(fieldname.split(':')[0] for fieldname in (reader.fieldnames or []))

            for slug in slugs:
                self.tables[slug] = []

            for row in reader:
                for slug in slugs:
                    forms_by_language = {}
                    missing_languages = []

                    for language in self.languages:
                        value = row.get('%s:%s' % (slug, language), '')
                        if not value and language == self.native_language:
                            # header without ":lang_code" belongs to the native language
                            value = row.get(slug, '')

                        if not value:
                            missing_languages.append(language)
                            continue

                        if value.find(';') > 0:
                            value = value.split(';')
                        forms_by_language[language] = value

                    if not forms_by_language:
                        # the cell is empty for every language: nothing to add
                        continue

                    if missing_languages:
                        # Checked after the whole row was inspected, so the result
                        # does not depend on the iteration order of the languages set.
                        raise exceptions.WrongCSVData(
                            'Missing language "%(language)s" for table "%(slug)s" with partial datum "%(table_item)s"',
                            language=sorted(missing_languages)[0],
                            slug=slug,
                            table_item={'languages': forms_by_language},
                        )

                    self.tables[slug].append({'languages': forms_by_language})

    def _add_missing_tables(self, template_slugs):
        """Create a one-record table, valued with the slug itself, for every slug that has no data."""
        for slug in template_slugs:
            if not self.tables.get(slug):
                table_item = {'languages': dict((language, slug) for language in self.languages)}
                self.tables.setdefault(slug, []).append(table_item)
|
||||
|
|
|
@ -2,9 +2,18 @@
|
|||
|
||||
import os
|
||||
|
||||
from pynames.from_tables_generator import FromTablesGenerator
|
||||
from pynames.from_tables_generator import FromTablesGenerator, FromCSVTablesGenerator
|
||||
|
||||
FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixtures')
|
||||
|
||||
|
||||
class GoblinGenerator(FromTablesGenerator):
|
||||
SOURCE = os.path.join(FIXTURES_DIR, 'goblin_names_tables.json')
|
||||
|
||||
|
||||
class GobberFullnameGenerator(FromCSVTablesGenerator):
    """Full names generator backed by the IK_gobber_names_*.csv fixtures."""

    # order matters: settings file, templates file, tables file
    SOURCE = [
        os.path.join(FIXTURES_DIR, 'IK_gobber_names_settings.csv'),
        os.path.join(FIXTURES_DIR, 'IK_gobber_names_templates.csv'),
        os.path.join(FIXTURES_DIR, 'IK_gobber_names_tables.csv'),
    ]
|
||||
|
|
2
pynames/goblin/fixtures/IK_gobber_names_settings.csv
Normal file
2
pynames/goblin/fixtures/IK_gobber_names_settings.csv
Normal file
|
@ -0,0 +1,2 @@
|
|||
languages,native_language
|
||||
en,en
|
|
55
pynames/goblin/fixtures/IK_gobber_names_tables.csv
Normal file
55
pynames/goblin/fixtures/IK_gobber_names_tables.csv
Normal file
|
@ -0,0 +1,55 @@
|
|||
gobber_first_name_male,gobber_first_name_female,gobber_descriptive,gobber_descriptive_meaning
|
||||
Ad,Agghi,ag,"untouchable, noisy or annoying"
|
||||
Ant,Ala,ak,"untouchable, noisy or annoying"
|
||||
Az,Anh,agen,"tough, stubborn "
|
||||
Boll,Ano,aken,"tough, stubborn"
|
||||
Bert,Ara,aggan,"bloodthirsty, gullible"
|
||||
Bork,Atra,akkan,"bloodthirsty, gullible"
|
||||
Dag,Bel,ahun,"always ready, lascivious or sadistic"
|
||||
Dar,Da,alog,"wise or clever, meek "
|
||||
Gek,Dar,alok,"wise or clever, meek"
|
||||
Gork,Enda,amun,"skilled camouflager, unimportant"
|
||||
Gort,Gan,aneg,"ferocious, irrational & gluttonous"
|
||||
Gul,Gara,anek,"ferocious, irrational & gluttonous"
|
||||
Gun,Geka,anen,"stealthy, nervous or insecure"
|
||||
Hek,Gola,anheg,"toolmaker, old & senile"
|
||||
Hok,Gomm,anhek,"toolmaker, old & senile"
|
||||
Kanh,Gren,ano,"adept or nimble, weak"
|
||||
Kug,Grend,arag,"powerful, smells horrible "
|
||||
Lan,Kat,arak,"powerful, smells horrible"
|
||||
Lok,Lom,atol,"good friend, leader in battle"
|
||||
Mo,Mari,dara,"healthy or long lived, conservative"
|
||||
Mog,Meg,egga,"radiant, manic"
|
||||
Nat,Nan,ekka,"radiant, manic"
|
||||
Nun,Rala,eleg,"precise, pretty & delicate"
|
||||
Ork,Ren,elek,"precise, pretty & delicate"
|
||||
Oz,Sele,emun,"efficient, aggressive"
|
||||
Pok,Tere,gamun,"quiet and withdrawn, maniacal"
|
||||
Poon,Ugga,gana,"exceptional, obese"
|
||||
Rak,Ula,gar,"fish, provider"
|
||||
Ranh,Vel,garda,"bold, uncouth & vulgar"
|
||||
Tak,Walu,gekan,"craftsman or artist, fickle"
|
||||
Thak,Wikk,heleg,"heroic, domineering"
|
||||
Tok,,helek,"heroic, domineering"
|
||||
Tot,,holdt,"cave or hole, fortress"
|
||||
Tun,,kam,"dexterous, cowardly"
|
||||
Tur,,kan,"sly, unreliable"
|
||||
Un,,meleg,"restrained, maudlin & morbid"
|
||||
Vog,,melek,"restrained, maudlin & morbid"
|
||||
Vorg,,obalt,"prankster, hardhearted"
|
||||
Zhag,,omog,"animal friend, good cook"
|
||||
,,omok,"animal friend, good cook"
|
||||
,,ona,"healer or brewer, eccentric"
|
||||
,,onan,"strong, loner"
|
||||
,,oran,"distinguished, strange"
|
||||
,,ralog,"clever and creative, impractical"
|
||||
,,ralok,"clever and creative, impractical"
|
||||
,,rel,"observant, quiet and passive"
|
||||
,,uladar,"innovative, cheater"
|
||||
,,ulag,"intense, aloof"
|
||||
,,ulak,"intense, aloof"
|
||||
,,ulug,"insatiable, unstoppable"
|
||||
,,uluk,"insatiable, unstoppable"
|
||||
,,uman,"faithful and determined, fanatical"
|
||||
,,uren,"dreamer, sickly"
|
||||
,,vi,"shrewd, scavenger "
|
|
3
pynames/goblin/fixtures/IK_gobber_names_templates.csv
Normal file
3
pynames/goblin/fixtures/IK_gobber_names_templates.csv
Normal file
|
@ -0,0 +1,3 @@
|
|||
template_name,probability,genders,template
|
||||
gobber_male,1,m,gobber_first_name_male;-;gobber_first_name_female;-;gobber_first_name_male;-;gobber_descriptive
|
||||
gobber_female,1,f,gobber_first_name_female;-;gobber_first_name_female;-;gobber_first_name_male;-;gobber_descriptive
|
|
6
pynames/goblin/fixtures/README.txt
Normal file
6
pynames/goblin/fixtures/README.txt
Normal file
|
@ -0,0 +1,6 @@
|
|||
|
||||
IK_gobber_names_*.csv is a dataset for IronKingdoms gobber names.
|
||||
|
||||
Iron Kingdoms and other names are property of Privateer Press
|
||||
|
||||
This dataset uses the name collection that was provided by Gin on the forum<http://privateerpressforums.com/showthread.php?122817-Names-in-Iron-Kingdoms> of "Privateer Press"<http://www.privateerpress.com>
|
3
pynames/tests/fixtures/test_from_csv_tables_generator_settings.csv
vendored
Normal file
3
pynames/tests/fixtures/test_from_csv_tables_generator_settings.csv
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
languages,native_language
|
||||
ru,en
|
||||
en,
|
|
4
pynames/tests/fixtures/test_from_csv_tables_generator_tables.csv
vendored
Normal file
4
pynames/tests/fixtures/test_from_csv_tables_generator_tables.csv
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
table1:en,table1:ru,table2:en,table2:ru,table3:en,table3:ru,table4:en,table4:ru
|
||||
T1EN1,T1RU1,_m_en_1,_m_ru_1,_f_en_1,_f_ru_1;_f_ru_1_form,','
|
||||
T1EN2,T1RU2,_m_en_2,_m_ru_2,,,
|
||||
T1EN3,T1RU3,,,,,
|
|
4
pynames/tests/fixtures/test_from_csv_tables_generator_templates.csv
vendored
Normal file
4
pynames/tests/fixtures/test_from_csv_tables_generator_templates.csv
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
template_name,probability,genders,template
|
||||
t1,2,m,table1;table2
|
||||
t2,2,f,table1;table3
|
||||
t3,1,m;f,table1;table2;table4;table3
|
|
|
@ -4,7 +4,7 @@ import os
|
|||
import unittest
|
||||
|
||||
from pynames.relations import GENDER, LANGUAGE
|
||||
from pynames.from_tables_generator import FromTablesGenerator
|
||||
from pynames.from_tables_generator import FromTablesGenerator, FromCSVTablesGenerator
|
||||
|
||||
FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixtures')
|
||||
|
||||
|
@ -91,3 +91,38 @@ class TestFromTablesGenerator(unittest.TestCase):
|
|||
name = generator.get_name(genders=[GENDER.FEMALE])
|
||||
self.assertTrue(name.get_for(GENDER.FEMALE, LANGUAGE.EN) in self.NAMES_EN_FEMALE)
|
||||
self.assertEqual(name.get_forms_for(GENDER.FEMALE, LANGUAGE.EN), None)
|
||||
|
||||
|
||||
class TestFromCSVTablesGenerator(unittest.TestCase):
    """Checks that the csv loader produces the same generator state as the json loader."""

    class TestJSONGenerator(FromTablesGenerator):
        SOURCE = os.path.join(FIXTURES_DIR, 'test_from_tables_generator.json')

    class TestCSVGenerator(FromCSVTablesGenerator):
        SOURCE = [
            os.path.join(FIXTURES_DIR, 'test_from_csv_tables_generator_settings.csv'),
            os.path.join(FIXTURES_DIR, 'test_from_csv_tables_generator_templates.csv'),
            os.path.join(FIXTURES_DIR, 'test_from_csv_tables_generator_tables.csv')
        ]

    def test_init_state_equal(self):
        """test that after init CSV and JSON generators have equal 'native_language', 'languages', 'templates', 'tables' attributes.

        This is the only test needed because if state after init is the same then
        behaviour is the same.

        """
        json_generator = self.TestJSONGenerator()
        csv_generator = self.TestCSVGenerator()

        # No debugger hook here: a failing assertion must simply fail the test,
        # not drop a non-interactive test run into a blocking set_trace().
        for attr_name in ['native_language', 'languages', 'templates', 'tables']:
            json_attr = getattr(json_generator, attr_name)
            csv_attr = getattr(csv_generator, attr_name)
            if isinstance(json_attr, list):
                # lists are compared ignoring order
                self.assertItemsEqual(csv_attr, json_attr)
            else:
                self.assertEqual(csv_attr, json_attr)
|
||||
|
|
|
@ -9,7 +9,7 @@ import pynames
|
|||
from pynames.relations import GENDER
|
||||
from pynames.base import BaseGenerator
|
||||
from pynames.from_list_generator import FromListGenerator
|
||||
from pynames.from_tables_generator import FromTablesGenerator
|
||||
from pynames.from_tables_generator import FromTablesGenerator, FromCSVTablesGenerator
|
||||
|
||||
|
||||
# TODO: test forms:
|
||||
|
@ -42,7 +42,7 @@ def get_all_generators():
|
|||
if not isinstance(generator, type) or not issubclass(generator, BaseGenerator):
|
||||
continue
|
||||
|
||||
if generator in (FromTablesGenerator, FromListGenerator):
|
||||
if generator in (FromTablesGenerator, FromListGenerator, FromCSVTablesGenerator):
|
||||
continue
|
||||
|
||||
generators.append(generator)
|
||||
|
|
3
setup.py
3
setup.py
|
@ -12,6 +12,7 @@ setuptools.setup(
|
|||
description = "characters' name generation library",
|
||||
long_description = open('README.md').read(),
|
||||
include_package_data = True, # setuptools-git MUST be installed
|
||||
test_suite = 'tests'# ,
|
||||
test_suite = 'tests',
|
||||
install_requires = ['unicodecsv'],
|
||||
# package_data = { '': ['*.json'] }
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue