added FromCSVTablesGenerator. added IronKingdoms gobber fullnames generator

This commit is contained in:
Yaroslav Klyuyev 2015-02-09 22:20:33 +02:00
parent 30d93facce
commit 33b303f34e
13 changed files with 306 additions and 14 deletions

View file

@ -1,5 +1,6 @@
# coding: utf-8
class PynamesError(Exception):
MSG = None
@ -7,7 +8,6 @@ class PynamesError(Exception):
super(PynamesError, self).__init__(self.MSG % kwargs)
class NoDefaultNameValue(PynamesError):
MSG = u'Name: can not get default value for name with data: %(raw_data)r'
@ -15,6 +15,7 @@ class NoDefaultNameValue(PynamesError):
class FromListGeneratorError(PynamesError):
pass
class NoNamesLoadedFromListError(FromListGeneratorError):
MSG = u'no names loaded from "%(source)s"'
@ -22,9 +23,16 @@ class NoNamesLoadedFromListError(FromListGeneratorError):
class FromTablesGeneratorError(PynamesError):
pass
class WrongTemplateStructureError(FromTablesGeneratorError):
MSG = u'wrong template structure - cannot choose template for genders %(genders)r with template source: "%(source)s"'
class NotEqualFormsLengths(FromTablesGeneratorError):
MSG = u'not equal forms lengths: [%(left)r] and [%(right)r]'
class WrongCSVData(FromTablesGeneratorError):
    """Error raised for inconsistent data found in csv source files.

    Unlike the sibling errors, which define ``MSG`` on the class, the message
    template is supplied per instance.

    :param msg: %-style message template with named placeholders
    :param kwargs: values for the placeholders of ``msg``
    """

    def __init__(self, msg, **kwargs):
        # MSG has to be in place before delegating: PynamesError.__init__
        # builds the final message as ``self.MSG % kwargs``.
        self.MSG = msg
        super(WrongCSVData, self).__init__(**kwargs)

View file

@ -1,7 +1,14 @@
# coding: utf-8
# python lib:
import json
import random
from collections import Iterable
# thirdparties:
import unicodecsv
# pynames:
from pynames.relations import GENDER, LANGUAGE, LANGUAGE_FORMS_LANGTH
from pynames.names import Name
from pynames.base import BaseGenerator
@ -45,15 +52,38 @@ class Template(object):
return left + right
def get_name(self, tables):
languages = dict( (lang, u'') for lang in self.languages)
languages = dict(
(lang, u'') for lang in self.languages
)
for slug in self.template:
record = random.choice(tables[slug])
languages = { lang:self.merge_forms(forms, record['languages'][lang]) for lang, forms in languages.iteritems()}
languages = {
lang: self.merge_forms(forms, record['languages'][lang])
for lang, forms in languages.iteritems()
}
genders = dict( (gender, languages) for gender in self.genders)
genders = dict(
(gender, languages)
for gender in self.genders
)
return Name(self.native_language, {'genders': genders})
def __eq__(self, other):
return (
self.native_language == other.native_language
and self.languages == other.languages
and self.probability == other.probability
and self.genders == other.genders
and self.template == other.template
)
def __hash__(self):
return hash((self.native_language, self.languages, self.probability, self.genders, ';'.join(self.template)))
def __repr__(self):
return "<pynames.from_tables_generator.Template: %s=%s>" % (self.name, self.template)
class FromTablesGenerator(BaseGenerator):
@ -64,18 +94,22 @@ class FromTablesGenerator(BaseGenerator):
self.templates_choices = {}
self.templates = []
self.tables = {}
self.source_loader(self.SOURCE)
if self.SOURCE is None:
def source_loader(self, source):
if source is None:
error_msg = 'FromTablesGenerator: you must make subclass of FromTablesGenerator and define attribute SOURCE in it.'
raise NotImplementedError(error_msg)
with open(self.SOURCE) as f:
with open(source) as f:
data = json.load(f)
self.native_language = data['native_language']
self.languages = set(data['languages'])
self.full_forms_for_languages = set(data.get('full_forms_for_languages', set()))
self.templates = [ Template(template_name, self.native_language, self.languages, template_data)
for template_name, template_data in data['templates'].items() ]
self.templates = [
Template(template_name, self.native_language, self.languages, template_data)
for template_name, template_data in data['templates'].items()
]
self.tables = data['tables']
@staticmethod
@ -94,7 +128,6 @@ class FromTablesGenerator(BaseGenerator):
def _get_names_number_for_template(self):
pass
def get_names_number(self, genders=GENDER.ALL):
templates = self._get_templates_slice(genders)
number = sum([template.get_names_number(self.tables) for template in templates])
@ -129,3 +162,132 @@ class FromTablesGenerator(BaseGenerator):
for record in self.tables[last_table]:
test.assertTrue(isinstance(record['languages'][language], list))
test.assertEqual(len(record['languages'][language]), LANGUAGE_FORMS_LANGTH[language])
class FromCSVTablesGenerator(FromTablesGenerator):
    """Variation of :py:class:`FromTablesGenerator` that accepts paths to 3 csv files as SOURCE.

    Read docs of :py:meth:`source_loader` for more details.
    """

    def source_loader(self, source_paths):
        """Load source from 3 csv files.

        First file should contain global settings:

        * ``native_language,languages`` header on first row
        * appropriate values on following rows

        Example::

            native_language,languages
            ru,ru
            ,en

        Second file should contain templates:

        * ``template_name,probability,genders,template`` header on first row
        * appropriate values on following rows (separate values with semicolon ";" in template column)

        Example::

            template_name,probability,genders,template
            male_1,5,m,prefixes;male_suffixes
            baby_1,1,m;f,prefixes;descriptive

        Third file should contain tables with values for template slugs in all languages:

        * first row should contain slugs with language code after colon for each
        * appropriate values on following rows. Multiple forms may be specified using semicolon as separator

        Example::

            prefixes:ru,prefixes:en,male_suffixes:ru,male_suffixes:en,descriptive:ru,descriptive:en
            Бж,Bzh,пра,pra,быстряк;быстряку,fasty
            дон;дону,don,Иван;Ивану,Ivan,Иванов;Иванову,Ivanov

        Note: you may use slugs without ":lang_code" suffix in csv header of tables file.
        Such headers will be treated as headers for native language.

        If tables are missing for some slug then it is automatically created with values equal to slug itself.
        So you may use some slugs without specifying tables data for them. Example for apostrophe and space::

            male_1,5,m,prefixes;';male_suffixes
            male_full,5,m,first_name; ;last_name

        :param source_paths: iterable with at least 3 paths, in the order
            settings, templates, tables
        :raises TypeError: if ``source_paths`` is a single string, is not
            iterable or holds fewer than 3 paths
        :raises exceptions.WrongCSVData: if the settings file names two
            different native languages, or a table cell is filled for some
            languages but not for all of them
        """
        error_msg = 'FromCSVTablesGenerator.source_loader accepts list of 3 paths as argument. Got `%s` instead'
        # A single path string is iterable too, but indexing it would yield
        # characters instead of paths, so reject it explicitly.
        string_types = (type(b''), type(u''))
        if isinstance(source_paths, string_types) or not isinstance(source_paths, Iterable):
            # Wrap in a tuple so that a tuple argument is formatted as one value.
            raise TypeError(error_msg % (source_paths,))
        source_paths = list(source_paths)
        if len(source_paths) < 3:
            raise TypeError(error_msg % (source_paths,))

        self.native_language = ''
        self.languages = []
        self.templates = []
        self.tables = {}
        # The JSON loader always defines this attribute (empty set when the
        # source omits it); mirror that unless it is already defined.
        if not hasattr(self, 'full_forms_for_languages'):
            self.full_forms_for_languages = set()

        self._load_settings(source_paths[0])
        template_slugs = self._load_templates(source_paths[1])
        self._load_tables(source_paths[2])
        self._fill_missing_tables(template_slugs)

    def _load_settings(self, path):
        """Read ``native_language`` and ``languages`` from the settings csv."""
        # unicodecsv decodes bytes itself, hence binary mode.
        with open(path, 'rb') as settings_file:
            reader = unicodecsv.DictReader(settings_file, encoding='utf-8')
            for row in reader:
                # Short rows yield None for the missing columns.
                new_native_language = (row.get('native_language') or '').strip()
                if new_native_language and not self.native_language:
                    self.native_language = new_native_language
                elif self.native_language and new_native_language and self.native_language != new_native_language:
                    raise exceptions.WrongCSVData(
                        'Wrong settings csv file. Native language is already set to "%(native_language)s" but new value "%(new_value)s" is present on some row',
                        native_language=self.native_language,
                        new_value=new_native_language
                    )

                new_language = (row.get('languages') or '').strip()
                if new_language:
                    self.languages.append(new_language)

        self.languages = set(self.languages)

    def _load_templates(self, path):
        """Append templates from the templates csv and return the set of slugs they use."""
        template_slugs = set()
        with open(path, 'rb') as templates_file:
            reader = unicodecsv.DictReader(templates_file, encoding='utf-8')
            for row in reader:
                template_data = {
                    'probability': float(row['probability']),
                    'genders': row['genders'].replace(' ', '').split(';'),
                    'template': row['template'].split(';'),
                }
                self.templates.append(
                    Template(row['template_name'], self.native_language, self.languages, template_data)
                )
                template_slugs.update(template_data['template'])
        return template_slugs

    def _load_tables(self, path):
        """Fill ``self.tables`` with one record list per slug found in the tables csv header."""
        with open(path, 'rb') as tables_file:
            reader = unicodecsv.DictReader(tables_file, encoding='utf-8')
            # fieldnames is None for an empty file.
            slugs = set(fieldname.split(':')[0] for fieldname in (reader.fieldnames or []))
            for slug in slugs:
                self.tables[slug] = []
            for row in reader:
                for slug in slugs:
                    table_item = self._build_table_item(row, slug)
                    if table_item:
                        self.tables[slug].append(table_item)

    def _build_table_item(self, row, slug):
        """Build the table record of ``slug`` for one csv row; empty dict if the row has no value for it."""
        forms_by_language = {}
        missing_languages = []
        for language in self.languages:
            value = row.get('%s:%s' % (slug, language), '')
            if not value and language == self.native_language:
                # A header without ":lang_code" belongs to the native language.
                value = row.get(slug, '')
            if not value:
                missing_languages.append(language)
                continue
            # NOTE(review): "> 0" keeps a cell that starts with ";" as a plain
            # string; kept as in the original code - confirm this is intended.
            if value.find(';') > 0:
                value = value.split(';')
            forms_by_language[language] = value

        if not forms_by_language:
            return {}

        table_item = {'languages': forms_by_language}
        if missing_languages:
            # Checked after the loop so the result does not depend on the
            # iteration order of the languages set.
            raise exceptions.WrongCSVData(
                'Missing language "%(language)s" for table "%(slug)s" with partial datum "%(table_item)s"',
                language=sorted(missing_languages)[0], slug=slug, table_item=table_item,
            )
        return table_item

    def _fill_missing_tables(self, template_slugs):
        """Create a one-record table for each template slug without data, using the slug itself as value."""
        for slug in template_slugs:
            if not self.tables.get(slug):
                table_item = {'languages': dict((language, slug) for language in self.languages)}
                self.tables.setdefault(slug, []).append(table_item)

View file

@ -2,9 +2,18 @@
import os
from pynames.from_tables_generator import FromTablesGenerator
from pynames.from_tables_generator import FromTablesGenerator, FromCSVTablesGenerator
FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixtures')
class GoblinGenerator(FromTablesGenerator):
SOURCE = os.path.join(FIXTURES_DIR, 'goblin_names_tables.json')
class GobberFullnameGenerator(FromCSVTablesGenerator):
    """Iron Kingdoms gobber full-name generator backed by the ``IK_gobber_names_*`` csv fixtures."""

    # Order matters: the csv loader reads settings, templates and tables by position.
    SOURCE = [
        os.path.join(FIXTURES_DIR, 'IK_gobber_names_settings.csv'),
        os.path.join(FIXTURES_DIR, 'IK_gobber_names_templates.csv'),
        os.path.join(FIXTURES_DIR, 'IK_gobber_names_tables.csv'),
    ]

View file

@ -0,0 +1,2 @@
languages,native_language
en,en
1 languages native_language
2 en en

View file

@ -0,0 +1,55 @@
gobber_first_name_male,gobber_first_name_female,gobber_descriptive,gobber_descriptive_meaning
Ad,Agghi,ag,"untouchable, noisy or annoying"
Ant,Ala,ak,"untouchable, noisy or annoying"
Az,Anh,agen,"tough, stubborn "
Boll,Ano,aken,"tough, stubborn"
Bert,Ara,aggan,"bloodthirsty, gullible"
Bork,Atra,akkan,"bloodthirsty, gullible"
Dag,Bel,ahun,"always ready, lascivious or sadistic"
Dar,Da,alog,"wise or clever, meek "
Gek,Dar,alok,"wise or clever, meek"
Gork,Enda,amun,"skilled camouflager, unimportant"
Gort,Gan,aneg,"ferocious, irrational & gluttonous"
Gul,Gara,anek,"ferocious, irrational & gluttonous"
Gun,Geka,anen,"stealthy, nervous or insecure"
Hek,Gola,anheg,"toolmaker, old & senile"
Hok,Gomm,anhek,"toolmaker, old & senile"
Kanh,Gren,ano,"adept or nimble, weak"
Kug,Grend,arag,"powerful, smells horrible "
Lan,Kat,arak,"powerful, smells horrible"
Lok,Lom,atol,"good friend, leader in battle"
Mo,Mari,dara,"healthy or long lived, conservative"
Mog,Meg,egga,"radiant, manic"
Nat,Nan,ekka,"radiant, manic"
Nun,Rala,eleg,"precise, pretty & delicate"
Ork,Ren,elek,"precise, pretty & delicate"
Oz,Sele,emun,"efficient, aggressive"
Pok,Tere,gamun,"quiet and withdrawn, maniacal"
Poon,Ugga,gana,"exceptional, obese"
Rak,Ula,gar,"fish, provider"
Ranh,Vel,garda,"bold, uncouth & vulgar"
Tak,Walu,gekan,"craftsman or artist, fickle"
Thak,Wikk,heleg,"heroic, domineering"
Tok,,helek,"heroic, domineering"
Tot,,holdt,"cave or hole, fortress"
Tun,,kam,"dexterous, cowardly"
Tur,,kan,"sly, unrelaible"
Un,,meleg,"restrained, maudlin & morbid"
Vog,,melek,"restrained, maudlin & morbid"
Vorg,,obalt,"prankster, hardhearted"
Zhag,,omog,"animal friend, good cook"
,,omok,"animal friend, good cook"
,,ona,"healer or brewer, eccentric"
,,onan,"strong, loner"
,,oran,"distinguished, strange"
,,ralog,"clever and creative, impractical"
,,ralok,"clever and creative, impractical"
,,rel,"observant, quiet and passive"
,,uladar,"innovative, cheater"
,,ulag,"intense, aloof"
,,ulak,"intense, aloof"
,,ulug,"insatiable, unstoppable"
,,uluk,"insatiable, unstoppable"
,,uman,"faithful and determined, fanatical"
,,uren,"dreamer, sickly"
,,vi,"shrewd, scavenger "
1 gobber_first_name_male gobber_first_name_female gobber_descriptive gobber_descriptive_meaning
2 Ad Agghi ag untouchable, noisy or annoying
3 Ant Ala ak untouchable, noisy or annoying
4 Az Anh agen tough, stubborn
5 Boll Ano aken tough, stubborn
6 Bert Ara aggan bloodthirsty, gullible
7 Bork Atra akkan bloodthirsty, gullible
8 Dag Bel ahun always ready, lascivious or sadistic
9 Dar Da alog wise or clever, meek
10 Gek Dar alok wise or clever, meek
11 Gork Enda amun skilled camouflager, unimportant
12 Gort Gan aneg ferocious, irrational & gluttonous
13 Gul Gara anek ferocious, irrational & gluttonous
14 Gun Geka anen stealthy, nervous or insecure
15 Hek Gola anheg toolmaker, old & senile
16 Hok Gomm anhek toolmaker, old & senile
17 Kanh Gren ano adept or nimble, weak
18 Kug Grend arag powerful, smells horrible
19 Lan Kat arak powerful, smells horrible
20 Lok Lom atol good friend, leader in battle
21 Mo Mari dara healthy or long lived, conservative
22 Mog Meg egga radiant, manic
23 Nat Nan ekka radiant, manic
24 Nun Rala eleg precise, pretty & delicate
25 Ork Ren elek precise, pretty & delicate
26 Oz Sele emun efficient, aggressive
27 Pok Tere gamun quiet and withdrawn, maniacal
28 Poon Ugga gana exceptional, obese
29 Rak Ula gar fish, provider
30 Ranh Vel garda bold, uncouth & vulgar
31 Tak Walu gekan craftsman or artist, fickle
32 Thak Wikk heleg heroic, domineering
33 Tok helek heroic, domineering
34 Tot holdt cave or hole, fortress
35 Tun kam dexterous, cowardly
36 Tur kan sly, unrelaible
37 Un meleg restrained, maudlin & morbid
38 Vog melek restrained, maudlin & morbid
39 Vorg obalt prankster, hardhearted
40 Zhag omog animal friend, good cook
41 omok animal friend, good cook
42 ona healer or brewer, eccentric
43 onan strong, loner
44 oran distinguished, strange
45 ralog clever and creative, impractical
46 ralok clever and creative, impractical
47 rel observant, quiet and passive
48 uladar innovative, cheater
49 ulag intense, aloof
50 ulak intense, aloof
51 ulug insatiable, unstoppable
52 uluk insatiable, unstoppable
53 uman faithful and determined, fanatical
54 uren dreamer, sickly
55 vi shrewd, scavenger

View file

@ -0,0 +1,3 @@
template_name,probability,genders,template
gobber_male,1,m,gobber_first_name_male;-;gobber_first_name_female;-;gobber_first_name_male;-;gobber_descriptive
gobber_male,1,f,gobber_first_name_female;-;gobber_first_name_female;-;gobber_first_name_male;-;gobber_descriptive
1 template_name probability genders template
2 gobber_male 1 m gobber_first_name_male;-;gobber_first_name_female;-;gobber_first_name_male;-;gobber_descriptive
3 gobber_male 1 f gobber_first_name_female;-;gobber_first_name_female;-;gobber_first_name_male;-;gobber_descriptive

View file

@ -0,0 +1,6 @@
The IK_gobber_names_*.csv files are a dataset of Iron Kingdoms gobber names.
Iron Kingdoms and other names are the property of Privateer Press.
This dataset uses the name collection that was provided by Gin on the forum <http://privateerpressforums.com/showthread.php?122817-Names-in-Iron-Kingdoms> of "Privateer Press" <http://www.privateerpress.com>.

View file

@ -0,0 +1,3 @@
languages,native_language
ru,en
en,
1 languages native_language
2 ru en
3 en

View file

@ -0,0 +1,4 @@
table1:en,table1:ru,table2:en,table2:ru,table3:en,table3:ru,table4:en,table4:ru
T1EN1,T1RU1,_m_en_1,_m_ru_1,_f_en_1,_f_ru_1;_f_ru_1_form,','
T1EN2,T1RU2,_m_en_2,_m_ru_2,,,
T1EN3,T1RU3,,,,,
1 table1:en,table1:ru,table2:en,table2:ru,table3:en,table3:ru,table4:en,table4:ru
2 T1EN1,T1RU1,_m_en_1,_m_ru_1,_f_en_1,_f_ru_1;_f_ru_1_form,','
3 T1EN2,T1RU2,_m_en_2,_m_ru_2,,,
4 T1EN3,T1RU3,,,,,

View file

@ -0,0 +1,4 @@
template_name,probability,genders,template
t1,2,m,table1;table2
t2,2,f,table1;table3
t3,1,m;f,table1;table2;table4;table3
1 template_name probability genders template
2 t1 2 m table1;table2
3 t2 2 f table1;table3
4 t3 1 m;f table1;table2;table4;table3

View file

@ -4,7 +4,7 @@ import os
import unittest
from pynames.relations import GENDER, LANGUAGE
from pynames.from_tables_generator import FromTablesGenerator
from pynames.from_tables_generator import FromTablesGenerator, FromCSVTablesGenerator
FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixtures')
@ -91,3 +91,38 @@ class TestFromTablesGenerator(unittest.TestCase):
name = generator.get_name(genders=[GENDER.FEMALE])
self.assertTrue(name.get_for(GENDER.FEMALE, LANGUAGE.EN) in self.NAMES_EN_FEMALE)
self.assertEqual(name.get_forms_for(GENDER.FEMALE, LANGUAGE.EN), None)
class TestFromCSVTablesGenerator(unittest.TestCase):
    """Checks that the csv loader produces the same generator state as the json loader."""

    class TestJSONGenerator(FromTablesGenerator):
        SOURCE = os.path.join(FIXTURES_DIR, 'test_from_tables_generator.json')

    class TestCSVGenerator(FromCSVTablesGenerator):
        SOURCE = [
            os.path.join(FIXTURES_DIR, 'test_from_csv_tables_generator_settings.csv'),
            os.path.join(FIXTURES_DIR, 'test_from_csv_tables_generator_templates.csv'),
            os.path.join(FIXTURES_DIR, 'test_from_csv_tables_generator_tables.csv')
        ]

    def test_init_state_equal(self):
        """test that after init CSV and JSON generators have equal 'native_language', 'languages', 'templates', 'tables' attributes.

        This is the only test needed because if state after init is the same then
        behaviour is the same.
        """
        json_generator = self.TestJSONGenerator()
        csv_generator = self.TestCSVGenerator()

        # No try/except around the assertions: a failure must be reported by
        # the test runner, not open an interactive debugger.
        for attr_name in ['native_language', 'languages', 'templates', 'tables']:
            json_attr = getattr(json_generator, attr_name)
            csv_attr = getattr(csv_generator, attr_name)
            if isinstance(json_attr, list):
                # Order of list items is not significant.
                self.assertItemsEqual(csv_attr, json_attr)
            else:
                self.assertEqual(csv_attr, json_attr)

View file

@ -9,7 +9,7 @@ import pynames
from pynames.relations import GENDER
from pynames.base import BaseGenerator
from pynames.from_list_generator import FromListGenerator
from pynames.from_tables_generator import FromTablesGenerator
from pynames.from_tables_generator import FromTablesGenerator, FromCSVTablesGenerator
# TODO: test forms:
@ -42,7 +42,7 @@ def get_all_generators():
if not isinstance(generator, type) or not issubclass(generator, BaseGenerator):
continue
if generator in (FromTablesGenerator, FromListGenerator):
if generator in (FromTablesGenerator, FromListGenerator, FromCSVTablesGenerator):
continue
generators.append(generator)

View file

@ -12,6 +12,7 @@ setuptools.setup(
description = "characters' name generation library",
long_description = open('README.md').read(),
include_package_data = True, # setuptools-git MUST be installed
test_suite = 'tests'# ,
test_suite = 'tests',
install_requires = ['unicodecsv'],
# package_data = { '': ['*.json'] }
)