mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-17 08:00:13 +01:00
feat: add keyword normalization and protection grant detection, fix template syntax and polling issues
This commit is contained in:
parent
86ec68acb4
commit
06d8796316
17 changed files with 1692 additions and 611 deletions
182
code/tests/test_keyword_normalization.py
Normal file
182
code/tests/test_keyword_normalization.py
Normal file
|
|
@ -0,0 +1,182 @@
|
|||
"""Tests for keyword normalization (M1 - Tagging Refinement)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from code.tagging import tag_utils, tag_constants
|
||||
|
||||
|
||||
class TestKeywordNormalization:
|
||||
"""Test suite for normalize_keywords function."""
|
||||
|
||||
def test_canonical_mappings(self):
|
||||
"""Test that variant keywords map to canonical forms."""
|
||||
raw = ['Commander Ninjutsu', 'Flying', 'Trample']
|
||||
allowlist = tag_constants.KEYWORD_ALLOWLIST
|
||||
frequency_map = {
|
||||
'Commander Ninjutsu': 2,
|
||||
'Flying': 100,
|
||||
'Trample': 50
|
||||
}
|
||||
|
||||
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
||||
|
||||
assert 'Ninjutsu' in result
|
||||
assert 'Flying' in result
|
||||
assert 'Trample' in result
|
||||
assert 'Commander Ninjutsu' not in result
|
||||
|
||||
def test_singleton_pruning(self):
|
||||
"""Test that singleton keywords are pruned unless allowlisted."""
|
||||
raw = ['Allons-y!', 'Flying', 'Take 59 Flights of Stairs']
|
||||
allowlist = {'Flying'} # Only Flying is allowlisted
|
||||
frequency_map = {
|
||||
'Allons-y!': 1,
|
||||
'Flying': 100,
|
||||
'Take 59 Flights of Stairs': 1
|
||||
}
|
||||
|
||||
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
||||
|
||||
assert 'Flying' in result
|
||||
assert 'Allons-y!' not in result
|
||||
assert 'Take 59 Flights of Stairs' not in result
|
||||
|
||||
def test_case_normalization(self):
|
||||
"""Test that keywords are normalized to proper case."""
|
||||
raw = ['flying', 'TRAMPLE', 'vigilance']
|
||||
allowlist = {'Flying', 'Trample', 'Vigilance'}
|
||||
frequency_map = {
|
||||
'flying': 100,
|
||||
'TRAMPLE': 50,
|
||||
'vigilance': 75
|
||||
}
|
||||
|
||||
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
||||
|
||||
# Case normalization happens via the map
|
||||
# If not in map, original case is preserved
|
||||
assert len(result) == 3
|
||||
|
||||
def test_partner_exclusion(self):
|
||||
"""Test that partner keywords remain excluded."""
|
||||
raw = ['Partner', 'Flying', 'Trample']
|
||||
allowlist = {'Flying', 'Trample'}
|
||||
frequency_map = {
|
||||
'Partner': 50,
|
||||
'Flying': 100,
|
||||
'Trample': 50
|
||||
}
|
||||
|
||||
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
||||
|
||||
assert 'Flying' in result
|
||||
assert 'Trample' in result
|
||||
assert 'Partner' not in result # Excluded
|
||||
assert 'partner' not in result
|
||||
|
||||
def test_empty_input(self):
|
||||
"""Test that empty input returns empty list."""
|
||||
result = tag_utils.normalize_keywords([], set(), {})
|
||||
assert result == []
|
||||
|
||||
def test_whitespace_handling(self):
|
||||
"""Test that whitespace is properly stripped."""
|
||||
raw = [' Flying ', 'Trample ', ' Vigilance']
|
||||
allowlist = {'Flying', 'Trample', 'Vigilance'}
|
||||
frequency_map = {
|
||||
'Flying': 100,
|
||||
'Trample': 50,
|
||||
'Vigilance': 75
|
||||
}
|
||||
|
||||
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
||||
|
||||
assert 'Flying' in result
|
||||
assert 'Trample' in result
|
||||
assert 'Vigilance' in result
|
||||
|
||||
def test_deduplication(self):
|
||||
"""Test that duplicate keywords are deduplicated."""
|
||||
raw = ['Flying', 'Flying', 'Trample', 'Flying']
|
||||
allowlist = {'Flying', 'Trample'}
|
||||
frequency_map = {
|
||||
'Flying': 100,
|
||||
'Trample': 50
|
||||
}
|
||||
|
||||
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
||||
|
||||
assert result.count('Flying') == 1
|
||||
assert result.count('Trample') == 1
|
||||
|
||||
def test_non_string_entries_skipped(self):
|
||||
"""Test that non-string entries are safely skipped."""
|
||||
raw = ['Flying', None, 123, 'Trample', '']
|
||||
allowlist = {'Flying', 'Trample'}
|
||||
frequency_map = {
|
||||
'Flying': 100,
|
||||
'Trample': 50
|
||||
}
|
||||
|
||||
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
||||
|
||||
assert 'Flying' in result
|
||||
assert 'Trample' in result
|
||||
assert len(result) == 2
|
||||
|
||||
def test_invalid_input_raises_error(self):
|
||||
"""Test that non-iterable input raises ValueError."""
|
||||
with pytest.raises(ValueError, match="raw must be iterable"):
|
||||
tag_utils.normalize_keywords("not-a-list", set(), {})
|
||||
|
||||
def test_allowlist_preserves_singletons(self):
|
||||
"""Test that allowlisted keywords survive even if they're singletons."""
|
||||
raw = ['Myriad', 'Flying', 'Cascade']
|
||||
allowlist = {'Flying', 'Myriad', 'Cascade'} # All allowlisted
|
||||
frequency_map = {
|
||||
'Myriad': 1, # Singleton
|
||||
'Flying': 100,
|
||||
'Cascade': 1 # Singleton
|
||||
}
|
||||
|
||||
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
||||
|
||||
assert 'Myriad' in result # Preserved despite being singleton
|
||||
assert 'Flying' in result
|
||||
assert 'Cascade' in result # Preserved despite being singleton
|
||||
|
||||
|
||||
class TestKeywordIntegration:
|
||||
"""Integration tests for keyword normalization in tagging flow."""
|
||||
|
||||
def test_normalization_preserves_evergreen_keywords(self):
|
||||
"""Test that common evergreen keywords are always preserved."""
|
||||
evergreen = ['Flying', 'Trample', 'Vigilance', 'Haste', 'Deathtouch', 'Lifelink']
|
||||
allowlist = tag_constants.KEYWORD_ALLOWLIST
|
||||
frequency_map = {kw: 100 for kw in evergreen} # All common
|
||||
|
||||
result = tag_utils.normalize_keywords(evergreen, allowlist, frequency_map)
|
||||
|
||||
for kw in evergreen:
|
||||
assert kw in result
|
||||
|
||||
def test_crossover_keywords_pruned(self):
|
||||
"""Test that crossover-specific singletons are pruned."""
|
||||
crossover_singletons = [
|
||||
'Gae Bolg', # Final Fantasy
|
||||
'Psychic Defense', # Warhammer 40K
|
||||
'Allons-y!', # Doctor Who
|
||||
'Flying' # Evergreen (control)
|
||||
]
|
||||
allowlist = {'Flying'} # Only Flying allowed
|
||||
frequency_map = {
|
||||
'Gae Bolg': 1,
|
||||
'Psychic Defense': 1,
|
||||
'Allons-y!': 1,
|
||||
'Flying': 100
|
||||
}
|
||||
|
||||
result = tag_utils.normalize_keywords(crossover_singletons, allowlist, frequency_map)
|
||||
|
||||
assert result == ['Flying'] # Only evergreen survived
|
||||
Loading…
Add table
Add a link
Reference in a new issue