mtg_python_deckbuilder/code/tests/test_keyword_normalization.py

183 lines
6.3 KiB
Python
Raw Normal View History

"""Tests for keyword normalization (M1 - Tagging Refinement)."""
from __future__ import annotations
import pytest
from code.tagging import tag_utils, tag_constants
class TestKeywordNormalization:
"""Test suite for normalize_keywords function."""
def test_canonical_mappings(self):
"""Test that variant keywords map to canonical forms."""
raw = ['Commander Ninjutsu', 'Flying', 'Trample']
allowlist = tag_constants.KEYWORD_ALLOWLIST
frequency_map = {
'Commander Ninjutsu': 2,
'Flying': 100,
'Trample': 50
}
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
assert 'Ninjutsu' in result
assert 'Flying' in result
assert 'Trample' in result
assert 'Commander Ninjutsu' not in result
def test_singleton_pruning(self):
"""Test that singleton keywords are pruned unless allowlisted."""
raw = ['Allons-y!', 'Flying', 'Take 59 Flights of Stairs']
allowlist = {'Flying'} # Only Flying is allowlisted
frequency_map = {
'Allons-y!': 1,
'Flying': 100,
'Take 59 Flights of Stairs': 1
}
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
assert 'Flying' in result
assert 'Allons-y!' not in result
assert 'Take 59 Flights of Stairs' not in result
def test_case_normalization(self):
"""Test that keywords are normalized to proper case."""
raw = ['flying', 'TRAMPLE', 'vigilance']
allowlist = {'Flying', 'Trample', 'Vigilance'}
frequency_map = {
'flying': 100,
'TRAMPLE': 50,
'vigilance': 75
}
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
# Case normalization happens via the map
# If not in map, original case is preserved
assert len(result) == 3
def test_partner_exclusion(self):
"""Test that partner keywords remain excluded."""
raw = ['Partner', 'Flying', 'Trample']
allowlist = {'Flying', 'Trample'}
frequency_map = {
'Partner': 50,
'Flying': 100,
'Trample': 50
}
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
assert 'Flying' in result
assert 'Trample' in result
assert 'Partner' not in result # Excluded
assert 'partner' not in result
def test_empty_input(self):
"""Test that empty input returns empty list."""
result = tag_utils.normalize_keywords([], set(), {})
assert result == []
def test_whitespace_handling(self):
"""Test that whitespace is properly stripped."""
raw = [' Flying ', 'Trample ', ' Vigilance']
allowlist = {'Flying', 'Trample', 'Vigilance'}
frequency_map = {
'Flying': 100,
'Trample': 50,
'Vigilance': 75
}
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
assert 'Flying' in result
assert 'Trample' in result
assert 'Vigilance' in result
def test_deduplication(self):
"""Test that duplicate keywords are deduplicated."""
raw = ['Flying', 'Flying', 'Trample', 'Flying']
allowlist = {'Flying', 'Trample'}
frequency_map = {
'Flying': 100,
'Trample': 50
}
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
assert result.count('Flying') == 1
assert result.count('Trample') == 1
def test_non_string_entries_skipped(self):
"""Test that non-string entries are safely skipped."""
raw = ['Flying', None, 123, 'Trample', '']
allowlist = {'Flying', 'Trample'}
frequency_map = {
'Flying': 100,
'Trample': 50
}
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
assert 'Flying' in result
assert 'Trample' in result
assert len(result) == 2
def test_invalid_input_raises_error(self):
"""Test that non-iterable input raises ValueError."""
with pytest.raises(ValueError, match="raw must be iterable"):
tag_utils.normalize_keywords("not-a-list", set(), {})
def test_allowlist_preserves_singletons(self):
"""Test that allowlisted keywords survive even if they're singletons."""
raw = ['Myriad', 'Flying', 'Cascade']
allowlist = {'Flying', 'Myriad', 'Cascade'} # All allowlisted
frequency_map = {
'Myriad': 1, # Singleton
'Flying': 100,
'Cascade': 1 # Singleton
}
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
assert 'Myriad' in result # Preserved despite being singleton
assert 'Flying' in result
assert 'Cascade' in result # Preserved despite being singleton
class TestKeywordIntegration:
"""Integration tests for keyword normalization in tagging flow."""
def test_normalization_preserves_evergreen_keywords(self):
"""Test that common evergreen keywords are always preserved."""
evergreen = ['Flying', 'Trample', 'Vigilance', 'Haste', 'Deathtouch', 'Lifelink']
allowlist = tag_constants.KEYWORD_ALLOWLIST
frequency_map = {kw: 100 for kw in evergreen} # All common
result = tag_utils.normalize_keywords(evergreen, allowlist, frequency_map)
for kw in evergreen:
assert kw in result
def test_crossover_keywords_pruned(self):
"""Test that crossover-specific singletons are pruned."""
crossover_singletons = [
'Gae Bolg', # Final Fantasy
'Psychic Defense', # Warhammer 40K
'Allons-y!', # Doctor Who
'Flying' # Evergreen (control)
]
allowlist = {'Flying'} # Only Flying allowed
frequency_map = {
'Gae Bolg': 1,
'Psychic Defense': 1,
'Allons-y!': 1,
'Flying': 100
}
result = tag_utils.normalize_keywords(crossover_singletons, allowlist, frequency_map)
assert result == ['Flying'] # Only evergreen survived