mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-17 16:10:12 +01:00
182 lines
6.3 KiB
Python
182 lines
6.3 KiB
Python
"""Tests for keyword normalization (M1 - Tagging Refinement)."""
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from code.tagging import tag_utils, tag_constants
|
|
|
|
|
|
class TestKeywordNormalization:
|
|
"""Test suite for normalize_keywords function."""
|
|
|
|
def test_canonical_mappings(self):
|
|
"""Test that variant keywords map to canonical forms."""
|
|
raw = ['Commander Ninjutsu', 'Flying', 'Trample']
|
|
allowlist = tag_constants.KEYWORD_ALLOWLIST
|
|
frequency_map = {
|
|
'Commander Ninjutsu': 2,
|
|
'Flying': 100,
|
|
'Trample': 50
|
|
}
|
|
|
|
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
|
|
|
assert 'Ninjutsu' in result
|
|
assert 'Flying' in result
|
|
assert 'Trample' in result
|
|
assert 'Commander Ninjutsu' not in result
|
|
|
|
def test_singleton_pruning(self):
|
|
"""Test that singleton keywords are pruned unless allowlisted."""
|
|
raw = ['Allons-y!', 'Flying', 'Take 59 Flights of Stairs']
|
|
allowlist = {'Flying'} # Only Flying is allowlisted
|
|
frequency_map = {
|
|
'Allons-y!': 1,
|
|
'Flying': 100,
|
|
'Take 59 Flights of Stairs': 1
|
|
}
|
|
|
|
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
|
|
|
assert 'Flying' in result
|
|
assert 'Allons-y!' not in result
|
|
assert 'Take 59 Flights of Stairs' not in result
|
|
|
|
def test_case_normalization(self):
|
|
"""Test that keywords are normalized to proper case."""
|
|
raw = ['flying', 'TRAMPLE', 'vigilance']
|
|
allowlist = {'Flying', 'Trample', 'Vigilance'}
|
|
frequency_map = {
|
|
'flying': 100,
|
|
'TRAMPLE': 50,
|
|
'vigilance': 75
|
|
}
|
|
|
|
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
|
|
|
# Case normalization happens via the map
|
|
# If not in map, original case is preserved
|
|
assert len(result) == 3
|
|
|
|
def test_partner_exclusion(self):
|
|
"""Test that partner keywords remain excluded."""
|
|
raw = ['Partner', 'Flying', 'Trample']
|
|
allowlist = {'Flying', 'Trample'}
|
|
frequency_map = {
|
|
'Partner': 50,
|
|
'Flying': 100,
|
|
'Trample': 50
|
|
}
|
|
|
|
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
|
|
|
assert 'Flying' in result
|
|
assert 'Trample' in result
|
|
assert 'Partner' not in result # Excluded
|
|
assert 'partner' not in result
|
|
|
|
def test_empty_input(self):
|
|
"""Test that empty input returns empty list."""
|
|
result = tag_utils.normalize_keywords([], set(), {})
|
|
assert result == []
|
|
|
|
def test_whitespace_handling(self):
|
|
"""Test that whitespace is properly stripped."""
|
|
raw = [' Flying ', 'Trample ', ' Vigilance']
|
|
allowlist = {'Flying', 'Trample', 'Vigilance'}
|
|
frequency_map = {
|
|
'Flying': 100,
|
|
'Trample': 50,
|
|
'Vigilance': 75
|
|
}
|
|
|
|
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
|
|
|
assert 'Flying' in result
|
|
assert 'Trample' in result
|
|
assert 'Vigilance' in result
|
|
|
|
def test_deduplication(self):
|
|
"""Test that duplicate keywords are deduplicated."""
|
|
raw = ['Flying', 'Flying', 'Trample', 'Flying']
|
|
allowlist = {'Flying', 'Trample'}
|
|
frequency_map = {
|
|
'Flying': 100,
|
|
'Trample': 50
|
|
}
|
|
|
|
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
|
|
|
assert result.count('Flying') == 1
|
|
assert result.count('Trample') == 1
|
|
|
|
def test_non_string_entries_skipped(self):
|
|
"""Test that non-string entries are safely skipped."""
|
|
raw = ['Flying', None, 123, 'Trample', '']
|
|
allowlist = {'Flying', 'Trample'}
|
|
frequency_map = {
|
|
'Flying': 100,
|
|
'Trample': 50
|
|
}
|
|
|
|
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
|
|
|
assert 'Flying' in result
|
|
assert 'Trample' in result
|
|
assert len(result) == 2
|
|
|
|
def test_invalid_input_raises_error(self):
|
|
"""Test that non-iterable input raises ValueError."""
|
|
with pytest.raises(ValueError, match="raw must be iterable"):
|
|
tag_utils.normalize_keywords("not-a-list", set(), {})
|
|
|
|
def test_allowlist_preserves_singletons(self):
|
|
"""Test that allowlisted keywords survive even if they're singletons."""
|
|
raw = ['Myriad', 'Flying', 'Cascade']
|
|
allowlist = {'Flying', 'Myriad', 'Cascade'} # All allowlisted
|
|
frequency_map = {
|
|
'Myriad': 1, # Singleton
|
|
'Flying': 100,
|
|
'Cascade': 1 # Singleton
|
|
}
|
|
|
|
result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)
|
|
|
|
assert 'Myriad' in result # Preserved despite being singleton
|
|
assert 'Flying' in result
|
|
assert 'Cascade' in result # Preserved despite being singleton
|
|
|
|
|
|
class TestKeywordIntegration:
|
|
"""Integration tests for keyword normalization in tagging flow."""
|
|
|
|
def test_normalization_preserves_evergreen_keywords(self):
|
|
"""Test that common evergreen keywords are always preserved."""
|
|
evergreen = ['Flying', 'Trample', 'Vigilance', 'Haste', 'Deathtouch', 'Lifelink']
|
|
allowlist = tag_constants.KEYWORD_ALLOWLIST
|
|
frequency_map = {kw: 100 for kw in evergreen} # All common
|
|
|
|
result = tag_utils.normalize_keywords(evergreen, allowlist, frequency_map)
|
|
|
|
for kw in evergreen:
|
|
assert kw in result
|
|
|
|
def test_crossover_keywords_pruned(self):
|
|
"""Test that crossover-specific singletons are pruned."""
|
|
crossover_singletons = [
|
|
'Gae Bolg', # Final Fantasy
|
|
'Psychic Defense', # Warhammer 40K
|
|
'Allons-y!', # Doctor Who
|
|
'Flying' # Evergreen (control)
|
|
]
|
|
allowlist = {'Flying'} # Only Flying allowed
|
|
frequency_map = {
|
|
'Gae Bolg': 1,
|
|
'Psychic Defense': 1,
|
|
'Allons-y!': 1,
|
|
'Flying': 100
|
|
}
|
|
|
|
result = tag_utils.normalize_keywords(crossover_singletons, allowlist, frequency_map)
|
|
|
|
assert result == ['Flying'] # Only evergreen survived
|