mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-17 08:00:13 +01:00
300 lines
11 KiB
Python
300 lines
11 KiB
Python
"""Tests for M3 metadata/theme tag partition functionality.
|
|
|
|
Tests cover:
|
|
- Tag classification (metadata vs theme)
|
|
- Column creation and data migration
|
|
- Feature flag behavior
|
|
- Compatibility with missing columns
|
|
- CSV read/write with new schema
|
|
"""
|
|
import pandas as pd
|
|
import pytest
|
|
from code.tagging import tag_utils
|
|
from code.tagging.tagger import _apply_metadata_partition
|
|
|
|
|
|
class TestTagClassification:
    """Checks on the label returned by ``tag_utils.classify_tag``."""

    def test_prefix_based_metadata(self):
        """Prefixed tags are expected to classify as metadata."""
        prefixed_tags = (
            "Applied: Cost Reduction",
            "Bracket: Game Changer",
            "Diagnostic: Test",
            "Internal: Debug",
        )
        for tag in prefixed_tags:
            assert tag_utils.classify_tag(tag) == "metadata"

    def test_exact_match_metadata(self):
        """Exact-match tags are expected to classify as metadata."""
        for tag in ("Bracket: Game Changer", "Bracket: Staple"):
            assert tag_utils.classify_tag(tag) == "metadata"

    def test_kindred_protection_metadata(self):
        """'<Tribe> Gain Protection' tags are expected to classify as metadata."""
        kindred_tags = (
            "Knights Gain Protection",
            "Frogs Gain Protection",
            "Zombies Gain Protection",
        )
        for tag in kindred_tags:
            assert tag_utils.classify_tag(tag) == "metadata"

    def test_theme_classification(self):
        """Ordinary gameplay tags are expected to classify as themes."""
        gameplay_tags = (
            "Card Draw",
            "Spellslinger",
            "Tokens Matter",
            "Ramp",
            "Protection",
        )
        for tag in gameplay_tags:
            assert tag_utils.classify_tag(tag) == "theme"

    def test_edge_cases(self):
        """Near-miss inputs are expected to fall back to the theme label."""
        near_misses = (
            "",                          # Empty string
            "Apply: Something",          # Similar to a prefix, but wrong
            "Knights Have Protection",   # "Have" instead of "Gain"
            "applied: Cost Reduction",   # Lowercase prefix (case sensitivity)
        )
        for tag in near_misses:
            assert tag_utils.classify_tag(tag) == "theme"
|
|
|
|
|
|
class TestMetadataPartition:
    """Checks on ``_apply_metadata_partition`` using small in-memory frames."""

    def test_basic_partition(self, monkeypatch):
        """Mixed tag lists are split between the theme and metadata columns."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')

        cards = pd.DataFrame({
            'name': ['Card A', 'Card B'],
            'themeTags': [
                ['Card Draw', 'Applied: Cost Reduction'],
                ['Spellslinger', 'Bracket: Game Changer', 'Tokens Matter'],
            ],
        })

        result, stats = _apply_metadata_partition(cards)

        # Theme tags expected to remain, row by row
        remaining_themes = [['Card Draw'], ['Spellslinger', 'Tokens Matter']]
        for row, themes in enumerate(remaining_themes):
            assert result.loc[row, 'themeTags'] == themes

        # Metadata tags expected to be moved, row by row
        moved_metadata = [['Applied: Cost Reduction'], ['Bracket: Game Changer']]
        for row, metadata in enumerate(moved_metadata):
            assert result.loc[row, 'metadataTags'] == metadata

        # Diagnostics
        assert stats['enabled'] is True
        expected_counts = {
            'rows_with_tags': 2,
            'metadata_tags_moved': 2,
            'theme_tags_kept': 3,
        }
        for key, count in expected_counts.items():
            assert stats[key] == count

    def test_empty_tags(self, monkeypatch):
        """A row with an empty tag list yields empty lists in both columns."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')

        cards = pd.DataFrame({
            'name': ['Card A', 'Card B'],
            'themeTags': [[], ['Card Draw']],
        })

        result, stats = _apply_metadata_partition(cards)

        # (themeTags, metadataTags) expected for each row
        expected_rows = [
            ([], []),
            (['Card Draw'], []),
        ]
        for row, (themes, metadata) in enumerate(expected_rows):
            assert result.loc[row, 'themeTags'] == themes
            assert result.loc[row, 'metadataTags'] == metadata

        assert stats['rows_with_tags'] == 1

    def test_all_metadata_tags(self, monkeypatch):
        """A row holding only metadata tags ends up with no theme tags."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')

        metadata_only = ['Applied: Cost Reduction', 'Bracket: Game Changer']
        cards = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [metadata_only],
        })

        result, stats = _apply_metadata_partition(cards)

        assert result.loc[0, 'themeTags'] == []
        assert result.loc[0, 'metadataTags'] == ['Applied: Cost Reduction', 'Bracket: Game Changer']

        assert stats['metadata_tags_moved'] == 2
        assert stats['theme_tags_kept'] == 0

    def test_all_theme_tags(self, monkeypatch):
        """A row holding only theme tags ends up with no metadata tags."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')

        themes_only = ['Card Draw', 'Ramp', 'Spellslinger']
        cards = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [themes_only],
        })

        result, stats = _apply_metadata_partition(cards)

        assert result.loc[0, 'themeTags'] == ['Card Draw', 'Ramp', 'Spellslinger']
        assert result.loc[0, 'metadataTags'] == []

        assert stats['metadata_tags_moved'] == 0
        assert stats['theme_tags_kept'] == 3

    def test_feature_flag_disabled(self, monkeypatch):
        """With the flag set to '0' the frame is expected to be left alone."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '0')

        cards = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [['Card Draw', 'Applied: Cost Reduction']],
        })

        result, stats = _apply_metadata_partition(cards)

        # No metadataTags column should be created
        assert 'metadataTags' not in result.columns

        # themeTags should be untouched
        assert result.loc[0, 'themeTags'] == ['Card Draw', 'Applied: Cost Reduction']

        # Diagnostics should report the feature as disabled
        assert stats['enabled'] is False

    def test_missing_theme_tags_column(self, monkeypatch):
        """A frame without themeTags is returned unchanged with an error note."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')

        cards = pd.DataFrame({
            'name': ['Card A'],
            'other_column': ['value'],
        })

        result, stats = _apply_metadata_partition(cards)

        # Should return unchanged: neither tag column appears
        for column in ('themeTags', 'metadataTags'):
            assert column not in result.columns

        # Should indicate error while still reporting the feature as enabled
        assert stats['enabled'] is True
        assert 'error' in stats

    def test_non_list_tags(self, monkeypatch):
        """Non-list themeTags values are not counted as rows with tags."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')

        cards = pd.DataFrame({
            'name': ['Card A', 'Card B', 'Card C'],
            'themeTags': [['Card Draw'], None, 'not a list'],
        })

        result, stats = _apply_metadata_partition(cards)

        # Only the first row holds a real list and should be processed
        assert result.loc[0, 'themeTags'] == ['Card Draw']
        assert result.loc[0, 'metadataTags'] == []

        assert stats['rows_with_tags'] == 1

    def test_kindred_protection_partition(self, monkeypatch):
        """Kindred protection tags move to metadata; plain tags stay themes."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')

        cards = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [['Protection', 'Knights Gain Protection', 'Card Draw']],
        })

        result, stats = _apply_metadata_partition(cards)

        for kept in ('Protection', 'Card Draw'):
            assert kept in result.loc[0, 'themeTags']
        assert 'Knights Gain Protection' in result.loc[0, 'metadataTags']

    def test_diagnostics_structure(self, monkeypatch):
        """The diagnostics mapping exposes every expected field."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')

        cards = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [['Card Draw', 'Applied: Cost Reduction']],
        })

        result, stats = _apply_metadata_partition(cards)

        # Required diagnostic fields
        required_fields = (
            'enabled',
            'total_rows',
            'rows_with_tags',
            'metadata_tags_moved',
            'theme_tags_kept',
            'unique_metadata_tags',
            'unique_theme_tags',
            'most_common_metadata',
            'most_common_themes',
        )
        for field in required_fields:
            assert field in stats

        # The "most common" entries must be lists
        for field in ('most_common_metadata', 'most_common_themes'):
            assert isinstance(stats[field], list)
|
|
|
|
|
|
class TestCSVCompatibility:
    """Checks that tag columns survive a CSV write/read cycle."""

    def test_csv_roundtrip_with_metadata(self, tmp_path, monkeypatch):
        """Both tag columns read back from CSV equal what was written."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')

        target = tmp_path / "test_cards.csv"

        # Frame that already carries both tag columns
        original = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [['Card Draw', 'Ramp']],
            'metadataTags': [['Applied: Cost Reduction']],
        })

        # Write to CSV
        original.to_csv(target, index=False)

        # Read back, turning the stringified lists into Python lists again
        list_columns = ('themeTags', 'metadataTags')
        reloaded = pd.read_csv(
            target,
            converters=dict.fromkeys(list_columns, pd.eval),
        )

        # Verify data preserved
        assert reloaded.loc[0, 'themeTags'] == ['Card Draw', 'Ramp']
        assert reloaded.loc[0, 'metadataTags'] == ['Applied: Cost Reduction']

    def test_csv_backward_compatible(self, tmp_path, monkeypatch):
        """An old CSV without metadataTags can be read and then partitioned."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')

        target = tmp_path / "old_cards.csv"

        # Old-style CSV: metadata tags still live inside themeTags
        legacy = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [['Card Draw', 'Applied: Cost Reduction']],
        })
        legacy.to_csv(target, index=False)

        # Read back
        reloaded = pd.read_csv(target, converters={'themeTags': pd.eval})

        # Should read successfully, still without a metadataTags column
        assert 'themeTags' in reloaded.columns
        assert 'metadataTags' not in reloaded.columns
        assert reloaded.loc[0, 'themeTags'] == ['Card Draw', 'Applied: Cost Reduction']

        # Apply partition
        partitioned, _ = _apply_metadata_partition(reloaded)

        # Should now have both columns, with the tags split between them
        for column in ('themeTags', 'metadataTags'):
            assert column in partitioned.columns
        assert partitioned.loc[0, 'themeTags'] == ['Card Draw']
        assert partitioned.loc[0, 'metadataTags'] == ['Applied: Cost Reduction']
|
|
|
|
|
|
if __name__ == "__main__":
    # Allow running this file directly. pytest.main() returns the run's exit
    # code; raise it via SystemExit so a failing run produces a non-zero
    # process status instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|