feat: complete protection scope filtering with pool limiting

This commit is contained in:
matt 2025-10-09 17:29:57 -07:00
parent 06d8796316
commit f2863ef362
24 changed files with 1924 additions and 558 deletions

View file

@@ -0,0 +1,300 @@
"""Tests for M3 metadata/theme tag partition functionality.
Tests cover:
- Tag classification (metadata vs theme)
- Column creation and data migration
- Feature flag behavior
- Compatibility with missing columns
- CSV read/write with new schema
"""
import pandas as pd
import pytest
from code.tagging import tag_utils
from code.tagging.tagger import _apply_metadata_partition
class TestTagClassification:
    """Checks how ``tag_utils.classify_tag`` labels individual tag strings."""

    @staticmethod
    def _assert_all(tags, expected):
        """Assert that each tag in *tags*, checked in order, classifies as *expected*."""
        for tag in tags:
            assert tag_utils.classify_tag(tag) == expected

    def test_prefix_based_metadata(self):
        """Tags starting with a metadata prefix are expected to be metadata."""
        prefixed = (
            "Applied: Cost Reduction",
            "Bracket: Game Changer",
            "Diagnostic: Test",
            "Internal: Debug",
        )
        self._assert_all(prefixed, "metadata")

    def test_exact_match_metadata(self):
        """Specific full tag names are expected to be metadata."""
        self._assert_all(
            ("Bracket: Game Changer", "Bracket: Staple"),
            "metadata",
        )

    def test_kindred_protection_metadata(self):
        """'<Type> Gain Protection' tags are expected to be metadata."""
        kindred = (
            "Knights Gain Protection",
            "Frogs Gain Protection",
            "Zombies Gain Protection",
        )
        self._assert_all(kindred, "metadata")

    def test_theme_classification(self):
        """Ordinary gameplay tags are expected to be themes."""
        gameplay = (
            "Card Draw",
            "Spellslinger",
            "Tokens Matter",
            "Ramp",
            "Protection",
        )
        self._assert_all(gameplay, "theme")

    def test_edge_cases(self):
        """Boundary and near-miss inputs are expected to classify as themes."""
        near_misses = (
            "",  # empty string
            "Apply: Something",  # resembles a metadata prefix but is not one
            "Knights Have Protection",  # "Have" instead of "Gain"
            "applied: Cost Reduction",  # lowercase prefix: case matters
        )
        self._assert_all(near_misses, "theme")
class TestMetadataPartition:
    """Exercises ``_apply_metadata_partition`` on small in-memory frames."""

    @staticmethod
    def _partition(monkeypatch, columns, flag='1'):
        """Set TAG_METADATA_SPLIT to *flag* and partition a frame built from *columns*.

        Returns whatever ``_apply_metadata_partition`` returns; every test
        unpacks it as ``(frame, diagnostics)``.
        """
        monkeypatch.setenv('TAG_METADATA_SPLIT', flag)
        return _apply_metadata_partition(pd.DataFrame(columns))

    def test_basic_partition(self, monkeypatch):
        """Mixed tag lists are split between the two columns."""
        result, stats = self._partition(monkeypatch, {
            'name': ['Card A', 'Card B'],
            'themeTags': [
                ['Card Draw', 'Applied: Cost Reduction'],
                ['Spellslinger', 'Bracket: Game Changer', 'Tokens Matter'],
            ],
        })

        # Theme tags that stay behind
        assert result.loc[0, 'themeTags'] == ['Card Draw']
        assert result.loc[1, 'themeTags'] == ['Spellslinger', 'Tokens Matter']

        # Metadata tags that were moved out
        assert result.loc[0, 'metadataTags'] == ['Applied: Cost Reduction']
        assert result.loc[1, 'metadataTags'] == ['Bracket: Game Changer']

        # Diagnostic counters
        assert stats['enabled'] is True
        expected_counts = {
            'rows_with_tags': 2,
            'metadata_tags_moved': 2,
            'theme_tags_kept': 3,
        }
        for key, count in expected_counts.items():
            assert stats[key] == count

    def test_empty_tags(self, monkeypatch):
        """A row with an empty tag list yields empty lists in both columns."""
        result, stats = self._partition(monkeypatch, {
            'name': ['Card A', 'Card B'],
            'themeTags': [[], ['Card Draw']],
        })

        assert result.loc[0, 'themeTags'] == []
        assert result.loc[0, 'metadataTags'] == []
        assert result.loc[1, 'themeTags'] == ['Card Draw']
        assert result.loc[1, 'metadataTags'] == []
        assert stats['rows_with_tags'] == 1

    def test_all_metadata_tags(self, monkeypatch):
        """A row holding only metadata tags ends up with no theme tags."""
        result, stats = self._partition(monkeypatch, {
            'name': ['Card A'],
            'themeTags': [['Applied: Cost Reduction', 'Bracket: Game Changer']],
        })

        assert result.loc[0, 'themeTags'] == []
        assert result.loc[0, 'metadataTags'] == ['Applied: Cost Reduction', 'Bracket: Game Changer']
        assert stats['metadata_tags_moved'] == 2
        assert stats['theme_tags_kept'] == 0

    def test_all_theme_tags(self, monkeypatch):
        """A row holding only theme tags keeps them all and moves nothing."""
        result, stats = self._partition(monkeypatch, {
            'name': ['Card A'],
            'themeTags': [['Card Draw', 'Ramp', 'Spellslinger']],
        })

        assert result.loc[0, 'themeTags'] == ['Card Draw', 'Ramp', 'Spellslinger']
        assert result.loc[0, 'metadataTags'] == []
        assert stats['metadata_tags_moved'] == 0
        assert stats['theme_tags_kept'] == 3

    def test_feature_flag_disabled(self, monkeypatch):
        """With the flag set to '0' the frame is expected to come back untouched."""
        result, stats = self._partition(
            monkeypatch,
            {
                'name': ['Card A'],
                'themeTags': [['Card Draw', 'Applied: Cost Reduction']],
            },
            flag='0',
        )

        # No metadataTags column may be created
        assert 'metadataTags' not in result.columns
        # themeTags must be left exactly as supplied
        assert result.loc[0, 'themeTags'] == ['Card Draw', 'Applied: Cost Reduction']
        # Diagnostics report the feature as disabled
        assert stats['enabled'] is False

    def test_missing_theme_tags_column(self, monkeypatch):
        """A frame without a themeTags column is reported as an error, not raised."""
        result, stats = self._partition(monkeypatch, {
            'name': ['Card A'],
            'other_column': ['value'],
        })

        # Neither tag column should appear in the output
        for column in ('themeTags', 'metadataTags'):
            assert column not in result.columns

        # The feature is on, and the problem is surfaced in the diagnostics
        assert stats['enabled'] is True
        assert 'error' in stats

    def test_non_list_tags(self, monkeypatch):
        """None and string values in themeTags are not counted as tagged rows."""
        result, stats = self._partition(monkeypatch, {
            'name': ['Card A', 'Card B', 'Card C'],
            'themeTags': [['Card Draw'], None, 'not a list'],
        })

        # Only the first row holds a real list, so only it is checked and counted
        assert result.loc[0, 'themeTags'] == ['Card Draw']
        assert result.loc[0, 'metadataTags'] == []
        assert stats['rows_with_tags'] == 1

    def test_kindred_protection_partition(self, monkeypatch):
        """A kindred protection tag moves to metadata; plain 'Protection' stays a theme."""
        result, stats = self._partition(monkeypatch, {
            'name': ['Card A'],
            'themeTags': [['Protection', 'Knights Gain Protection', 'Card Draw']],
        })

        kept = result.loc[0, 'themeTags']
        assert 'Protection' in kept
        assert 'Card Draw' in result.loc[0, 'themeTags']
        assert 'Knights Gain Protection' in result.loc[0, 'metadataTags']

    def test_diagnostics_structure(self, monkeypatch):
        """The diagnostics mapping exposes every expected key with list-typed rankings."""
        result, stats = self._partition(monkeypatch, {
            'name': ['Card A'],
            'themeTags': [['Card Draw', 'Applied: Cost Reduction']],
        })

        # Required diagnostic fields
        required_keys = (
            'enabled',
            'total_rows',
            'rows_with_tags',
            'metadata_tags_moved',
            'theme_tags_kept',
            'unique_metadata_tags',
            'unique_theme_tags',
            'most_common_metadata',
            'most_common_themes',
        )
        for key in required_keys:
            assert key in stats

        # The "most common" entries must be lists
        for key in ('most_common_metadata', 'most_common_themes'):
            assert isinstance(stats[key], list)
class TestCSVCompatibility:
    """Checks that tag columns survive a CSV write/read cycle, old and new schema."""

    def test_csv_roundtrip_with_metadata(self, tmp_path, monkeypatch):
        """Both themeTags and metadataTags come back unchanged after a roundtrip."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
        target = tmp_path / "test_cards.csv"

        # Write a frame that already carries both tag columns
        pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [['Card Draw', 'Ramp']],
            'metadataTags': [['Applied: Cost Reduction']],
        }).to_csv(target, index=False)

        # Lists are stored as their string form, so parse them back on read
        list_columns = ('themeTags', 'metadataTags')
        reloaded = pd.read_csv(
            target,
            converters={column: pd.eval for column in list_columns},
        )

        assert reloaded.loc[0, 'themeTags'] == ['Card Draw', 'Ramp']
        assert reloaded.loc[0, 'metadataTags'] == ['Applied: Cost Reduction']

    def test_csv_backward_compatible(self, tmp_path, monkeypatch):
        """An old CSV lacking metadataTags loads and can then be partitioned."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
        legacy_path = tmp_path / "old_cards.csv"

        # Old-style file: metadata tags still live inside themeTags
        legacy = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [['Card Draw', 'Applied: Cost Reduction']],
        })
        legacy.to_csv(legacy_path, index=False)

        loaded = pd.read_csv(legacy_path, converters={'themeTags': pd.eval})

        # The legacy layout must load as-is
        assert 'themeTags' in loaded.columns
        assert 'metadataTags' not in loaded.columns
        assert loaded.loc[0, 'themeTags'] == ['Card Draw', 'Applied: Cost Reduction']

        # Partitioning the loaded frame introduces the second column
        upgraded, _ = _apply_metadata_partition(loaded)

        assert 'themeTags' in upgraded.columns
        assert 'metadataTags' in upgraded.columns
        assert upgraded.loc[0, 'themeTags'] == ['Card Draw']
        assert upgraded.loc[0, 'metadataTags'] == ['Applied: Cost Reduction']
if __name__ == "__main__":
    # pytest.main() returns the run's exit code rather than exiting itself.
    # Propagate it so running this file directly exits non-zero on failures.
    raise SystemExit(pytest.main([__file__, "-v"]))