feat: complete protection scope filtering with pool limiting

2026-02-02 06:31:48 +01:00 · 2025-10-09 17:29:57 -07:00 · 2025-10-09 17:29:57 -07:00 · f2863ef362
commit f2863ef362
parent 06d8796316
24 changed files with 1924 additions and 558 deletions
--- a/code/tests/test_metadata_partition.py
+++ b/code/tests/test_metadata_partition.py
@@ -0,0 +1,300 @@
+"""Tests for M3 metadata/theme tag partition functionality.
+
+Tests cover:
+- Tag classification (metadata vs theme)
+- Column creation and data migration  
+- Feature flag behavior
+- Compatibility with missing columns
+- CSV read/write with new schema
+"""
+import pandas as pd
+import pytest
+from code.tagging import tag_utils
+from code.tagging.tagger import _apply_metadata_partition
+
+
+class TestTagClassification:
+    """Checks for tag_utils.classify_tag ("metadata" vs "theme")."""
+
+    def test_prefix_based_metadata(self):
+        """Tags starting with a metadata prefix are classified as metadata."""
+        prefixed_tags = (
+            "Applied: Cost Reduction",
+            "Bracket: Game Changer",
+            "Diagnostic: Test",
+            "Internal: Debug",
+        )
+        for tag in prefixed_tags:
+            assert tag_utils.classify_tag(tag) == "metadata"
+
+    def test_exact_match_metadata(self):
+        """Specific full tag names are classified as metadata."""
+        for tag in ("Bracket: Game Changer", "Bracket: Staple"):
+            assert tag_utils.classify_tag(tag) == "metadata"
+
+    def test_kindred_protection_metadata(self):
+        """Kindred 'X Gain Protection' tags are classified as metadata."""
+        kindred_tags = (
+            "Knights Gain Protection",
+            "Frogs Gain Protection",
+            "Zombies Gain Protection",
+        )
+        for tag in kindred_tags:
+            assert tag_utils.classify_tag(tag) == "metadata"
+
+    def test_theme_classification(self):
+        """Ordinary gameplay tags are classified as themes."""
+        gameplay_tags = (
+            "Card Draw",
+            "Spellslinger",
+            "Tokens Matter",
+            "Ramp",
+            "Protection",
+        )
+        for tag in gameplay_tags:
+            assert tag_utils.classify_tag(tag) == "theme"
+
+    def test_edge_cases(self):
+        """Empty input and near-miss spellings fall back to theme."""
+        near_misses = (
+            "",  # Empty string
+            "Apply: Something",  # Wrong prefix
+            "Knights Have Protection",  # Not "Gain"
+            "applied: Cost Reduction",  # Lowercase prefix (case sensitivity)
+        )
+        for tag in near_misses:
+            assert tag_utils.classify_tag(tag) == "theme"
+
+
+class TestMetadataPartition:
+    """Tests for _apply_metadata_partition function.
+
+    Each test calls the function with a DataFrame and unpacks its
+    ``(DataFrame, diagnostics dict)`` result. The feature is toggled through
+    the ``TAG_METADATA_SPLIT`` environment variable ('1' enables it and '0'
+    disables it in these tests), set via ``monkeypatch`` so the change is
+    undone after each test.
+    """
+
+    def test_basic_partition(self, monkeypatch):
+        """Basic partition splits tags correctly and reports counts."""
+        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
+
+        df = pd.DataFrame({
+            'name': ['Card A', 'Card B'],
+            'themeTags': [
+                ['Card Draw', 'Applied: Cost Reduction'],
+                ['Spellslinger', 'Bracket: Game Changer', 'Tokens Matter']
+            ]
+        })
+
+        df_out, diag = _apply_metadata_partition(df)
+
+        # Theme tags keep their original relative order
+        assert df_out.loc[0, 'themeTags'] == ['Card Draw']
+        assert df_out.loc[1, 'themeTags'] == ['Spellslinger', 'Tokens Matter']
+
+        # Metadata tags land in the new column
+        assert df_out.loc[0, 'metadataTags'] == ['Applied: Cost Reduction']
+        assert df_out.loc[1, 'metadataTags'] == ['Bracket: Game Changer']
+
+        # Diagnostics: 2 tags moved + 3 kept == 5 input tags
+        assert diag['enabled'] is True
+        assert diag['rows_with_tags'] == 2
+        assert diag['metadata_tags_moved'] == 2
+        assert diag['theme_tags_kept'] == 3
+
+    def test_empty_tags(self, monkeypatch):
+        """Handles empty tag lists."""
+        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
+
+        df = pd.DataFrame({
+            'name': ['Card A', 'Card B'],
+            'themeTags': [[], ['Card Draw']]
+        })
+
+        df_out, diag = _apply_metadata_partition(df)
+
+        assert df_out.loc[0, 'themeTags'] == []
+        assert df_out.loc[0, 'metadataTags'] == []
+        assert df_out.loc[1, 'themeTags'] == ['Card Draw']
+        assert df_out.loc[1, 'metadataTags'] == []
+
+        # The row with an empty list is not counted as having tags
+        assert diag['rows_with_tags'] == 1
+
+    def test_all_metadata_tags(self, monkeypatch):
+        """Handles rows with only metadata tags."""
+        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
+
+        df = pd.DataFrame({
+            'name': ['Card A'],
+            'themeTags': [['Applied: Cost Reduction', 'Bracket: Game Changer']]
+        })
+
+        df_out, diag = _apply_metadata_partition(df)
+
+        assert df_out.loc[0, 'themeTags'] == []
+        assert df_out.loc[0, 'metadataTags'] == ['Applied: Cost Reduction', 'Bracket: Game Changer']
+
+        assert diag['metadata_tags_moved'] == 2
+        assert diag['theme_tags_kept'] == 0
+
+    def test_all_theme_tags(self, monkeypatch):
+        """Handles rows with only theme tags."""
+        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
+
+        df = pd.DataFrame({
+            'name': ['Card A'],
+            'themeTags': [['Card Draw', 'Ramp', 'Spellslinger']]
+        })
+
+        df_out, diag = _apply_metadata_partition(df)
+
+        assert df_out.loc[0, 'themeTags'] == ['Card Draw', 'Ramp', 'Spellslinger']
+        assert df_out.loc[0, 'metadataTags'] == []
+
+        assert diag['metadata_tags_moved'] == 0
+        assert diag['theme_tags_kept'] == 3
+
+    def test_feature_flag_disabled(self, monkeypatch):
+        """Feature flag disables partition."""
+        monkeypatch.setenv('TAG_METADATA_SPLIT', '0')
+
+        df = pd.DataFrame({
+            'name': ['Card A'],
+            'themeTags': [['Card Draw', 'Applied: Cost Reduction']]
+        })
+
+        df_out, diag = _apply_metadata_partition(df)
+
+        # Should not create metadataTags column
+        assert 'metadataTags' not in df_out.columns
+
+        # Should not modify themeTags
+        assert df_out.loc[0, 'themeTags'] == ['Card Draw', 'Applied: Cost Reduction']
+
+        # Should indicate disabled
+        assert diag['enabled'] is False
+
+    def test_missing_theme_tags_column(self, monkeypatch):
+        """Handles missing themeTags column gracefully."""
+        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
+
+        df = pd.DataFrame({
+            'name': ['Card A'],
+            'other_column': ['value']
+        })
+
+        df_out, diag = _apply_metadata_partition(df)
+
+        # Should return unchanged
+        assert 'themeTags' not in df_out.columns
+        assert 'metadataTags' not in df_out.columns
+
+        # Should indicate error while still reporting the feature as enabled
+        assert diag['enabled'] is True
+        assert 'error' in diag
+
+    def test_non_list_tags(self, monkeypatch):
+        """Handles non-list values in themeTags."""
+        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
+
+        df = pd.DataFrame({
+            'name': ['Card A', 'Card B', 'Card C'],
+            'themeTags': [['Card Draw'], None, 'not a list']
+        })
+
+        df_out, diag = _apply_metadata_partition(df)
+
+        # Only first row should be processed
+        assert df_out.loc[0, 'themeTags'] == ['Card Draw']
+        assert df_out.loc[0, 'metadataTags'] == []
+
+        assert diag['rows_with_tags'] == 1
+
+    def test_kindred_protection_partition(self, monkeypatch):
+        """Kindred protection tags are moved to metadata."""
+        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
+
+        df = pd.DataFrame({
+            'name': ['Card A'],
+            'themeTags': [['Protection', 'Knights Gain Protection', 'Card Draw']]
+        })
+
+        # Diagnostics are not inspected here
+        df_out, _ = _apply_metadata_partition(df)
+
+        theme_tags = df_out.loc[0, 'themeTags']
+        metadata_tags = df_out.loc[0, 'metadataTags']
+
+        assert 'Protection' in theme_tags
+        assert 'Card Draw' in theme_tags
+        assert 'Knights Gain Protection' in metadata_tags
+
+        # "Moved" means removed from the source column too: without these
+        # checks the test would pass if the kindred tag were merely copied,
+        # or if plain "Protection" leaked into the metadata column.
+        assert 'Knights Gain Protection' not in theme_tags
+        assert 'Protection' not in metadata_tags
+        assert 'Card Draw' not in metadata_tags
+
+    def test_diagnostics_structure(self, monkeypatch):
+        """Diagnostics contain expected fields."""
+        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
+
+        df = pd.DataFrame({
+            'name': ['Card A'],
+            'themeTags': [['Card Draw', 'Applied: Cost Reduction']]
+        })
+
+        # Only the diagnostics dict matters for this test
+        _, diag = _apply_metadata_partition(df)
+
+        # Check required diagnostic fields
+        assert 'enabled' in diag
+        assert 'total_rows' in diag
+        assert 'rows_with_tags' in diag
+        assert 'metadata_tags_moved' in diag
+        assert 'theme_tags_kept' in diag
+        assert 'unique_metadata_tags' in diag
+        assert 'unique_theme_tags' in diag
+        assert 'most_common_metadata' in diag
+        assert 'most_common_themes' in diag
+
+        # Check types
+        assert isinstance(diag['most_common_metadata'], list)
+        assert isinstance(diag['most_common_themes'], list)
+
+
+class TestCSVCompatibility:
+    """CSV read/write checks for the themeTags/metadataTags schema."""
+
+    def test_csv_roundtrip_with_metadata(self, tmp_path, monkeypatch):
+        """Writing then re-reading a CSV keeps both tag columns intact."""
+        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
+
+        target = tmp_path / "test_cards.csv"
+
+        # Build and persist a frame that already carries both tag columns
+        original = pd.DataFrame({
+            'name': ['Card A'],
+            'themeTags': [['Card Draw', 'Ramp']],
+            'metadataTags': [['Applied: Cost Reduction']]
+        })
+        original.to_csv(target, index=False)
+
+        # Parse the list-valued columns back from their CSV text form
+        list_converters = {
+            column: pd.eval for column in ('themeTags', 'metadataTags')
+        }
+        reloaded = pd.read_csv(target, converters=list_converters)
+
+        # Both columns come back as the same lists that were written
+        assert reloaded.loc[0, 'themeTags'] == ['Card Draw', 'Ramp']
+        assert reloaded.loc[0, 'metadataTags'] == ['Applied: Cost Reduction']
+
+    def test_csv_backward_compatible(self, tmp_path, monkeypatch):
+        """An old CSV lacking metadataTags loads and can then be partitioned."""
+        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
+
+        target = tmp_path / "old_cards.csv"
+
+        # Old-style file: metadata tags still mixed into themeTags
+        pd.DataFrame({
+            'name': ['Card A'],
+            'themeTags': [['Card Draw', 'Applied: Cost Reduction']]
+        }).to_csv(target, index=False)
+
+        legacy = pd.read_csv(target, converters={'themeTags': pd.eval})
+
+        # The legacy schema loads as-is, with no metadataTags column
+        legacy_columns = legacy.columns
+        assert 'themeTags' in legacy_columns
+        assert 'metadataTags' not in legacy_columns
+        assert legacy.loc[0, 'themeTags'] == ['Card Draw', 'Applied: Cost Reduction']
+
+        # Partitioning upgrades the frame to the two-column schema
+        upgraded, _ = _apply_metadata_partition(legacy)
+
+        upgraded_columns = upgraded.columns
+        assert 'themeTags' in upgraded_columns
+        assert 'metadataTags' in upgraded_columns
+        assert upgraded.loc[0, 'themeTags'] == ['Card Draw']
+        assert upgraded.loc[0, 'metadataTags'] == ['Applied: Cost Reduction']
+
+
+if __name__ == "__main__":
+    # Running this file directly executes its tests verbosely. pytest.main
+    # returns an exit code; raise it via SystemExit so a failing run ends
+    # with a non-zero process status instead of always exiting 0.
+    raise SystemExit(pytest.main([__file__, "-v"]))