feat: theme catalog optimization with tag search and faster enrichment

2025-12-17 16:10:12 +01:00 · 2025-10-15 17:17:46 -07:00 · 2025-10-15 17:17:46 -07:00 · 9e6c68f559
commit 9e6c68f559
parent 952b151162
26 changed files with 5906 additions and 5688 deletions
--- a/code/tests/test_tag_index.py
+++ b/code/tests/test_tag_index.py
@@ -0,0 +1,429 @@
+"""Tests for tag index functionality."""
+import json
+import time
+
+from code.tagging.tag_index import (
+    TagIndex,
+    IndexStats,
+    get_tag_index,
+    clear_global_index,
+)
+
+
+class TestTagIndexBuild:
+    """Exercise TagIndex.build(): result shape, timing, and forced rebuilds."""
+
+    def test_build_index(self):
+        """A fresh build returns IndexStats with positive counts."""
+        built = TagIndex().build()
+
+        assert isinstance(built, IndexStats)
+        # Each of the three counters must be strictly positive.
+        for counter in (built.total_cards, built.total_tags, built.total_mappings):
+            assert counter > 0
+        assert built.build_time_seconds >= 0
+
+    def test_build_index_performance(self):
+        """Both the measured and the self-reported build time stay under 5 seconds."""
+        budget_seconds = 5.0
+        tag_index = TagIndex()
+
+        began = time.perf_counter()
+        built = tag_index.build()
+        wall_clock = time.perf_counter() - began
+
+        assert wall_clock < budget_seconds
+        assert built.build_time_seconds < budget_seconds
+
+    def test_force_rebuild(self):
+        """build(force_rebuild=True) reports a strictly later indexed_at."""
+        tag_index = TagIndex()
+
+        first_stamp = tag_index.build().indexed_at
+
+        # Short pause so the two builds are separated in time.
+        time.sleep(0.1)
+
+        second_stamp = tag_index.build(force_rebuild=True).indexed_at
+
+        assert second_stamp > first_stamp
+
+
+class TestSingleTagQueries:
+    """Single-tag and single-card lookups against a freshly built index."""
+
+    def test_get_cards_with_tag(self):
+        """An indexed tag maps to a non-empty set of cards."""
+        index = TagIndex()
+        index.build()
+
+        all_tags = index.get_all_tags()
+        # Fail loudly instead of passing vacuously when no tags were indexed.
+        assert all_tags, "index contains no tags to query"
+
+        cards = index.get_cards_with_tag(all_tags[0])
+
+        assert isinstance(cards, set)
+        assert len(cards) > 0
+
+    def test_get_cards_with_nonexistent_tag(self):
+        """An unknown tag yields an empty set."""
+        index = TagIndex()
+        index.build()
+
+        cards = index.get_cards_with_tag("ThisTagDoesNotExist12345")
+
+        assert cards == set()
+
+    def test_get_tags_for_card(self):
+        """A card found through a tag lookup reports a non-empty list of tags."""
+        index = TagIndex()
+        index.build()
+
+        # Query the tag list once and reuse it, rather than calling
+        # get_all_tags() twice inside a conditional expression.
+        all_tags = index.get_all_tags()
+        assert all_tags, "index contains no tags to query"
+
+        cards = index.get_cards_with_tag(all_tags[0])
+        assert cards, "first indexed tag has no cards"
+
+        card_name = next(iter(cards))
+        tags = index.get_tags_for_card(card_name)
+
+        assert isinstance(tags, list)
+        assert len(tags) > 0
+
+    def test_get_tags_for_nonexistent_card(self):
+        """An unknown card name yields an empty list."""
+        index = TagIndex()
+        index.build()
+
+        tags = index.get_tags_for_card("This Card Does Not Exist 12345")
+
+        assert tags == []
+
+
+class TestMultiTagQueries:
+    """AND / OR queries spanning several tags."""
+
+    def test_get_cards_with_all_tags(self):
+        """The AND query equals the intersection of the per-tag card sets."""
+        tag_index = TagIndex()
+        tag_index.build()
+
+        known_tags = tag_index.get_all_tags()
+        if len(known_tags) < 2:
+            return  # fewer than two tags: nothing to compare
+
+        first, second = known_tags[0], known_tags[1]
+
+        with_first = tag_index.get_cards_with_tag(first)
+        with_second = tag_index.get_cards_with_tag(second)
+        with_both = tag_index.get_cards_with_all_tags([first, second])
+
+        # Contained in each individual result ...
+        assert with_both.issubset(with_first)
+        assert with_both.issubset(with_second)
+
+        # ... and exactly their intersection.
+        assert with_both == (with_first & with_second)
+
+    def test_get_cards_with_any_tags(self):
+        """The OR query equals the union of the per-tag card sets."""
+        tag_index = TagIndex()
+        tag_index.build()
+
+        known_tags = tag_index.get_all_tags()
+        if len(known_tags) < 2:
+            return  # fewer than two tags: nothing to compare
+
+        first, second = known_tags[0], known_tags[1]
+
+        with_first = tag_index.get_cards_with_tag(first)
+        with_second = tag_index.get_cards_with_tag(second)
+        with_either = tag_index.get_cards_with_any_tags([first, second])
+
+        # Contains each individual result ...
+        assert with_first.issubset(with_either)
+        assert with_second.issubset(with_either)
+
+        # ... and is exactly their union.
+        assert with_either == (with_first | with_second)
+
+    def test_get_cards_with_empty_tag_list(self):
+        """Both query flavours return an empty set for an empty tag list."""
+        tag_index = TagIndex()
+        tag_index.build()
+
+        from_all = tag_index.get_cards_with_all_tags([])
+        from_any = tag_index.get_cards_with_any_tags([])
+
+        assert from_all == set()
+        assert from_any == set()
+
+    def test_get_cards_with_nonexistent_tags(self):
+        """Both query flavours return an empty set when no tag is known."""
+        tag_index = TagIndex()
+        tag_index.build()
+
+        unknown = ["FakeTag1", "FakeTag2"]
+
+        from_all = tag_index.get_cards_with_all_tags(unknown)
+        from_any = tag_index.get_cards_with_any_tags(unknown)
+
+        assert from_all == set()
+        assert from_any == set()
+
+
+class TestIndexStats:
+    """Statistics and metadata exposed by the index."""
+
+    def test_get_stats(self):
+        """get_stats() is None before a build and mirrors build()'s result after."""
+        tag_index = TagIndex()
+
+        # Nothing has been built yet.
+        assert tag_index.get_stats() is None
+
+        built = tag_index.build()
+        fetched = tag_index.get_stats()
+
+        assert fetched is not None
+        assert fetched.total_cards == built.total_cards
+        assert fetched.total_tags == built.total_tags
+
+    def test_get_all_tags(self):
+        """get_all_tags() returns a non-empty, sorted list."""
+        tag_index = TagIndex()
+        tag_index.build()
+
+        names = tag_index.get_all_tags()
+
+        assert isinstance(names, list)
+        assert len(names) > 0
+        # Ordering must already match sorted().
+        assert names == sorted(names)
+
+    def test_get_tag_stats(self):
+        """Per-tag stats expose a positive "card_count" entry."""
+        tag_index = TagIndex()
+        tag_index.build()
+
+        names = tag_index.get_all_tags()
+        if not names:
+            return  # no tags indexed: nothing to inspect
+
+        per_tag = tag_index.get_tag_stats(names[0])
+
+        assert "card_count" in per_tag
+        assert per_tag["card_count"] > 0
+
+    def test_get_popular_tags(self):
+        """get_popular_tags(limit=10) returns at most 10 entries, highest count first."""
+        tag_index = TagIndex()
+        tag_index.build()
+
+        ranking = tag_index.get_popular_tags(limit=10)
+
+        assert isinstance(ranking, list)
+        assert len(ranking) <= 10
+
+        if len(ranking) <= 1:
+            return  # ordering is only meaningful with two or more entries
+
+        # Each entry unpacks as (tag, count); counts must be descending.
+        tallies = [tally for _, tally in ranking]
+        assert tallies == sorted(tallies, reverse=True)
+
+
+class TestCaching:
+    """Cache persistence: save/load round-trip, invalidation, and clearing."""
+
+    def test_save_and_load_cache(self, tmp_path):
+        """Two indexes sharing a cache path report identical stats after build()."""
+        cache_path = tmp_path / ".tag_index_test.json"
+
+        # Build and save
+        index1 = TagIndex(cache_path=cache_path)
+        stats1 = index1.build()
+
+        assert cache_path.exists()
+
+        # A second index on the same path is expected to load from the cache
+        index2 = TagIndex(cache_path=cache_path)
+        stats2 = index2.build()
+
+        # An unchanged indexed_at means no rebuild took place
+        assert stats2.total_cards == stats1.total_cards
+        assert stats2.total_tags == stats1.total_tags
+        assert stats2.indexed_at == stats1.indexed_at
+
+    def test_cache_invalidation(self, tmp_path):
+        """A cache whose recorded all_cards_mtime is stale triggers a rebuild."""
+        cache_path = tmp_path / ".tag_index_test.json"
+
+        # Build index
+        index = TagIndex(cache_path=cache_path)
+        stats1 = index.build()
+
+        # Rewrite the cache with an mtime of 0 to simulate an outdated entry
+        with cache_path.open("r") as f:
+            cache_data = json.load(f)
+
+        cache_data["stats"]["all_cards_mtime"] = 0  # Very old
+
+        with cache_path.open("w") as f:
+            json.dump(cache_data, f)
+
+        # Pause exactly as test_force_rebuild does, so the strict ">" check
+        # below cannot fail merely because both builds share a timestamp.
+        time.sleep(0.1)
+
+        # Should rebuild (not use cache)
+        index2 = TagIndex(cache_path=cache_path)
+        stats2 = index2.build()
+
+        # Should have new timestamp
+        assert stats2.indexed_at > stats1.indexed_at
+
+    def test_clear_cache(self, tmp_path):
+        """clear_cache() removes the cache file written by build()."""
+        cache_path = tmp_path / ".tag_index_test.json"
+
+        index = TagIndex(cache_path=cache_path)
+        index.build()
+
+        assert cache_path.exists()
+
+        index.clear_cache()
+
+        assert not cache_path.exists()
+
+
+class TestGlobalIndex:
+    """Behaviour of the module-level index accessor."""
+
+    def test_get_tag_index(self):
+        """get_tag_index() returns a TagIndex whose stats are already available."""
+        clear_global_index()
+
+        shared = get_tag_index()
+
+        assert isinstance(shared, TagIndex)
+        assert shared.get_stats() is not None
+
+    def test_get_tag_index_singleton(self):
+        """Consecutive get_tag_index() calls return the very same object."""
+        clear_global_index()
+
+        earlier = get_tag_index()
+        later = get_tag_index()
+
+        assert earlier is later
+
+    def test_clear_global_index(self):
+        """After clear_global_index(), get_tag_index() returns a new object."""
+        before_clear = get_tag_index()
+
+        clear_global_index()
+
+        after_clear = get_tag_index()
+
+        assert before_clear is not after_clear
+
+
+class TestEdgeCases:
+    """Edge cases for tag index queries."""
+
+    def test_cards_with_no_tags(self):
+        """Stats are still available after building (cards without tags do not break it)."""
+        tag_index = TagIndex()
+        tag_index.build()
+
+        assert tag_index.get_stats() is not None
+
+    def test_special_characters_in_tags(self):
+        """Querying a tag containing '&' and spaces returns a set without crashing."""
+        tag_index = TagIndex()
+        tag_index.build()
+
+        found = tag_index.get_cards_with_tag("Life & Death")
+        assert isinstance(found, set)
+
+    def test_case_sensitive_tags(self):
+        """Original, upper-cased and lower-cased lookups each return a set.
+
+        Only the return type is checked; whether the three results differ
+        depends on how tags are named in the data.
+        """
+        tag_index = TagIndex()
+        tag_index.build()
+
+        names = tag_index.get_all_tags()
+        if not names:
+            return  # no tags indexed: nothing to query
+
+        sample = names[0]
+
+        as_is = tag_index.get_cards_with_tag(sample)
+        shouted = tag_index.get_cards_with_tag(sample.upper())
+        whispered = tag_index.get_cards_with_tag(sample.lower())
+
+        for outcome in (as_is, shouted, whispered):
+            assert isinstance(outcome, set)
+
+    def test_duplicate_tags_handled(self):
+        """Repeating a tag in an AND query matches the single-tag lookup."""
+        tag_index = TagIndex()
+        tag_index.build()
+
+        names = tag_index.get_all_tags()
+        if not names:
+            return  # no tags indexed: nothing to query
+
+        sample = names[0]
+
+        doubled = tag_index.get_cards_with_all_tags([sample, sample])
+        single = tag_index.get_cards_with_tag(sample)
+
+        assert doubled == single
+
+
+class TestPerformance:
+    """Timing budgets for index queries."""
+
+    def test_query_performance(self):
+        """100 single-tag lookups average under 1 ms each."""
+        tag_index = TagIndex()
+        tag_index.build()
+
+        names = tag_index.get_all_tags()
+        if not names:
+            return  # no tags indexed: nothing to time
+
+        sample = names[0]
+
+        began = time.perf_counter()
+        for _ in range(100):
+            tag_index.get_cards_with_tag(sample)
+        total_seconds = time.perf_counter() - began
+
+        per_query_ms = (total_seconds / 100) * 1000
+
+        assert per_query_ms < 1.0
+
+    def test_multi_tag_query_performance(self):
+        """100 three-tag AND queries average under 5 ms each."""
+        tag_index = TagIndex()
+        tag_index.build()
+
+        names = tag_index.get_all_tags()
+        if len(names) < 3:
+            return  # need three tags for this measurement
+
+        trio = names[:3]
+
+        began = time.perf_counter()
+        for _ in range(100):
+            tag_index.get_cards_with_all_tags(trio)
+        total_seconds = time.perf_counter() - began
+
+        per_query_ms = (total_seconds / 100) * 1000
+
+        assert per_query_ms < 5.0
--- a/code/tests/test_tag_loader.py
+++ b/code/tests/test_tag_loader.py
@@ -0,0 +1,259 @@
+"""Tests for batch tag loading from all_cards."""
+from code.tagging.tag_loader import (
+    load_tags_for_cards,
+    load_tags_for_card,
+    get_cards_with_tag,
+    get_cards_with_all_tags,
+    clear_cache,
+    is_use_all_cards_enabled,
+)
+
+
+class TestBatchTagLoading:
+    """Batch and single-card tag loading."""
+
+    def test_load_tags_for_multiple_cards(self):
+        """A three-card batch returns a dict with a list entry per requested card."""
+        wanted = ["Sol Ring", "Lightning Bolt", "Counterspell"]
+        loaded = load_tags_for_cards(wanted)
+
+        assert isinstance(loaded, dict)
+        assert len(loaded) == 3
+
+        # Every requested name is a key, whether or not it has tags.
+        for name in wanted:
+            assert name in loaded
+            assert isinstance(loaded[name], list)
+
+    def test_load_tags_for_empty_list(self):
+        """An empty request returns an empty dict."""
+        assert load_tags_for_cards([]) == {}
+
+    def test_load_tags_for_single_card(self):
+        """The single-card helper returns a list."""
+        found = load_tags_for_card("Sol Ring")
+
+        # Specific tags are not asserted because the data may vary.
+        assert isinstance(found, list)
+
+    def test_load_tags_for_nonexistent_card(self):
+        """An unknown card yields an empty list rather than an error."""
+        found = load_tags_for_card("This Card Does Not Exist 12345")
+
+        assert found == []
+
+    def test_load_tags_batch_includes_missing_cards(self):
+        """Unknown names in a batch still appear, mapped to an empty list."""
+        wanted = ["Sol Ring", "Fake Card Name 999", "Lightning Bolt"]
+        loaded = load_tags_for_cards(wanted)
+
+        assert len(loaded) == 3
+        assert "Fake Card Name 999" in loaded
+        assert loaded["Fake Card Name 999"] == []
+
+    def test_load_tags_handles_list_format(self):
+        """Loaded tags, when present, are whitespace-trimmed strings."""
+        loaded = load_tags_for_cards(["Sol Ring"])
+
+        if "Sol Ring" not in loaded or not loaded["Sol Ring"]:
+            return  # nothing loaded for this card: nothing to inspect
+
+        found = loaded["Sol Ring"]
+        assert all(isinstance(entry, str) for entry in found)
+        assert all(entry == entry.strip() for entry in found)
+
+    def test_load_tags_handles_string_format(self):
+        """Every result is a list with no empty or whitespace-only tags."""
+        wanted = ["Sol Ring", "Lightning Bolt"]
+        loaded = load_tags_for_cards(wanted)
+
+        for name in wanted:
+            found = loaded[name]
+            # Always a list, even when empty.
+            assert isinstance(found, list)
+            # No blank entries of any kind.
+            assert "" not in found
+            assert all(entry.strip() for entry in found)
+
+
+class TestTagQueries:
+    """Card lookups by tag through the loader module."""
+
+    def test_get_cards_with_tag(self):
+        """A limited single-tag query returns a list."""
+        matches = get_cards_with_tag("ramp", limit=10)
+
+        # The count is not asserted because the data varies.
+        assert isinstance(matches, list)
+
+    def test_get_cards_with_tag_limit(self):
+        """The limit argument caps the number of results."""
+        matches = get_cards_with_tag("ramp", limit=5)
+
+        assert len(matches) <= 5
+
+    def test_get_cards_with_nonexistent_tag(self):
+        """An unknown tag yields an empty list rather than an error."""
+        matches = get_cards_with_tag("ThisTagDoesNotExist12345")
+
+        assert matches == []
+
+    def test_get_cards_with_all_tags(self):
+        """A limited two-tag query returns a list of at most `limit` cards."""
+        requested = ["artifacts", "ramp"]
+        matches = get_cards_with_all_tags(requested, limit=10)
+
+        assert isinstance(matches, list)
+        assert len(matches) <= 10
+
+    def test_get_cards_with_all_tags_no_matches(self):
+        """Two unknown tags together yield an empty list."""
+        requested = ["ThisTagDoesNotExist1", "ThisTagDoesNotExist2"]
+        matches = get_cards_with_all_tags(requested)
+
+        assert matches == []
+
+
+class TestCacheManagement:
+    """Loader cache clearing and repeated loads."""
+
+    def test_clear_cache(self):
+        """Loading keeps working after clear_cache() is called."""
+        # Populate whatever the loader caches.
+        load_tags_for_card("Sol Ring")
+
+        clear_cache()
+
+        reloaded = load_tags_for_card("Sol Ring")
+        assert isinstance(reloaded, list)
+
+    def test_cache_persistence(self):
+        """Two identical batch requests return equal results."""
+        wanted = ["Sol Ring", "Lightning Bolt"]
+
+        initial = load_tags_for_cards(list(wanted))
+        repeated = load_tags_for_cards(list(wanted))
+
+        assert initial == repeated
+
+
+class TestFeatureFlag:
+    """Feature flag accessor."""
+
+    def test_is_use_all_cards_enabled_default(self):
+        """is_use_all_cards_enabled() returns a bool."""
+        flag = is_use_all_cards_enabled()
+
+        # The concrete value is not asserted: the environment may override it.
+        assert isinstance(flag, bool)
+
+
+class TestEdgeCases:
+    """Edge cases for the batch tag loader."""
+
+    def test_load_tags_with_special_characters(self):
+        """Card names containing punctuation are returned as keys unchanged."""
+        cards = [
+            "Urza's Saga",
+            "Keeper of the Accord",
+            "Esper Sentinel",
+        ]
+        result = load_tags_for_cards(cards)
+
+        assert len(result) == 3
+        for card in cards:
+            assert card in result
+
+    def test_load_tags_preserves_card_name_case(self):
+        """Result keys use the exact spelling and case that was requested."""
+        cards = ["Sol Ring", "LIGHTNING BOLT", "counterspell"]
+        result = load_tags_for_cards(cards)
+
+        # The previous check (`"Sol Ring" in result or len(result) >= 1`)
+        # passed for any non-empty result. Other tests in this module
+        # establish that every requested name appears as a key, so require
+        # exactly that for each spelling here.
+        for card in cards:
+            assert card in result
+
+    def test_load_tags_deduplicates(self):
+        """A card's tag list contains no repeated entries."""
+        tags = load_tags_for_card("Sol Ring")
+
+        # Only meaningful when at least one tag was loaded.
+        if tags:
+            assert len(tags) == len(set(tags))
+
+    def test_large_batch_performance(self):
+        """A 100-entry batch finishes in under one second."""
+        import time
+
+        # 100 requests, but only two distinct card names.
+        cards = ["Sol Ring"] * 50 + ["Lightning Bolt"] * 50
+
+        start = time.perf_counter()
+        result = load_tags_for_cards(cards)
+        elapsed = time.perf_counter() - start
+
+        assert elapsed < 1.0
+        assert len(result) >= 1  # At least one card found
+
+
+class TestFormatVariations:
+    """Loader output stays clean regardless of how tags were stored."""
+
+    def test_empty_tags_handled(self):
+        """A card that may have no tags still yields a list (never None)."""
+        # Pick a card that might have no tags (basic lands usually don't)
+        tags = load_tags_for_card("Plains")
+
+        # `tags == [] or isinstance(tags, list)` reduced to the list check:
+        # an empty list already satisfies it.
+        assert isinstance(tags, list)
+
+    def test_string_list_repr_parsed(self):
+        """Tags carry no leftover list-repr syntax such as brackets or quotes."""
+        cards = ["Sol Ring", "Lightning Bolt", "Counterspell"]
+        result = load_tags_for_cards(cards)
+
+        for tags in result.values():
+            assert isinstance(tags, list)
+            for tag in tags:
+                assert "[" not in tag
+                assert "]" not in tag
+                assert '"' not in tag
+                # A stray quote from a badly parsed repr sits at the edge of
+                # the tag; an apostrophe inside a word is legitimate. The
+                # earlier `count("'") > 1` check had this backwards: it
+                # rejected a single in-word apostrophe and accepted a
+                # fully quote-wrapped tag.
+                assert tag == tag.strip("'")
+
+    def test_comma_separated_parsed(self):
+        """Individual tags never contain a comma."""
+        result = load_tags_for_cards(["Sol Ring"])
+
+        if result.get("Sol Ring"):
+            tags = result["Sol Ring"]
+            for tag in tags:
+                assert "," not in tag
--- a/code/tests/test_theme_enrichment.py
+++ b/code/tests/test_theme_enrichment.py
@@ -0,0 +1,370 @@
+"""Tests for consolidated theme enrichment pipeline.
+
+These tests verify that the new consolidated pipeline produces the same results
+as the old 7-script approach, but much faster.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, Dict
+
+import pytest
+
+try:
+    import yaml
+except ImportError:
+    yaml = None
+
+from code.tagging.theme_enrichment import (
+    ThemeEnrichmentPipeline,
+    EnrichmentStats,
+    run_enrichment_pipeline,
+)
+
+
+# Skip all tests if PyYAML not available
+pytestmark = pytest.mark.skipif(yaml is None, reason="PyYAML not installed")
+
+
+@pytest.fixture
+def temp_catalog_dir(tmp_path: Path) -> Path:
+    """Create and return an empty config/themes/catalog directory under tmp_path."""
+    target = tmp_path.joinpath('config', 'themes', 'catalog')
+    target.mkdir(parents=True)
+    return target
+
+
+@pytest.fixture
+def temp_root(tmp_path: Path, temp_catalog_dir: Path) -> Path:
+    """Return tmp_path as a project root holding an empty theme_list.json."""
+    themes_dir = tmp_path / 'config' / 'themes'
+    # exist_ok: the directory may already be present.
+    themes_dir.mkdir(parents=True, exist_ok=True)
+    listing = themes_dir / 'theme_list.json'
+    listing.write_text('{"themes": []}', encoding='utf-8')
+    return tmp_path
+
+
+def write_theme(catalog_dir: Path, filename: str, data: Dict[str, Any]) -> Path:
+    """Dump *data* as YAML into catalog_dir/filename and return that path.
+
+    Key order is kept as given (sort_keys=False) and non-ASCII text is
+    written as-is (allow_unicode=True).
+    """
+    serialized = yaml.safe_dump(data, sort_keys=False, allow_unicode=True)
+    destination = catalog_dir / filename
+    destination.write_text(serialized, encoding='utf-8')
+    return destination
+
+
+def read_theme(path: Path) -> Dict[str, Any]:
+    """Parse the UTF-8 YAML file at *path* with yaml.safe_load and return the result."""
+    raw_text = path.read_text(encoding='utf-8')
+    return yaml.safe_load(raw_text)
+
+
+class TestThemeEnrichmentPipeline:
+    """Tests for ThemeEnrichmentPipeline class."""
+    
+    def test_init(self, temp_root: Path):
+        """A new pipeline records root, min_examples and catalog_dir, with no themes."""
+        built = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
+        expected_catalog = temp_root / 'config' / 'themes' / 'catalog'
+
+        assert built.root == temp_root
+        assert built.min_examples == 5
+        assert built.catalog_dir == expected_catalog
+        assert len(built.themes) == 0
+    
+    def test_load_themes_empty_dir(self, temp_root: Path):
+        """Loading from an empty catalog leaves themes empty and total_themes at 0."""
+        built = ThemeEnrichmentPipeline(root=temp_root)
+        built.load_all_themes()
+
+        assert len(built.themes) == 0
+        assert built.stats.total_themes == 0
+    
+    def test_load_themes_with_valid_files(self, temp_root: Path, temp_catalog_dir: Path):
+        """Two theme files on disk are both loaded and counted."""
+        landfall = {
+            'display_name': 'Landfall',
+            'synergies': ['Ramp', 'Tokens'],
+            'example_commanders': []
+        }
+        reanimate = {
+            'display_name': 'Reanimate',
+            'synergies': ['Graveyard', 'Mill'],
+            'example_commanders': ['Meren of Clan Nel Toth']
+        }
+        write_theme(temp_catalog_dir, 'landfall.yml', landfall)
+        write_theme(temp_catalog_dir, 'reanimate.yml', reanimate)
+
+        built = ThemeEnrichmentPipeline(root=temp_root)
+        built.load_all_themes()
+
+        assert len(built.themes) == 2
+        assert built.stats.total_themes == 2
+    
+    def test_autofill_placeholders_empty_examples(self, temp_root: Path, temp_catalog_dir: Path):
+        """Autofill gives an example-less theme '<name> Anchor' entries and draft quality."""
+        seed = {
+            'display_name': 'Tokens Matter',
+            'synergies': ['Sacrifice', 'Aristocrats'],
+            'example_commanders': []
+        }
+        write_theme(temp_catalog_dir, 'tokens.yml', seed)
+
+        built = ThemeEnrichmentPipeline(root=temp_root)
+        built.load_all_themes()
+        built.autofill_placeholders()
+
+        assert built.stats.autofilled == 1
+        entry = list(built.themes.values())[0]
+        assert entry.modified
+        # One placeholder for the display name and one per synergy.
+        examples = entry.data['example_commanders']
+        assert 'Tokens Matter Anchor' in examples
+        assert 'Sacrifice Anchor' in examples
+        assert 'Aristocrats Anchor' in examples
+        assert entry.data.get('editorial_quality') == 'draft'
+    
+    def test_autofill_skips_themes_with_examples(self, temp_root: Path, temp_catalog_dir: Path):
+        """Autofill leaves a theme untouched when it already lists an example."""
+        seed = {
+            'display_name': 'Landfall',
+            'synergies': ['Ramp'],
+            'example_commanders': ['Tatyova, Benthic Druid']
+        }
+        write_theme(temp_catalog_dir, 'landfall.yml', seed)
+
+        built = ThemeEnrichmentPipeline(root=temp_root)
+        built.load_all_themes()
+        built.autofill_placeholders()
+
+        assert built.stats.autofilled == 0
+        entry = list(built.themes.values())[0]
+        assert not entry.modified
+    
+    def test_pad_examples_to_minimum(self, temp_root: Path, temp_catalog_dir: Path):
+        """Padding grows a two-placeholder list to min_examples entries."""
+        seed = {
+            'display_name': 'Ramp',
+            'synergies': ['Landfall', 'BigSpells', 'Hydras'],
+            'example_commanders': ['Ramp Anchor', 'Landfall Anchor']
+        }
+        write_theme(temp_catalog_dir, 'ramp.yml', seed)
+
+        built = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
+        built.load_all_themes()
+        built.pad_examples()
+
+        assert built.stats.padded == 1
+        entry = list(built.themes.values())[0]
+        assert entry.modified
+        assert len(entry.data['example_commanders']) == 5
+        # A synergy-based placeholder is expected (the 3rd synergy) ...
+        assert 'Hydras Anchor' in entry.data['example_commanders']
+        # ... along with at least one letter-suffixed placeholder.
+        assert any(
+            suffix in cmd
+            for cmd in entry.data['example_commanders']
+            for suffix in ('Anchor B', 'Anchor C')
+        )
+    
+    def test_pad_skips_mixed_real_and_placeholder(self, temp_root: Path, temp_catalog_dir: Path):
+        """Padding leaves a list alone when it mixes a real name with a placeholder."""
+        seed = {
+            'display_name': 'Tokens',
+            'synergies': ['Sacrifice'],
+            'example_commanders': ['Krenko, Mob Boss', 'Tokens Anchor']
+        }
+        write_theme(temp_catalog_dir, 'tokens.yml', seed)
+
+        built = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
+        built.load_all_themes()
+        built.pad_examples()
+
+        assert built.stats.padded == 0
+        entry = list(built.themes.values())[0]
+        assert not entry.modified
+    
+    def test_cleanup_removes_placeholders_when_real_present(self, temp_root: Path, temp_catalog_dir: Path):
+        """Cleanup drops the ' Anchor' placeholder and keeps the real commanders.
+
+        Note: cleanup only removes entries ending with ' Anchor' (no suffix).
+        Purge step removes entries with ' Anchor' or ' Anchor X' pattern.
+        """
+        seed = {
+            'display_name': 'Lifegain',
+            'synergies': [],
+            'example_commanders': [
+                'Oloro, Ageless Ascetic',
+                'Lifegain Anchor',  # the entry expected to disappear
+                'Trelasarra, Moon Dancer',
+            ]
+        }
+        write_theme(temp_catalog_dir, 'lifegain.yml', seed)
+
+        built = ThemeEnrichmentPipeline(root=temp_root)
+        built.load_all_themes()
+        built.cleanup_placeholders()
+
+        assert built.stats.cleaned == 1
+        entry = list(built.themes.values())[0]
+        assert entry.modified
+        assert len(entry.data['example_commanders']) == 2
+        assert 'Oloro, Ageless Ascetic' in entry.data['example_commanders']
+        assert 'Trelasarra, Moon Dancer' in entry.data['example_commanders']
+        assert 'Lifegain Anchor' not in entry.data['example_commanders']
+    
+    def test_purge_removes_all_anchors(self, temp_root: Path, temp_catalog_dir: Path):
+        """Purge empties a list made only of anchor placeholders, suffixed or not."""
+        seed = {
+            'display_name': 'Counters',
+            'synergies': [],
+            'example_commanders': [
+                'Counters Anchor',
+                'Counters Anchor B',
+                'Counters Anchor C'
+            ]
+        }
+        write_theme(temp_catalog_dir, 'counters.yml', seed)
+
+        built = ThemeEnrichmentPipeline(root=temp_root)
+        built.load_all_themes()
+        built.purge_anchors()
+
+        assert built.stats.purged == 1
+        entry = list(built.themes.values())[0]
+        assert entry.modified
+        assert entry.data['example_commanders'] == []
+    
+    def test_augment_from_catalog(self, temp_root: Path, temp_catalog_dir: Path):
+        """Augmentation fills fields missing from the YAML using the catalog JSON."""
+        import json
+
+        # Fields present in the catalog entry but absent from the theme file.
+        expected_fields = {
+            'description': 'Triggers from lands entering',
+            'popularity_bucket': 'common',
+            'popularity_hint': 'Very popular',
+            'deck_archetype': 'Lands',
+        }
+
+        # Create catalog JSON with a single entry for the theme under test.
+        catalog_path = temp_root / 'config' / 'themes' / 'theme_list.json'
+        catalog_entry = {'theme': 'Landfall', **expected_fields}
+        catalog_path.write_text(json.dumps({'themes': [catalog_entry]}), encoding='utf-8')
+
+        write_theme(temp_catalog_dir, 'landfall.yml', {
+            'display_name': 'Landfall',
+            'synergies': ['Ramp'],
+            'example_commanders': ['Tatyova, Benthic Druid'],
+        })
+
+        enricher = ThemeEnrichmentPipeline(root=temp_root)
+        enricher.load_all_themes()
+        enricher.augment_from_catalog()
+
+        assert enricher.stats.augmented == 1
+        landfall = list(enricher.themes.values())[0]
+        assert landfall.modified
+        for field_name, expected_value in expected_fields.items():
+            assert landfall.data[field_name] == expected_value
+    
+    def test_validate_min_examples_warning(self, temp_root: Path, temp_catalog_dir: Path):
+        """A theme below min_examples produces warnings, not errors, when unenforced."""
+        ramp_theme = {
+            'display_name': 'Ramp',
+            'synergies': [],
+            'example_commanders': ['Ramp Commander'],  # one example vs. min_examples=5
+        }
+        write_theme(temp_catalog_dir, 'ramp.yml', ramp_theme)
+
+        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
+        enricher.load_all_themes()
+        enricher.validate(enforce_min=False)
+
+        lint = enricher.stats
+        assert lint.lint_warnings > 0
+        assert lint.lint_errors == 0
+    
+    def test_validate_min_examples_error(self, temp_root: Path, temp_catalog_dir: Path):
+        """With enforce_min=True, a theme below min_examples is reported as an error."""
+        ramp_theme = {
+            'display_name': 'Ramp',
+            'synergies': [],
+            'example_commanders': ['Ramp Commander'],  # one example vs. min_examples=5
+        }
+        write_theme(temp_catalog_dir, 'ramp.yml', ramp_theme)
+
+        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
+        enricher.load_all_themes()
+        enricher.validate(enforce_min=True)
+
+        assert enricher.stats.lint_errors > 0
+    
+    def test_write_themes_dry_run(self, temp_root: Path, temp_catalog_dir: Path):
+        """In-memory enrichment must not touch the file unless a write is requested."""
+        tokens_file = write_theme(temp_catalog_dir, 'tokens.yml', {
+            'display_name': 'Tokens',
+            'synergies': [],
+            'example_commanders': [],
+        })
+        before = tokens_file.read_text(encoding='utf-8')
+
+        enricher = ThemeEnrichmentPipeline(root=temp_root)
+        enricher.load_all_themes()
+        enricher.autofill_placeholders()
+        # write_all_themes() is intentionally never called here.
+
+        # The on-disk content should be unchanged.
+        after = tokens_file.read_text(encoding='utf-8')
+        assert after == before
+    
+    def test_write_themes_saves_changes(self, temp_root: Path, temp_catalog_dir: Path):
+        """write_all_themes persists the autofilled placeholder to disk."""
+        tokens_file = write_theme(temp_catalog_dir, 'tokens.yml', {
+            'display_name': 'Tokens',
+            'synergies': ['Sacrifice'],
+            'example_commanders': [],
+        })
+
+        enricher = ThemeEnrichmentPipeline(root=temp_root)
+        enricher.load_all_themes()
+        enricher.autofill_placeholders()
+        enricher.write_all_themes()
+
+        # Re-read from disk to confirm the change was saved.
+        saved_examples = read_theme(tokens_file)['example_commanders']
+        assert len(saved_examples) > 0
+        assert 'Tokens Anchor' in saved_examples
+    
+    def test_run_all_full_pipeline(self, temp_root: Path, temp_catalog_dir: Path):
+        """Run every enrichment step via run_all and check the stats and written output."""
+        # Two themes, both starting with no example commanders.
+        seed_themes = (
+            ('landfall.yml', 'Landfall', ['Ramp', 'Lands']),
+            ('reanimate.yml', 'Reanimate', ['Graveyard']),
+        )
+        for filename, display_name, synergies in seed_themes:
+            write_theme(temp_catalog_dir, filename, {
+                'display_name': display_name,
+                'synergies': synergies,
+                'example_commanders': [],
+            })
+
+        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
+        result = enricher.run_all(write=True, enforce_min=False, strict_lint=False)
+
+        assert result.total_themes == 2
+        assert result.autofilled >= 2
+        assert result.padded >= 2
+
+        # Verify files were updated on disk.
+        landfall_on_disk = read_theme(temp_catalog_dir / 'landfall.yml')
+        assert len(landfall_on_disk['example_commanders']) >= 5
+        assert landfall_on_disk.get('editorial_quality') == 'draft'
+
+
+def test_run_enrichment_pipeline_convenience_function(temp_root: Path, temp_catalog_dir: Path):
+    """Exercise run_enrichment_pipeline end to end on a single empty theme."""
+    tokens_theme = {
+        'display_name': 'Tokens',
+        'synergies': ['Sacrifice'],
+        'example_commanders': [],
+    }
+    write_theme(temp_catalog_dir, 'tokens.yml', tokens_theme)
+
+    result = run_enrichment_pipeline(
+        root=temp_root,
+        min_examples=3,
+        write=True,
+        enforce_min=False,
+        strict=False,
+        progress_callback=None,
+    )
+
+    assert isinstance(result, EnrichmentStats)
+    assert result.total_themes == 1
+    assert result.autofilled >= 1
+
+    # Verify file was written with at least min_examples entries.
+    written_examples = read_theme(temp_catalog_dir / 'tokens.yml')['example_commanders']
+    assert len(written_examples) >= 3
--- a/code/tests/test_web_tag_endpoints.py
+++ b/code/tests/test_web_tag_endpoints.py
@ -0,0 +1,214 @@
+"""Tests for web tag search endpoints."""
+import pytest
+from fastapi.testclient import TestClient
+
+
+@pytest.fixture
+def client():
+    """Return a TestClient wrapping the web application."""
+    # Deferred import: importing at module level would cause circular imports.
+    from code.web.app import app as web_app
+    return TestClient(web_app)
+
+
+def test_theme_autocomplete_basic(client):
+    """Autocomplete for 'life' returns an HTML fragment with items and counts."""
+    resp = client.get("/commanders/theme-autocomplete?theme=life&limit=5")
+
+    assert resp.status_code == 200
+    assert "text/html" in resp.headers["content-type"]
+
+    html = resp.text
+    # Expected markers: an item element, a tag beginning with "Life", and a
+    # card-count element.
+    for marker in ("autocomplete-item", "Life", "tag-count"):
+        assert marker in html
+
+
+def test_theme_autocomplete_min_length(client):
+    """A one-character theme query is rejected (minimum is 2 characters)."""
+    too_short = client.get("/commanders/theme-autocomplete?theme=a&limit=5")
+
+    # Request validation should fail with 422.
+    assert too_short.status_code == 422
+
+
+def test_theme_autocomplete_no_matches(client):
+    """A query that matches nothing still returns 200 with an empty-state marker."""
+    resp = client.get("/commanders/theme-autocomplete?theme=zzzzzzzzz&limit=5")
+
+    assert resp.status_code == 200
+    html = resp.text
+    # Either the empty-state CSS class or its message text is acceptable.
+    empty_markers = ("autocomplete-empty", "No matching themes")
+    assert any(marker in html for marker in empty_markers)
+
+
+def test_theme_autocomplete_limit(client):
+    """Test that theme autocomplete respects limit parameter.
+
+    Uses a two-character query so the request passes the endpoint's minimum
+    length validation; the one-character rejection (422) is pinned separately
+    by test_theme_autocomplete_min_length and is not repeated here.
+    """
+    response = client.get("/commanders/theme-autocomplete?theme=to&limit=3")
+    assert response.status_code == 200
+
+    # Count items (rough check - should have at most 3).
+    # NOTE(review): this matches the exact attribute text class="autocomplete-item";
+    # if the template emits extra classes the count is 0 and the check is vacuous.
+    item_count = response.text.count('class="autocomplete-item"')
+    assert item_count <= 3
+
+
+def test_api_cards_by_tags_and_logic(client):
+    """Card search with logic=AND returns the expected JSON envelope."""
+    resp = client.get("/api/cards/by-tags?tags=tokens&logic=AND&limit=10")
+
+    assert resp.status_code == 200
+    payload = resp.json()
+
+    # Check that each key exists before inspecting its value.
+    assert "tags" in payload
+    assert "logic" in payload
+    assert payload["logic"] == "AND"
+    for required_key in ("total_matches", "cards"):
+        assert required_key in payload
+    assert isinstance(payload["cards"], list)
+
+
+def test_api_cards_by_tags_or_logic(client):
+    """Card search with logic=OR over two tags echoes the logic and returns cards."""
+    resp = client.get("/api/cards/by-tags?tags=tokens,sacrifice&logic=OR&limit=10")
+
+    assert resp.status_code == 200
+    payload = resp.json()
+
+    assert payload["logic"] == "OR"
+    assert "cards" in payload
+
+
+def test_api_cards_by_tags_invalid_logic(client):
+    """An unrecognised logic value yields a 400 with an 'error' field."""
+    resp = client.get("/api/cards/by-tags?tags=tokens&logic=INVALID&limit=10")
+
+    assert resp.status_code == 400
+    payload = resp.json()
+    assert "error" in payload
+
+
+def test_api_cards_by_tags_empty_tags(client):
+    """An empty tags parameter yields a 400 with an 'error' field."""
+    resp = client.get("/api/cards/by-tags?tags=&logic=AND&limit=10")
+
+    assert resp.status_code == 400
+    payload = resp.json()
+    assert "error" in payload
+
+
+def test_api_tags_search(client):
+    """Tag search echoes the query and returns a list of match records."""
+    resp = client.get("/api/cards/tags/search?q=life&limit=10")
+
+    assert resp.status_code == 200
+    payload = resp.json()
+
+    assert "query" in payload
+    assert payload["query"] == "life"
+    assert "matches" in payload
+    found = payload["matches"]
+    assert isinstance(found, list)
+
+    # Only the first match is inspected, and only when there is one.
+    if found:
+        first = found[0]
+        assert "tag" in first
+        assert "card_count" in first
+        assert first["tag"].lower().startswith("life")
+
+
+def test_api_tags_search_min_length(client):
+    """A one-character tag search is rejected (minimum is 2 characters)."""
+    too_short = client.get("/api/cards/tags/search?q=a&limit=10")
+
+    # Request validation should fail with 422.
+    assert too_short.status_code == 422
+
+
+def test_api_tags_popular(client):
+    """Test popular tags endpoint.
+
+    Checks the response envelope (count/tags), that the limit is honoured,
+    the shape of the first tag record, and that the whole list is ordered
+    by card count in descending order.
+    """
+    response = client.get("/api/cards/tags/popular?limit=20")
+
+    assert response.status_code == 200
+    data = response.json()
+
+    assert "count" in data
+    assert "tags" in data
+    assert isinstance(data["tags"], list)
+    assert data["count"] == len(data["tags"])
+    assert data["count"] <= 20
+
+    # Check tag structure
+    if data["tags"]:
+        tag = data["tags"][0]
+        assert "tag" in tag
+        assert "card_count" in tag
+        assert isinstance(tag["card_count"], int)
+
+        # Tags should be sorted by card count (descending). Compare every
+        # adjacent pair, not just the first two, so a mis-ordered tail is caught.
+        counts = [entry["card_count"] for entry in data["tags"]]
+        assert all(earlier >= later for earlier, later in zip(counts, counts[1:]))
+
+
+def test_api_tags_popular_limit(client):
+    """The popular-tags endpoint returns no more entries than the limit."""
+    resp = client.get("/api/cards/tags/popular?limit=5")
+
+    assert resp.status_code == 200
+    returned_tags = resp.json()["tags"]
+
+    assert len(returned_tags) <= 5
+
+
+def test_commanders_page_loads(client):
+    """The commanders page renders as HTML and includes the theme filter markup."""
+    resp = client.get("/commanders")
+
+    assert resp.status_code == 200
+    assert "text/html" in resp.headers["content-type"]
+
+    html = resp.text
+    # Should have the theme filter input and its suggestions container.
+    for marker in ("commander-theme", "theme-suggestions"):
+        assert marker in html
+
+
+def test_commanders_page_with_theme_filter(client):
+    """Test commanders page with theme query parameter.
+
+    Requests the page with ?theme=tokens and checks that the page loads and
+    that the theme text appears somewhere in the rendered HTML.
+    """
+    response = client.get("/commanders?theme=tokens")
+
+    assert response.status_code == 200
+    content = response.text
+
+    # This only proves the theme text is present somewhere in the page. A
+    # disjunction with 'value="tokens"' adds nothing, since that string
+    # already contains "tokens".
+    # NOTE(review): tighten to the input's value attribute once the template
+    # markup is confirmed.
+    assert "tokens" in content
+
+
+@pytest.mark.skip(reason="Performance test - run manually")
+def test_theme_autocomplete_performance(client):
+    """Test that theme autocomplete responds quickly.
+
+    Times a single request and requires it to finish within 50 ms. Skipped by
+    default; intended to be run manually.
+    """
+    import time
+
+    # perf_counter is monotonic and high-resolution; time.time() can jump with
+    # system clock adjustments, which matters for a 50 ms budget.
+    start = time.perf_counter()
+    response = client.get("/commanders/theme-autocomplete?theme=to&limit=20")
+    elapsed = time.perf_counter() - start
+
+    assert response.status_code == 200
+    assert elapsed < 0.05  # Should respond in <50ms
+
+
+@pytest.mark.skip(reason="Performance test - run manually")
+def test_api_tags_search_performance(client):
+    """Test that tag search responds quickly.
+
+    Times a single request and requires it to finish within 50 ms. Skipped by
+    default; intended to be run manually.
+    """
+    import time
+
+    # perf_counter is monotonic and high-resolution; time.time() can jump with
+    # system clock adjustments, which matters for a 50 ms budget.
+    start = time.perf_counter()
+    response = client.get("/api/cards/tags/search?q=to&limit=20")
+    elapsed = time.perf_counter() - start
+
+    assert response.status_code == 200
+    assert elapsed < 0.05  # Should respond in <50ms