feat: theme catalog optimization with tag search and faster enrichment

This commit is contained in:
matt 2025-10-15 17:17:46 -07:00
parent 952b151162
commit 9e6c68f559
26 changed files with 5906 additions and 5688 deletions

View file

@ -0,0 +1,429 @@
"""Tests for tag index functionality."""
import json
import time
from code.tagging.tag_index import (
TagIndex,
IndexStats,
get_tag_index,
clear_global_index,
)
class TestTagIndexBuild:
    """Checks covering construction of the tag index."""

    def test_build_index(self):
        """A build returns an IndexStats instance with positive counts."""
        tag_index = TagIndex()
        build_stats = tag_index.build()

        assert isinstance(build_stats, IndexStats)
        assert build_stats.total_cards > 0
        assert build_stats.total_tags > 0
        assert build_stats.total_mappings > 0
        assert build_stats.build_time_seconds >= 0

    def test_build_index_performance(self):
        """Both the measured and the self-reported build time stay under 5s."""
        tag_index = TagIndex()

        began = time.perf_counter()
        build_stats = tag_index.build()
        wall_seconds = time.perf_counter() - began

        # Five seconds is the budget for a typical dataset.
        assert wall_seconds < 5.0
        assert build_stats.build_time_seconds < 5.0

    def test_force_rebuild(self):
        """build(force_rebuild=True) produces a later indexed_at stamp."""
        tag_index = TagIndex()

        # Initial build gives the baseline timestamp.
        first_stats = tag_index.build()
        first_indexed_at = first_stats.indexed_at

        # Pause briefly so the second timestamp can differ from the first.
        time.sleep(0.1)

        # Second build explicitly requests a rebuild.
        second_stats = tag_index.build(force_rebuild=True)
        second_indexed_at = second_stats.indexed_at

        assert second_indexed_at > first_indexed_at
class TestSingleTagQueries:
    """Lookups that involve exactly one tag or one card."""

    def test_get_cards_with_tag(self):
        """An existing tag maps to a non-empty set of cards."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if not known_tags:
            # No tags in the index: nothing to look up.
            return

        first_tag = known_tags[0]
        matched = tag_index.get_cards_with_tag(first_tag)
        assert isinstance(matched, set)
        assert len(matched) > 0

    def test_get_cards_with_nonexistent_tag(self):
        """An unknown tag yields an empty set."""
        tag_index = TagIndex()
        tag_index.build()

        matched = tag_index.get_cards_with_tag("ThisTagDoesNotExist12345")
        assert matched == set()

    def test_get_tags_for_card(self):
        """A card found through a tag reports a non-empty tag list."""
        tag_index = TagIndex()
        tag_index.build()

        # Find a card that is present by going through the first known tag.
        if tag_index.get_all_tags():
            candidates = tag_index.get_cards_with_tag(tag_index.get_all_tags()[0])
        else:
            candidates = set()

        if not candidates:
            return

        some_card = list(candidates)[0]
        card_tags = tag_index.get_tags_for_card(some_card)
        assert isinstance(card_tags, list)
        assert len(card_tags) > 0

    def test_get_tags_for_nonexistent_card(self):
        """An unknown card name yields an empty list."""
        tag_index = TagIndex()
        tag_index.build()

        card_tags = tag_index.get_tags_for_card("This Card Does Not Exist 12345")
        assert card_tags == []
class TestMultiTagQueries:
    """Lookups that combine several tags."""

    def test_get_cards_with_all_tags(self):
        """The all-tags query equals the intersection of the per-tag sets."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if len(known_tags) < 2:
            # Two distinct tags are needed for this comparison.
            return

        first_tag = known_tags[0]
        second_tag = known_tags[1]
        with_first = tag_index.get_cards_with_tag(first_tag)
        with_second = tag_index.get_cards_with_tag(second_tag)
        with_both = tag_index.get_cards_with_all_tags([first_tag, second_tag])

        # The combined result cannot contain anything missing from either side.
        assert with_both.issubset(with_first)
        assert with_both.issubset(with_second)

        # And it must be exactly the intersection.
        assert with_both == (with_first & with_second)

    def test_get_cards_with_any_tags(self):
        """The any-tags query equals the union of the per-tag sets."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if len(known_tags) < 2:
            # Two distinct tags are needed for this comparison.
            return

        first_tag = known_tags[0]
        second_tag = known_tags[1]
        with_first = tag_index.get_cards_with_tag(first_tag)
        with_second = tag_index.get_cards_with_tag(second_tag)
        with_either = tag_index.get_cards_with_any_tags([first_tag, second_tag])

        # Each per-tag set must be contained in the combined result.
        assert with_first.issubset(with_either)
        assert with_second.issubset(with_either)

        # And the combined result must be exactly the union.
        assert with_either == (with_first | with_second)

    def test_get_cards_with_empty_tag_list(self):
        """Both multi-tag queries return an empty set for an empty tag list."""
        tag_index = TagIndex()
        tag_index.build()

        all_result = tag_index.get_cards_with_all_tags([])
        any_result = tag_index.get_cards_with_any_tags([])

        assert all_result == set()
        assert any_result == set()

    def test_get_cards_with_nonexistent_tags(self):
        """Both multi-tag queries return an empty set for unknown tags."""
        tag_index = TagIndex()
        tag_index.build()

        unknown_tags = ["FakeTag1", "FakeTag2"]
        all_result = tag_index.get_cards_with_all_tags(unknown_tags)
        any_result = tag_index.get_cards_with_any_tags(unknown_tags)

        assert all_result == set()
        assert any_result == set()
class TestIndexStats:
    """Statistics and metadata exposed by the index."""

    def test_get_stats(self):
        """get_stats() is None before a build and mirrors build() afterwards."""
        tag_index = TagIndex()

        # No build has happened yet.
        assert tag_index.get_stats() is None

        # Once built, the stored stats agree with what build() returned.
        built = tag_index.build()
        stored = tag_index.get_stats()

        assert stored is not None
        assert stored.total_cards == built.total_cards
        assert stored.total_tags == built.total_tags

    def test_get_all_tags(self):
        """get_all_tags() returns a non-empty, sorted list."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()

        assert isinstance(known_tags, list)
        assert len(known_tags) > 0
        # The list is expected to come back already sorted.
        assert known_tags == sorted(known_tags)

    def test_get_tag_stats(self):
        """Per-tag stats expose a positive card_count."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if not known_tags:
            return

        first_tag = known_tags[0]
        tag_stats = tag_index.get_tag_stats(first_tag)
        assert "card_count" in tag_stats
        assert tag_stats["card_count"] > 0

    def test_get_popular_tags(self):
        """Popular tags honour the limit and are ordered by descending count."""
        tag_index = TagIndex()
        tag_index.build()

        ranking = tag_index.get_popular_tags(limit=10)

        assert isinstance(ranking, list)
        assert len(ranking) <= 10

        if len(ranking) > 1:
            # Second element of each pair is the count; they must not increase.
            observed_counts = [pair_count for _, pair_count in ranking]
            assert observed_counts == sorted(observed_counts, reverse=True)
class TestCaching:
    """On-disk caching of the index."""

    def test_save_and_load_cache(self, tmp_path):
        """A second index on the same cache path reports identical stats."""
        cache_file = tmp_path / ".tag_index_test.json"

        # First instance builds and is expected to write the cache file.
        writer = TagIndex(cache_path=cache_file)
        written_stats = writer.build()
        assert cache_file.exists()

        # Second instance points at the same file and should read it back.
        reader = TagIndex(cache_path=cache_file)
        read_stats = reader.build()

        # Identical indexed_at shows the data came from the cache.
        assert read_stats.total_cards == written_stats.total_cards
        assert read_stats.total_tags == written_stats.total_tags
        assert read_stats.indexed_at == written_stats.indexed_at

    def test_cache_invalidation(self, tmp_path):
        """A cache whose recorded all_cards mtime is stale triggers a rebuild."""
        cache_file = tmp_path / ".tag_index_test.json"

        original_index = TagIndex(cache_path=cache_file)
        original_stats = original_index.build()

        # Rewrite the cache with a very old all_cards_mtime value.
        with cache_file.open("r") as handle:
            payload = json.load(handle)
        payload["stats"]["all_cards_mtime"] = 0
        with cache_file.open("w") as handle:
            json.dump(payload, handle)

        # A fresh instance should ignore the stale cache and rebuild.
        fresh_index = TagIndex(cache_path=cache_file)
        fresh_stats = fresh_index.build()

        # A rebuild is visible as a newer timestamp.
        assert fresh_stats.indexed_at > original_stats.indexed_at

    def test_clear_cache(self, tmp_path):
        """clear_cache() removes the cache file from disk."""
        cache_file = tmp_path / ".tag_index_test.json"
        tag_index = TagIndex(cache_path=cache_file)

        tag_index.build()
        assert cache_file.exists()

        tag_index.clear_cache()
        assert not cache_file.exists()
class TestGlobalIndex:
    """Module-level accessor for the shared index."""

    def test_get_tag_index(self):
        """The accessor hands back a TagIndex that already has stats."""
        clear_global_index()

        shared = get_tag_index()

        assert isinstance(shared, TagIndex)
        assert shared.get_stats() is not None

    def test_get_tag_index_singleton(self):
        """Two consecutive accessor calls return the very same object."""
        clear_global_index()

        first = get_tag_index()
        second = get_tag_index()

        assert first is second

    def test_clear_global_index(self):
        """After clearing, the accessor returns a different object."""
        before = get_tag_index()
        clear_global_index()
        after = get_tag_index()

        assert before is not after
class TestEdgeCases:
    """Unusual inputs for the tag index."""

    def test_cards_with_no_tags(self):
        """Building still yields stats when some cards carry no tags."""
        tag_index = TagIndex()
        tag_index.build()

        # Only checks that stats exist after the build.
        current_stats = tag_index.get_stats()
        assert current_stats is not None

    def test_special_characters_in_tags(self):
        """A tag containing '&' and spaces can be queried without error."""
        tag_index = TagIndex()
        tag_index.build()

        # Only the return type is asserted; the tag may or may not exist.
        matched = tag_index.get_cards_with_tag("Life & Death")
        assert isinstance(matched, set)

    def test_case_sensitive_tags(self):
        """Original, upper-cased and lower-cased tag queries all return sets."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if not known_tags:
            return

        first_tag = known_tags[0]
        as_given = tag_index.get_cards_with_tag(first_tag)
        as_upper = tag_index.get_cards_with_tag(first_tag.upper())
        as_lower = tag_index.get_cards_with_tag(first_tag.lower())

        # Contents may differ by case depending on the data,
        # so only the return types are checked.
        assert isinstance(as_given, set)
        assert isinstance(as_upper, set)
        assert isinstance(as_lower, set)

    def test_duplicate_tags_handled(self):
        """Repeating a tag in an all-tags query matches the single-tag result."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if not known_tags:
            return

        first_tag = known_tags[0]
        # The same tag listed twice.
        from_duplicates = tag_index.get_cards_with_all_tags([first_tag, first_tag])
        from_single = tag_index.get_cards_with_tag(first_tag)

        assert from_duplicates == from_single
class TestPerformance:
    """Timing budgets for index queries."""

    def test_query_performance(self):
        """100 single-tag lookups average under 1 ms each."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if not known_tags:
            return

        first_tag = known_tags[0]

        # Time a fixed batch of lookups.
        began = time.perf_counter()
        for _ in range(100):
            tag_index.get_cards_with_tag(first_tag)
        elapsed = time.perf_counter() - began

        avg_time_ms = (elapsed / 100) * 1000

        # Budget: less than one millisecond per lookup on average.
        assert avg_time_ms < 1.0

    def test_multi_tag_query_performance(self):
        """100 three-tag AND lookups average under 5 ms each."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if len(known_tags) < 3:
            return

        query_tags = known_tags[:3]

        # Time a fixed batch of lookups.
        began = time.perf_counter()
        for _ in range(100):
            tag_index.get_cards_with_all_tags(query_tags)
        elapsed = time.perf_counter() - began

        avg_time_ms = (elapsed / 100) * 1000

        # Budget: less than five milliseconds per lookup on average.
        assert avg_time_ms < 5.0

View file

@ -0,0 +1,259 @@
"""Tests for batch tag loading from all_cards."""
from code.tagging.tag_loader import (
load_tags_for_cards,
load_tags_for_card,
get_cards_with_tag,
get_cards_with_all_tags,
clear_cache,
is_use_all_cards_enabled,
)
class TestBatchTagLoading:
    """Loading tags for one card or a batch of cards."""

    def test_load_tags_for_multiple_cards(self):
        """A three-card batch returns a dict with one list per requested card."""
        requested = ["Sol Ring", "Lightning Bolt", "Counterspell"]

        loaded = load_tags_for_cards(requested)

        assert isinstance(loaded, dict)
        assert len(loaded) == 3
        # Each requested name must be a key, whether or not it has tags.
        for name in requested:
            assert name in loaded
            assert isinstance(loaded[name], list)

    def test_load_tags_for_empty_list(self):
        """An empty request yields an empty dict."""
        loaded = load_tags_for_cards([])
        assert loaded == {}

    def test_load_tags_for_single_card(self):
        """The single-card helper returns a list."""
        card_tags = load_tags_for_card("Sol Ring")
        assert isinstance(card_tags, list)
        # Specific tags are deliberately not asserted because the
        # underlying card data may change.

    def test_load_tags_for_nonexistent_card(self):
        """An unknown card name yields an empty list rather than an error."""
        card_tags = load_tags_for_card("This Card Does Not Exist 12345")
        assert card_tags == []

    def test_load_tags_batch_includes_missing_cards(self):
        """Unknown names in a batch still appear, mapped to empty lists."""
        requested = ["Sol Ring", "Fake Card Name 999", "Lightning Bolt"]

        loaded = load_tags_for_cards(requested)

        # Every name, including the fake one, must be present.
        assert len(loaded) == 3
        assert "Fake Card Name 999" in loaded
        assert loaded["Fake Card Name 999"] == []

    def test_load_tags_handles_list_format(self):
        """Loaded tags are whitespace-stripped strings."""
        loaded = load_tags_for_cards(["Sol Ring"])

        if "Sol Ring" not in loaded or not loaded["Sol Ring"]:
            # Nothing to inspect when the card has no tags.
            return

        sol_ring_tags = loaded["Sol Ring"]
        # Every entry is a string ...
        assert all(isinstance(tag, str) for tag in sol_ring_tags)
        # ... with no leading or trailing whitespace.
        assert all(tag == tag.strip() for tag in sol_ring_tags)

    def test_load_tags_handles_string_format(self):
        """Each loaded value is a list without empty or blank tags."""
        # Both storage formats are exercised only implicitly here,
        # through whatever format the loaded cards happen to use.
        requested = ["Sol Ring", "Lightning Bolt"]

        loaded = load_tags_for_cards(requested)

        for name in requested:
            card_tags = loaded[name]
            # Always a list, possibly empty.
            assert isinstance(card_tags, list)
            # Neither empty strings nor whitespace-only strings.
            assert "" not in card_tags
            assert all(tag.strip() for tag in card_tags)
class TestTagQueries:
    """Finding cards by tag through the loader module."""

    def test_get_cards_with_tag(self):
        """A tag query returns a list."""
        matched = get_cards_with_tag("ramp", limit=10)
        assert isinstance(matched, list)
        # The number of matches is not asserted because it depends on the data.

    def test_get_cards_with_tag_limit(self):
        """No more than `limit` cards come back."""
        matched = get_cards_with_tag("ramp", limit=5)
        assert len(matched) <= 5

    def test_get_cards_with_nonexistent_tag(self):
        """An unknown tag yields an empty list rather than an error."""
        matched = get_cards_with_tag("ThisTagDoesNotExist12345")
        assert matched == []

    def test_get_cards_with_all_tags(self):
        """A two-tag query returns a list bounded by `limit`."""
        matched = get_cards_with_all_tags(["artifacts", "ramp"], limit=10)
        assert isinstance(matched, list)
        assert len(matched) <= 10

    def test_get_cards_with_all_tags_no_matches(self):
        """Two unknown tags yield an empty list."""
        unknown_tags = [
            "ThisTagDoesNotExist1",
            "ThisTagDoesNotExist2",
        ]
        matched = get_cards_with_all_tags(unknown_tags)
        assert matched == []
class TestCacheManagement:
    """Cache-related helpers of the loader module."""

    def test_clear_cache(self):
        """Loading keeps working after clear_cache() has been called."""
        # Load once so there is something to clear.
        load_tags_for_card("Sol Ring")

        clear_cache()

        # A load after clearing must still return a list.
        reloaded = load_tags_for_card("Sol Ring")
        assert isinstance(reloaded, list)

    def test_cache_persistence(self):
        """Two identical batch requests return equal results."""
        first_pass = load_tags_for_cards(["Sol Ring", "Lightning Bolt"])
        # The repeat call is expected to be answered from cached data.
        second_pass = load_tags_for_cards(["Sol Ring", "Lightning Bolt"])

        assert first_pass == second_pass
class TestFeatureFlag:
    """Feature-flag accessor of the loader module."""

    def test_is_use_all_cards_enabled_default(self):
        """The flag accessor returns a bool."""
        flag_value = is_use_all_cards_enabled()
        # The concrete value is not asserted because the environment
        # may override the default.
        assert isinstance(flag_value, bool)
class TestEdgeCases:
    """Unusual inputs for the tag loader."""

    def test_load_tags_with_special_characters(self):
        """Every name in this batch, including "Urza's Saga", is a result key."""
        requested = [
            "Urza's Saga",
            "Keeper of the Accord",
            "Esper Sentinel",
        ]

        loaded = load_tags_for_cards(requested)

        assert len(loaded) == 3
        for name in requested:
            assert name in loaded

    def test_load_tags_preserves_card_name_case(self):
        """A mixed-case batch still produces at least one entry."""
        requested = ["Sol Ring", "LIGHTNING BOLT", "counterspell"]

        loaded = load_tags_for_cards(requested)

        # Lookup is by the provided names; whether the oddly-cased ones
        # match anything depends on the all_cards data.
        assert "Sol Ring" in loaded or len(loaded) >= 1

    def test_load_tags_deduplicates(self):
        """A card's tag list contains no repeated entries."""
        card_tags = load_tags_for_card("Sol Ring")

        if not card_tags:
            return

        assert len(card_tags) == len(set(card_tags))

    def test_large_batch_performance(self):
        """A 100-entry request finishes in under one second."""
        import time

        # 100 entries made of two card names repeated 50 times each.
        requested = ["Sol Ring"] * 50 + ["Lightning Bolt"] * 50

        began = time.perf_counter()
        loaded = load_tags_for_cards(requested)
        wall_seconds = time.perf_counter() - began

        assert wall_seconds < 1.0
        # At least one entry must come back.
        assert len(loaded) >= 1
class TestFormatVariations:
    """Test handling of different tag format variations."""

    def test_empty_tags_handled(self):
        """Test that loading tags for "Plains" returns a list (possibly empty)."""
        # "Plains" is used as a card that might carry no tags.
        tags = load_tags_for_card("Plains")

        # The result must be a list, never None. The earlier form
        # `tags == [] or isinstance(tags, list)` reduced to this same check.
        assert isinstance(tags, list)

    def test_string_list_repr_parsed(self):
        """Test that parsed tags carry no leftover list-repr punctuation."""
        # The parsing itself happens inside load_tags_for_cards; this test
        # only inspects what comes out.
        cards = ["Sol Ring", "Lightning Bolt", "Counterspell"]
        result = load_tags_for_cards(cards)

        for card, tags in result.items():
            assert isinstance(tags, list), card

            for tag in tags:
                # No stray brackets or double quotes anywhere in the tag.
                assert "[" not in tag
                assert "]" not in tag
                assert '"' not in tag
                # An apostrophe inside a word is a legitimate part of a tag;
                # a quote at either end is residue from an unparsed repr.
                # The earlier count-based check did the opposite: it failed
                # on a single apostrophe and passed a fully quoted tag.
                assert not tag.startswith("'")
                assert not tag.endswith("'")

    def test_comma_separated_parsed(self):
        """Test that individual tags contain no commas."""
        # Splitting happens inside the loader; this only checks the output.
        result = load_tags_for_cards(["Sol Ring"])

        if result.get("Sol Ring"):
            tags = result["Sol Ring"]
            # A comma inside a tag would mean the string was not split.
            for tag in tags:
                assert "," not in tag

View file

@ -0,0 +1,370 @@
"""Tests for consolidated theme enrichment pipeline.
These tests verify that the new consolidated pipeline produces the same results
as the old 7-script approach, but much faster.
"""
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict
import pytest
try:
import yaml
except ImportError:
yaml = None
from code.tagging.theme_enrichment import (
ThemeEnrichmentPipeline,
EnrichmentStats,
run_enrichment_pipeline,
)
# Skip all tests if PyYAML not available
pytestmark = pytest.mark.skipif(yaml is None, reason="PyYAML not installed")
@pytest.fixture
def temp_catalog_dir(tmp_path: Path) -> Path:
    """Provide an empty config/themes/catalog directory under tmp_path."""
    created = tmp_path / 'config' / 'themes' / 'catalog'
    created.mkdir(parents=True)
    return created
@pytest.fixture
def temp_root(tmp_path: Path, temp_catalog_dir: Path) -> Path:
    """Provide tmp_path as a project root containing an empty theme_list.json."""
    themes_dir = tmp_path / 'config' / 'themes'
    list_file = themes_dir / 'theme_list.json'

    # The directory normally exists already via temp_catalog_dir;
    # exist_ok keeps this call harmless in that case.
    list_file.parent.mkdir(parents=True, exist_ok=True)
    list_file.write_text('{"themes": []}', encoding='utf-8')

    return tmp_path
def write_theme(catalog_dir: Path, filename: str, data: Dict[str, Any]) -> Path:
    """Dump `data` as YAML into `catalog_dir / filename` and return that path."""
    target = catalog_dir / filename
    # sort_keys=False keeps the key order of `data` in the output.
    serialized = yaml.safe_dump(data, sort_keys=False, allow_unicode=True)
    target.write_text(serialized, encoding='utf-8')
    return target
def read_theme(path: Path) -> Dict[str, Any]:
    """Parse the YAML file at `path` and return its content."""
    raw_text = path.read_text(encoding='utf-8')
    return yaml.safe_load(raw_text)
class TestThemeEnrichmentPipeline:
    """Step-by-step checks of ThemeEnrichmentPipeline."""

    def test_init(self, temp_root: Path):
        """Constructor arguments are stored and no themes are loaded yet."""
        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)

        expected_catalog = temp_root / 'config' / 'themes' / 'catalog'
        assert enricher.root == temp_root
        assert enricher.min_examples == 5
        assert enricher.catalog_dir == expected_catalog
        assert len(enricher.themes) == 0

    def test_load_themes_empty_dir(self, temp_root: Path):
        """Loading from an empty catalog leaves zero themes."""
        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()

        assert len(enricher.themes) == 0
        assert enricher.stats.total_themes == 0

    def test_load_themes_with_valid_files(self, temp_root: Path, temp_catalog_dir: Path):
        """Two YAML files in the catalog load as two themes."""
        landfall = {
            'display_name': 'Landfall',
            'synergies': ['Ramp', 'Tokens'],
            'example_commanders': [],
        }
        reanimate = {
            'display_name': 'Reanimate',
            'synergies': ['Graveyard', 'Mill'],
            'example_commanders': ['Meren of Clan Nel Toth'],
        }
        write_theme(temp_catalog_dir, 'landfall.yml', landfall)
        write_theme(temp_catalog_dir, 'reanimate.yml', reanimate)

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()

        assert len(enricher.themes) == 2
        assert enricher.stats.total_themes == 2

    def test_autofill_placeholders_empty_examples(self, temp_root: Path, temp_catalog_dir: Path):
        """A theme without examples receives '<name> Anchor' placeholders."""
        write_theme(
            temp_catalog_dir,
            'tokens.yml',
            {
                'display_name': 'Tokens Matter',
                'synergies': ['Sacrifice', 'Aristocrats'],
                'example_commanders': [],
            },
        )

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()
        enricher.autofill_placeholders()

        assert enricher.stats.autofilled == 1

        loaded_theme = list(enricher.themes.values())[0]
        assert loaded_theme.modified
        # One placeholder for the display name and one per synergy.
        assert 'Tokens Matter Anchor' in loaded_theme.data['example_commanders']
        assert 'Sacrifice Anchor' in loaded_theme.data['example_commanders']
        assert 'Aristocrats Anchor' in loaded_theme.data['example_commanders']
        assert loaded_theme.data.get('editorial_quality') == 'draft'

    def test_autofill_skips_themes_with_examples(self, temp_root: Path, temp_catalog_dir: Path):
        """A theme that already lists an example is left untouched by autofill."""
        write_theme(
            temp_catalog_dir,
            'landfall.yml',
            {
                'display_name': 'Landfall',
                'synergies': ['Ramp'],
                'example_commanders': ['Tatyova, Benthic Druid'],
            },
        )

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()
        enricher.autofill_placeholders()

        assert enricher.stats.autofilled == 0

        loaded_theme = list(enricher.themes.values())[0]
        assert not loaded_theme.modified

    def test_pad_examples_to_minimum(self, temp_root: Path, temp_catalog_dir: Path):
        """A placeholder-only list is padded up to min_examples entries."""
        write_theme(
            temp_catalog_dir,
            'ramp.yml',
            {
                'display_name': 'Ramp',
                'synergies': ['Landfall', 'BigSpells', 'Hydras'],
                'example_commanders': ['Ramp Anchor', 'Landfall Anchor'],
            },
        )

        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
        enricher.load_all_themes()
        enricher.pad_examples()

        assert enricher.stats.padded == 1

        loaded_theme = list(enricher.themes.values())[0]
        assert loaded_theme.modified
        assert len(loaded_theme.data['example_commanders']) == 5
        # Synergy-based placeholders are expected first (here the 3rd synergy).
        assert 'Hydras Anchor' in loaded_theme.data['example_commanders']
        # Remaining slots are expected to use letter-suffixed placeholders.
        assert any(
            'Anchor B' in cmd or 'Anchor C' in cmd
            for cmd in loaded_theme.data['example_commanders']
        )

    def test_pad_skips_mixed_real_and_placeholder(self, temp_root: Path, temp_catalog_dir: Path):
        """A list mixing a real name with a placeholder is not padded."""
        write_theme(
            temp_catalog_dir,
            'tokens.yml',
            {
                'display_name': 'Tokens',
                'synergies': ['Sacrifice'],
                'example_commanders': ['Krenko, Mob Boss', 'Tokens Anchor'],
            },
        )

        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
        enricher.load_all_themes()
        enricher.pad_examples()

        assert enricher.stats.padded == 0

        loaded_theme = list(enricher.themes.values())[0]
        assert not loaded_theme.modified

    def test_cleanup_removes_placeholders_when_real_present(self, temp_root: Path, temp_catalog_dir: Path):
        """Cleanup drops the ' Anchor' placeholder and keeps the real names.

        Note: cleanup only removes entries ending with ' Anchor' (no suffix).
        The purge step removes entries matching ' Anchor' or ' Anchor X'.
        """
        write_theme(
            temp_catalog_dir,
            'lifegain.yml',
            {
                'display_name': 'Lifegain',
                'synergies': [],
                'example_commanders': [
                    'Oloro, Ageless Ascetic',
                    'Lifegain Anchor',  # expected to be removed
                    'Trelasarra, Moon Dancer',
                ],
            },
        )

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()
        enricher.cleanup_placeholders()

        assert enricher.stats.cleaned == 1

        loaded_theme = list(enricher.themes.values())[0]
        assert loaded_theme.modified
        assert len(loaded_theme.data['example_commanders']) == 2
        assert 'Oloro, Ageless Ascetic' in loaded_theme.data['example_commanders']
        assert 'Trelasarra, Moon Dancer' in loaded_theme.data['example_commanders']
        assert 'Lifegain Anchor' not in loaded_theme.data['example_commanders']

    def test_purge_removes_all_anchors(self, temp_root: Path, temp_catalog_dir: Path):
        """Purge empties a list that consists solely of anchor placeholders."""
        write_theme(
            temp_catalog_dir,
            'counters.yml',
            {
                'display_name': 'Counters',
                'synergies': [],
                'example_commanders': [
                    'Counters Anchor',
                    'Counters Anchor B',
                    'Counters Anchor C',
                ],
            },
        )

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()
        enricher.purge_anchors()

        assert enricher.stats.purged == 1

        loaded_theme = list(enricher.themes.values())[0]
        assert loaded_theme.modified
        assert loaded_theme.data['example_commanders'] == []

    def test_augment_from_catalog(self, temp_root: Path, temp_catalog_dir: Path):
        """Fields present in theme_list.json are copied onto the matching theme."""
        # Overwrite the catalog JSON with one entry for 'Landfall'.
        list_file = temp_root / 'config' / 'themes' / 'theme_list.json'
        landfall_entry = {
            'theme': 'Landfall',
            'description': 'Triggers from lands entering',
            'popularity_bucket': 'common',
            'popularity_hint': 'Very popular',
            'deck_archetype': 'Lands',
        }
        catalog_payload = {'themes': [landfall_entry]}

        import json
        list_file.write_text(json.dumps(catalog_payload), encoding='utf-8')

        write_theme(
            temp_catalog_dir,
            'landfall.yml',
            {
                'display_name': 'Landfall',
                'synergies': ['Ramp'],
                'example_commanders': ['Tatyova, Benthic Druid'],
            },
        )

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()
        enricher.augment_from_catalog()

        assert enricher.stats.augmented == 1

        loaded_theme = list(enricher.themes.values())[0]
        assert loaded_theme.modified
        assert loaded_theme.data['description'] == 'Triggers from lands entering'
        assert loaded_theme.data['popularity_bucket'] == 'common'
        assert loaded_theme.data['popularity_hint'] == 'Very popular'
        assert loaded_theme.data['deck_archetype'] == 'Lands'

    def test_validate_min_examples_warning(self, temp_root: Path, temp_catalog_dir: Path):
        """With enforce_min=False, too few examples count as warnings only."""
        write_theme(
            temp_catalog_dir,
            'ramp.yml',
            {
                'display_name': 'Ramp',
                'synergies': [],
                'example_commanders': ['Ramp Commander'],
            },
        )

        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
        enricher.load_all_themes()
        enricher.validate(enforce_min=False)

        assert enricher.stats.lint_warnings > 0
        assert enricher.stats.lint_errors == 0

    def test_validate_min_examples_error(self, temp_root: Path, temp_catalog_dir: Path):
        """With enforce_min=True, too few examples count as errors."""
        write_theme(
            temp_catalog_dir,
            'ramp.yml',
            {
                'display_name': 'Ramp',
                'synergies': [],
                'example_commanders': ['Ramp Commander'],
            },
        )

        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
        enricher.load_all_themes()
        enricher.validate(enforce_min=True)

        assert enricher.stats.lint_errors > 0

    def test_write_themes_dry_run(self, temp_root: Path, temp_catalog_dir: Path):
        """Without write_all_themes(), the file on disk stays unchanged."""
        theme_file = write_theme(
            temp_catalog_dir,
            'tokens.yml',
            {
                'display_name': 'Tokens',
                'synergies': [],
                'example_commanders': [],
            },
        )
        text_before = theme_file.read_text(encoding='utf-8')

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()
        enricher.autofill_placeholders()
        # write_all_themes() is intentionally not called here.

        assert theme_file.read_text(encoding='utf-8') == text_before

    def test_write_themes_saves_changes(self, temp_root: Path, temp_catalog_dir: Path):
        """write_all_themes() persists the autofilled placeholders."""
        theme_file = write_theme(
            temp_catalog_dir,
            'tokens.yml',
            {
                'display_name': 'Tokens',
                'synergies': ['Sacrifice'],
                'example_commanders': [],
            },
        )

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()
        enricher.autofill_placeholders()
        enricher.write_all_themes()

        # Re-read the file to see what was actually written.
        on_disk = read_theme(theme_file)
        assert len(on_disk['example_commanders']) > 0
        assert 'Tokens Anchor' in on_disk['example_commanders']

    def test_run_all_full_pipeline(self, temp_root: Path, temp_catalog_dir: Path):
        """run_all(write=True) autofills, pads and writes both themes."""
        write_theme(
            temp_catalog_dir,
            'landfall.yml',
            {
                'display_name': 'Landfall',
                'synergies': ['Ramp', 'Lands'],
                'example_commanders': [],
            },
        )
        write_theme(
            temp_catalog_dir,
            'reanimate.yml',
            {
                'display_name': 'Reanimate',
                'synergies': ['Graveyard'],
                'example_commanders': [],
            },
        )

        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
        run_stats = enricher.run_all(write=True, enforce_min=False, strict_lint=False)

        assert run_stats.total_themes == 2
        assert run_stats.autofilled >= 2
        assert run_stats.padded >= 2

        # The written file must hold at least min_examples entries.
        landfall_on_disk = read_theme(temp_catalog_dir / 'landfall.yml')
        assert len(landfall_on_disk['example_commanders']) >= 5
        assert landfall_on_disk.get('editorial_quality') == 'draft'
def test_run_enrichment_pipeline_convenience_function(temp_root: Path, temp_catalog_dir: Path):
    """run_enrichment_pipeline() returns EnrichmentStats and writes the theme."""
    write_theme(
        temp_catalog_dir,
        'tokens.yml',
        {
            'display_name': 'Tokens',
            'synergies': ['Sacrifice'],
            'example_commanders': [],
        },
    )

    run_stats = run_enrichment_pipeline(
        root=temp_root,
        min_examples=3,
        write=True,
        enforce_min=False,
        strict=False,
        progress_callback=None,
    )

    assert isinstance(run_stats, EnrichmentStats)
    assert run_stats.total_themes == 1
    assert run_stats.autofilled >= 1

    # The file on disk must now hold at least min_examples entries.
    tokens_on_disk = read_theme(temp_catalog_dir / 'tokens.yml')
    assert len(tokens_on_disk['example_commanders']) >= 3

View file

@ -0,0 +1,214 @@
"""Tests for web tag search endpoints."""
import pytest
from fastapi.testclient import TestClient
@pytest.fixture
def client():
    """Build a TestClient wrapping the web application."""
    # Deferred import: importing the app at module load would be circular.
    from code.web.app import app

    test_client = TestClient(app)
    return test_client
def test_theme_autocomplete_basic(client):
    """The autocomplete endpoint returns HTML items for the query 'life'."""
    resp = client.get("/commanders/theme-autocomplete?theme=life&limit=5")

    assert resp.status_code == 200
    assert "text/html" in resp.headers["content-type"]

    html = resp.text
    # Expected fragments: the item marker, a matching tag, the count marker.
    for fragment in ("autocomplete-item", "Life", "tag-count"):
        assert fragment in html
def test_theme_autocomplete_min_length(client):
    """A one-character theme query is rejected with 422."""
    resp = client.get("/commanders/theme-autocomplete?theme=a&limit=5")

    # Validation is expected to refuse the too-short query.
    assert resp.status_code == 422
def test_theme_autocomplete_no_matches(client):
    """A query matching nothing still returns 200 with an empty-state marker."""
    resp = client.get("/commanders/theme-autocomplete?theme=zzzzzzzzz&limit=5")

    assert resp.status_code == 200

    html = resp.text
    assert "autocomplete-empty" in html or "No matching themes" in html
def test_theme_autocomplete_limit(client):
    """At most `limit` autocomplete items are rendered."""
    short_query_resp = client.get("/commanders/theme-autocomplete?theme=a&limit=3")
    # The one-character query may be rejected by length validation.
    assert short_query_resp.status_code in [200, 422]

    # Repeat with a query long enough to be accepted.
    resp = client.get("/commanders/theme-autocomplete?theme=to&limit=3")
    assert resp.status_code == 200

    # Rough check: count item markers in the returned HTML.
    rendered_items = resp.text.count('class="autocomplete-item"')
    assert rendered_items <= 3
def test_api_cards_by_tags_and_logic(client):
    """An AND query returns the expected JSON envelope."""
    resp = client.get("/api/cards/by-tags?tags=tokens&logic=AND&limit=10")
    assert resp.status_code == 200

    payload = resp.json()
    assert "tags" in payload
    assert "logic" in payload
    assert payload["logic"] == "AND"
    assert "total_matches" in payload
    assert "cards" in payload
    assert isinstance(payload["cards"], list)
def test_api_cards_by_tags_or_logic(client):
    """An OR query echoes the logic and includes a cards field."""
    resp = client.get("/api/cards/by-tags?tags=tokens,sacrifice&logic=OR&limit=10")
    assert resp.status_code == 200

    payload = resp.json()
    assert payload["logic"] == "OR"
    assert "cards" in payload
def test_api_cards_by_tags_invalid_logic(client):
    """An unsupported logic value yields 400 with an error field."""
    resp = client.get("/api/cards/by-tags?tags=tokens&logic=INVALID&limit=10")
    assert resp.status_code == 400

    payload = resp.json()
    assert "error" in payload
def test_api_cards_by_tags_empty_tags(client):
    """An empty tags value yields 400 with an error field."""
    resp = client.get("/api/cards/by-tags?tags=&logic=AND&limit=10")
    assert resp.status_code == 400

    payload = resp.json()
    assert "error" in payload
def test_api_tags_search(client):
    """Tag search echoes the query and returns prefix matches."""
    resp = client.get("/api/cards/tags/search?q=life&limit=10")
    assert resp.status_code == 200

    payload = resp.json()
    assert "query" in payload
    assert payload["query"] == "life"
    assert "matches" in payload
    assert isinstance(payload["matches"], list)

    # Inspect the shape of the first match, when there is one.
    if payload["matches"]:
        first_match = payload["matches"][0]
        assert "tag" in first_match
        assert "card_count" in first_match
        assert first_match["tag"].lower().startswith("life")
def test_api_tags_search_min_length(client):
    """A one-character search query is rejected with 422."""
    resp = client.get("/api/cards/tags/search?q=a&limit=10")

    # Validation is expected to refuse the too-short query.
    assert resp.status_code == 422
def test_api_tags_popular(client):
    """The popular-tags endpoint returns a bounded, count-ordered list."""
    resp = client.get("/api/cards/tags/popular?limit=20")
    assert resp.status_code == 200

    payload = resp.json()
    assert "count" in payload
    assert "tags" in payload
    assert isinstance(payload["tags"], list)
    assert payload["count"] == len(payload["tags"])
    assert payload["count"] <= 20

    # Inspect the shape of the first entry, when there is one.
    if payload["tags"]:
        top_entry = payload["tags"][0]
        assert "tag" in top_entry
        assert "card_count" in top_entry
        assert isinstance(top_entry["card_count"], int)

    # Descending order is spot-checked on the first two entries only.
    if len(payload["tags"]) > 1:
        assert payload["tags"][0]["card_count"] >= payload["tags"][1]["card_count"]
def test_api_tags_popular_limit(client):
    """No more than `limit` popular tags are returned."""
    resp = client.get("/api/cards/tags/popular?limit=5")
    assert resp.status_code == 200

    payload = resp.json()
    assert len(payload["tags"]) <= 5
def test_commanders_page_loads(client):
    """The commanders page renders HTML containing the theme filter markers."""
    resp = client.get("/commanders")

    assert resp.status_code == 200
    assert "text/html" in resp.headers["content-type"]

    html = resp.text
    # Markers for the theme filter input and its suggestion container.
    for marker in ("commander-theme", "theme-suggestions"):
        assert marker in html
def test_commanders_page_with_theme_filter(client):
    """Test commanders page with theme query parameter."""
    response = client.get("/commanders?theme=tokens")
    assert response.status_code == 200

    content = response.text
    # The theme value must appear somewhere in the rendered page. The earlier
    # `'value="tokens"' in content or ...` alternative was subsumed by this
    # substring check, so it never affected the outcome.
    assert "tokens" in content
@pytest.mark.skip(reason="Performance test - run manually")
def test_theme_autocomplete_performance(client):
    """Test that theme autocomplete responds quickly."""
    import time

    # perf_counter() is a monotonic, high-resolution clock; time.time() can
    # jump when the system clock is adjusted and skew the measured interval.
    start = time.perf_counter()
    response = client.get("/commanders/theme-autocomplete?theme=to&limit=20")
    elapsed = time.perf_counter() - start

    assert response.status_code == 200
    assert elapsed < 0.05  # Should respond in <50ms
@pytest.mark.skip(reason="Performance test - run manually")
def test_api_tags_search_performance(client):
    """Test that tag search responds quickly."""
    import time

    # perf_counter() is a monotonic, high-resolution clock; time.time() can
    # jump when the system clock is adjusted and skew the measured interval.
    start = time.perf_counter()
    response = client.get("/api/cards/tags/search?q=to&limit=20")
    elapsed = time.perf_counter() - start

    assert response.status_code == 200
    assert elapsed < 0.05  # Should respond in <50ms