mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-16 23:50:12 +01:00
feat: theme catalog optimization with tag search and faster enrichment
This commit is contained in:
parent
952b151162
commit
9e6c68f559
26 changed files with 5906 additions and 5688 deletions
19
CHANGELOG.md
19
CHANGELOG.md
|
|
@ -9,21 +9,24 @@ This format follows Keep a Changelog principles and aims for Semantic Versioning
|
||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
### Summary
|
### Summary
|
||||||
Improved performance with new card data storage format. Card queries are now significantly faster with reduced file sizes.
|
Theme catalog improvements with faster processing, new tag search features, and regeneration fixes.
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
- **Card Data Consolidation**: All card data now stored in optimized format for faster loading
|
- **Theme Catalog Optimization**:
|
||||||
- Automatic updates after tagging/setup completes
|
- Consolidated theme enrichment pipeline (single pass instead of 7 separate scripts)
|
||||||
- "Rebuild Card Files" button in Setup page for manual refresh
|
- Tag index for fast theme-based card queries
|
||||||
- 87% smaller file sizes with dramatically faster queries
|
- Tag search API with new endpoints for card search, autocomplete, and popular tags
|
||||||
- Maintains multiple backup versions for safety
|
- Commander browser theme autocomplete with keyboard navigation
|
||||||
- **Backward Compatibility**: Existing functionality continues to work without changes
|
- Tag loading infrastructure for batch operations
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
_No unreleased changes yet._
|
_No unreleased changes yet._
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
_No unreleased fixes yet._
|
- **Theme Regeneration**: Theme catalog can now be fully rebuilt from scratch without placeholder data
|
||||||
|
- Fixed "Anchor" placeholder issue when regenerating catalog
|
||||||
|
- Examples now generated from actual card data
|
||||||
|
- Theme export preserves all metadata fields
|
||||||
|
|
||||||
## [2.7.1] - 2025-10-14
|
## [2.7.1] - 2025-10-14
|
||||||
### Summary
|
### Summary
|
||||||
|
|
|
||||||
|
|
@ -1,18 +1,23 @@
|
||||||
# MTG Python Deckbuilder ${VERSION}
|
# MTG Python Deckbuilder ${VERSION}
|
||||||
|
|
||||||
### Summary
|
### Summary
|
||||||
Improved performance with new card data storage format. Card queries are now significantly faster with reduced file sizes.
|
Theme catalog improvements with faster processing, tag search features, and regeneration fixes.
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
- **Card Data Consolidation**: All card data now stored in optimized format for faster loading
|
- **Theme Catalog Optimization**:
|
||||||
- Automatic updates after tagging/setup completes
|
- Consolidated theme enrichment pipeline
|
||||||
- "Rebuild Card Files" button in Setup page for manual refresh
|
- Tag search API for theme-based card discovery
|
||||||
- 87% smaller file sizes with dramatically faster queries
|
- Commander browser theme autocomplete with keyboard navigation
|
||||||
- Maintains multiple backup versions for safety
|
- Tag index for faster queries
|
||||||
- **Backward Compatibility**: Existing functionality continues to work without changes
|
- **Card Data Consolidation** (from previous release):
|
||||||
|
- Optimized format with smaller file sizes
|
||||||
|
- "Rebuild Card Files" button in Setup page
|
||||||
|
- Automatic updates after tagging/setup
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
_No unreleased changes yet._
|
_No unreleased changes yet._
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
_No unreleased fixes yet._
|
- **Theme Regeneration**: Theme catalog can now be fully rebuilt from scratch
|
||||||
|
- Fixed placeholder data appearing in fresh installations
|
||||||
|
- Examples now generated from actual card data
|
||||||
|
|
|
||||||
|
|
@ -1,203 +0,0 @@
|
||||||
"""
|
|
||||||
Full audit of Protection-tagged cards with kindred metadata support (M2 Phase 2).
|
|
||||||
|
|
||||||
Created: October 8, 2025
|
|
||||||
Purpose: Audit and validate Protection tag precision after implementing grant detection.
|
|
||||||
Can be re-run periodically to check tagging quality.
|
|
||||||
|
|
||||||
This script audits ALL Protection-tagged cards and categorizes them:
|
|
||||||
- Grant: Gives broad protection to other permanents YOU control
|
|
||||||
- Kindred: Gives protection to specific creature types (metadata tags)
|
|
||||||
- Mixed: Both broad and kindred/inherent
|
|
||||||
- Inherent: Only has protection itself
|
|
||||||
- ConditionalSelf: Only conditionally grants to itself
|
|
||||||
- Opponent: Grants to opponent's permanents
|
|
||||||
- Neither: False positive
|
|
||||||
|
|
||||||
Outputs:
|
|
||||||
- m2_audit_v2.json: Full analysis with summary
|
|
||||||
- m2_audit_v2_grant.csv: Cards for main Protection tag
|
|
||||||
- m2_audit_v2_kindred.csv: Cards for kindred metadata tags
|
|
||||||
- m2_audit_v2_mixed.csv: Cards with both broad and kindred grants
|
|
||||||
- m2_audit_v2_conditional.csv: Conditional self-grants (exclude)
|
|
||||||
- m2_audit_v2_inherent.csv: Inherent protection only (exclude)
|
|
||||||
- m2_audit_v2_opponent.csv: Opponent grants (exclude)
|
|
||||||
- m2_audit_v2_neither.csv: False positives (exclude)
|
|
||||||
- m2_audit_v2_all.csv: All cards combined
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
|
||||||
import pandas as pd
|
|
||||||
import json
|
|
||||||
|
|
||||||
# Add project root to path
|
|
||||||
project_root = Path(__file__).parent.parent.parent
|
|
||||||
sys.path.insert(0, str(project_root))
|
|
||||||
|
|
||||||
from code.tagging.protection_grant_detection import (
|
|
||||||
categorize_protection_card,
|
|
||||||
get_kindred_protection_tags,
|
|
||||||
is_granting_protection,
|
|
||||||
)
|
|
||||||
|
|
||||||
def load_all_cards():
    """Load every per-color/identity card CSV into one de-duplicated DataFrame.

    Files matching ``*_cards.csv`` under ``<project_root>/csv_files`` are read
    in sorted (deterministic) order, concatenated, and de-duplicated on the
    ``name`` column, keeping the first occurrence.

    Returns:
        pandas.DataFrame with one row per unique card name.

    Raises:
        ValueError: if no CSV file could be loaded (previously surfaced as
            pandas' opaque "No objects to concatenate" ValueError).
    """
    csv_dir = project_root / 'csv_files'

    # Sort so that which duplicate survives ``keep='first'`` below does not
    # depend on filesystem enumeration order.
    # NOTE(review): the glob only yields stems ending in '_cards', so this
    # exclusion list looks like it can never match - confirm the intended names.
    csv_files = sorted(
        f for f in csv_dir.glob('*_cards.csv')
        if f.stem not in ['cards', 'testdata']
    )

    all_cards = []
    for csv_file in csv_files:
        try:
            all_cards.append(pd.read_csv(csv_file))
        except Exception as e:
            # Best effort: one unreadable file should not abort the whole audit.
            print(f"Warning: Could not load {csv_file.name}: {e}")

    if not all_cards:
        raise ValueError(f"No card CSV files could be loaded from {csv_dir}")

    combined = pd.concat(all_cards, ignore_index=True)

    # Cards appear in multiple color files; keep a single row per name.
    return combined.drop_duplicates(subset=['name'], keep='first')
|
|
||||||
|
|
||||||
def _cell_text(row, column):
    """Return the cell at ``column`` as text, treating missing/NaN cells as ''."""
    value = row.get(column, '')
    # Empty CSV cells are read by pandas as float NaN; str(NaN) would be the
    # literal 'nan', which must not be fed to the detection helpers.
    if value is None or (isinstance(value, float) and pd.isna(value)):
        return ''
    return str(value)


def _percent(part, whole):
    """Return ``part`` as a percentage of ``whole`` (0.0 when ``whole`` is 0)."""
    return (part / whole) * 100 if whole else 0.0


def audit_all_protection_cards():
    """Audit all Protection-tagged cards and export the categorized results.

    Loads the combined card data, keeps rows whose ``themeTags`` mention
    "Protection", categorizes each one with the protection-grant detection
    helpers, prints a summary, and writes one JSON file plus per-category CSVs
    to ``logs/roadmaps/source/tagging_refinement`` under the project root.

    Returns:
        tuple: ``(df_prot, summary)`` - the annotated DataFrame (with added
        ``category``, ``grants_broad`` and ``kindred_tags`` columns) and the
        summary dict that is also written to the JSON export.
    """
    print("Loading all cards...")
    df = load_all_cards()

    print(f"Total cards loaded: {len(df)}")

    # Filter to Protection-tagged cards (column is 'themeTags' in color CSVs)
    df_prot = df[df['themeTags'].str.contains('Protection', case=False, na=False)].copy()
    total = len(df_prot)

    print(f"Protection-tagged cards: {total}")

    # Categorize each card
    categories = []
    grants_list = []
    kindred_tags_list = []

    for _, row in df_prot.iterrows():
        name = row['name']
        text = _cell_text(row, 'text').replace('\\n', '\n')  # Convert escaped newlines to real newlines
        keywords = _cell_text(row, 'keywords')
        card_type = _cell_text(row, 'type')

        # Categorize with kindred exclusion enabled
        categories.append(
            categorize_protection_card(name, text, keywords, card_type, exclude_kindred=True)
        )
        # Check if it grants broadly
        grants_list.append(is_granting_protection(text, keywords, exclude_kindred=True))
        # Get kindred tags
        kindred_tags = get_kindred_protection_tags(text)
        kindred_tags_list.append(', '.join(sorted(kindred_tags)) if kindred_tags else '')

    df_prot['category'] = categories
    df_prot['grants_broad'] = grants_list
    df_prot['kindred_tags'] = kindred_tags_list

    # Generate summary (convert numpy types to native Python for JSON serialization)
    summary = {
        'total': int(total),
        'categories': {k: int(v) for k, v in df_prot['category'].value_counts().to_dict().items()},
        'grants_broad_count': int(df_prot['grants_broad'].sum()),
        'kindred_cards_count': int((df_prot['kindred_tags'] != '').sum()),
    }

    # Calculate keep vs remove
    keep_categories = {'Grant', 'Mixed'}
    kindred_count = len(df_prot[df_prot['category'] == 'Kindred'])
    keep_count = len(df_prot[df_prot['category'].isin(keep_categories)])
    remove_count = len(df_prot[~df_prot['category'].isin(keep_categories | {'Kindred'})])

    summary['keep_main_tag'] = keep_count
    summary['kindred_metadata'] = kindred_count
    summary['remove'] = remove_count
    summary['precision_estimate'] = round((keep_count / total) * 100, 1) if total > 0 else 0

    # Print summary; _percent keeps this safe when no card is Protection-tagged.
    print(f"\n{'='*60}")
    print("AUDIT SUMMARY")
    print(f"{'='*60}")
    print(f"Total Protection-tagged cards: {summary['total']}")
    print("\nCategories:")
    for cat, count in sorted(summary['categories'].items()):
        pct = _percent(count, summary['total'])
        print(f" {cat:20s} {count:4d} ({pct:5.1f}%)")

    print(f"\n{'='*60}")
    print(f"Main Protection tag: {keep_count:4d} ({_percent(keep_count, total):5.1f}%)")
    print(f"Kindred metadata only: {kindred_count:4d} ({_percent(kindred_count, total):5.1f}%)")
    print(f"Remove: {remove_count:4d} ({_percent(remove_count, total):5.1f}%)")
    print(f"{'='*60}")
    print(f"Precision estimate: {summary['precision_estimate']}%")
    print(f"{'='*60}\n")

    # Export results
    output_dir = project_root / 'logs' / 'roadmaps' / 'source' / 'tagging_refinement'
    output_dir.mkdir(parents=True, exist_ok=True)

    export_cols = ['name', 'type', 'category', 'grants_broad', 'kindred_tags', 'keywords', 'text']

    # Export JSON summary (explicit UTF-8 so card text is written consistently
    # regardless of the platform's default encoding).
    with open(output_dir / 'm2_audit_v2.json', 'w', encoding='utf-8') as f:
        json.dump({
            'summary': summary,
            'cards': df_prot[export_cols].to_dict(orient='records')
        }, f, indent=2)

    # Export CSVs by category: (category value, output file name).
    category_exports = (
        ('Grant', 'm2_audit_v2_grant.csv'),
        ('Kindred', 'm2_audit_v2_kindred.csv'),
        ('Mixed', 'm2_audit_v2_mixed.csv'),
        ('ConditionalSelf', 'm2_audit_v2_conditional.csv'),
        ('Inherent', 'm2_audit_v2_inherent.csv'),
        ('Opponent', 'm2_audit_v2_opponent.csv'),
        ('Neither', 'm2_audit_v2_neither.csv'),
    )
    for category, filename in category_exports:
        subset = df_prot[df_prot['category'] == category]
        subset[export_cols].to_csv(output_dir / filename, index=False)
        print(f"Exported {len(subset)} {category} cards to {filename}")

    # All cards
    df_prot[export_cols].to_csv(output_dir / 'm2_audit_v2_all.csv', index=False)
    print(f"Exported {total} total cards to m2_audit_v2_all.csv")

    print(f"\nAll files saved to: {output_dir}")

    return df_prot, summary
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
df_results, summary = audit_all_protection_cards()
|
|
||||||
|
|
@ -1,118 +0,0 @@
|
||||||
"""Opt-in guard that compares multi-theme filter performance to a stored baseline.
|
|
||||||
|
|
||||||
Run inside the project virtual environment:
|
|
||||||
|
|
||||||
python -m code.scripts.check_random_theme_perf --baseline config/random_theme_perf_baseline.json
|
|
||||||
|
|
||||||
The script executes the same profiling loop as `profile_multi_theme_filter` and fails
|
|
||||||
if the observed mean or p95 timings regress more than the allowed threshold.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Dict, Tuple
|
|
||||||
|
|
||||||
PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
|
||||||
DEFAULT_BASELINE = PROJECT_ROOT / "config" / "random_theme_perf_baseline.json"
|
|
||||||
|
|
||||||
if str(PROJECT_ROOT) not in sys.path:
|
|
||||||
sys.path.append(str(PROJECT_ROOT))
|
|
||||||
|
|
||||||
from code.scripts.profile_multi_theme_filter import run_profile # type: ignore # noqa: E402
|
|
||||||
|
|
||||||
|
|
||||||
def _load_baseline(path: Path) -> Dict[str, Any]:
|
|
||||||
if not path.exists():
|
|
||||||
raise FileNotFoundError(f"Baseline file not found: {path}")
|
|
||||||
data = json.loads(path.read_text(encoding="utf-8"))
|
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
def _extract(metric: Dict[str, Any], key: str) -> float:
|
|
||||||
try:
|
|
||||||
value = float(metric.get(key, 0.0))
|
|
||||||
except Exception:
|
|
||||||
value = 0.0
|
|
||||||
return value
|
|
||||||
|
|
||||||
|
|
||||||
def _check_section(name: str, actual: Dict[str, Any], baseline: Dict[str, Any], threshold: float) -> Tuple[bool, str]:
    """Compare one section's mean and p95 timings against its baseline.

    Each metric passes when the observed value is at most
    ``baseline * (1 + threshold)``, or when the baseline value is 0.0
    (nothing to compare against).

    Returns:
        ``(ok, report)`` where ``ok`` is True only if both metrics pass and
        ``report`` is a multi-line, human-readable summary.
    """
    factor = 1.0 + threshold
    report_lines = [f"Section: {name}"]
    verdicts = []

    for label, metric_key in (("mean", "mean_ms"), ("p95", "p95_ms")):
        observed = _extract(actual, metric_key)
        reference = _extract(baseline, metric_key)
        limit = reference * factor
        passed = observed <= limit or reference == 0.0
        verdicts.append(passed)

        if reference:
            delta = (observed - reference) / reference * 100.0
            trend_text = f"{delta:+.1f}%"
            limit_text = f"≤ {limit:.3f}ms"
        else:
            # Without a baseline there is no trend or limit to display.
            trend_text = "n/a"
            limit_text = "n/a"

        outcome = 'OK' if passed else 'FAIL'
        report_lines.append(
            f" {label:<6} actual={observed:.3f}ms baseline={reference:.3f}ms ({trend_text}), limit {limit_text} -> {outcome}"
        )

    return all(verdicts), "\n".join(report_lines)
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv: list[str] | None = None) -> int:
    """Run the profiling loop and compare it against the stored baseline.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit code: 0 when timings are within the threshold (or the
        baseline was rewritten via ``--update-baseline``), 1 on regression.

    Raises:
        FileNotFoundError: if the baseline file is missing and
            ``--update-baseline`` was not given.
    """
    parser = argparse.ArgumentParser(description="Check multi-theme filtering performance against a baseline")
    parser.add_argument("--baseline", type=Path, default=DEFAULT_BASELINE, help="Baseline JSON file (default: config/random_theme_perf_baseline.json)")
    parser.add_argument("--iterations", type=int, default=400, help="Number of iterations to sample (default: 400)")
    parser.add_argument("--seed", type=int, default=None, help="Optional RNG seed for reproducibility")
    # argparse %-formats help strings, so the literal percent sign must be
    # escaped as '%%'; an unescaped '15%)' makes --help raise ValueError.
    parser.add_argument("--threshold", type=float, default=0.15, help="Allowed regression threshold as a fraction (default: 0.15 = 15%%)")
    parser.add_argument("--update-baseline", action="store_true", help="Overwrite the baseline file with the newly collected metrics")
    args = parser.parse_args(argv)

    baseline_path = args.baseline if args.baseline else DEFAULT_BASELINE
    if args.update_baseline and not baseline_path.parent.exists():
        baseline_path.parent.mkdir(parents=True, exist_ok=True)

    # When rewriting the baseline there is nothing to compare against, so an
    # empty baseline is used and every check reports "n/a".
    if not args.update_baseline:
        baseline = _load_baseline(baseline_path)
    else:
        baseline = {}

    results = run_profile(args.iterations, args.seed)

    cascade_status, cascade_report = _check_section("cascade", results.get("cascade", {}), baseline.get("cascade", {}), args.threshold)
    synergy_status, synergy_report = _check_section("synergy", results.get("synergy", {}), baseline.get("synergy", {}), args.threshold)

    print("Iterations:", results.get("iterations"))
    print("Seed:", results.get("seed"))
    print(cascade_report)
    print(synergy_report)

    overall_ok = cascade_status and synergy_status

    if args.update_baseline:
        payload = {
            "iterations": results.get("iterations"),
            "seed": results.get("seed"),
            "cascade": results.get("cascade"),
            "synergy": results.get("synergy"),
        }
        baseline_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
        print(f"Baseline updated → {baseline_path}")
        return 0

    if not overall_ok:
        print(f"FAIL: performance regressions exceeded {args.threshold * 100:.1f}% threshold", file=sys.stderr)
        return 1

    print("PASS: performance within allowed threshold")
    return 0
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__": # pragma: no cover
|
|
||||||
raise SystemExit(main())
|
|
||||||
135
code/scripts/enrich_themes.py
Normal file
135
code/scripts/enrich_themes.py
Normal file
|
|
@ -0,0 +1,135 @@
|
||||||
|
"""CLI wrapper for theme enrichment pipeline.
|
||||||
|
|
||||||
|
Runs the consolidated theme enrichment pipeline with command-line options.
|
||||||
|
For backward compatibility, individual scripts can still be run separately,
|
||||||
|
but this provides a faster single-pass alternative.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python code/scripts/enrich_themes.py --write
|
||||||
|
python code/scripts/enrich_themes.py --dry-run --enforce-min
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add project root to path
|
||||||
|
ROOT = Path(__file__).resolve().parents[2]
|
||||||
|
if str(ROOT) not in sys.path:
|
||||||
|
sys.path.insert(0, str(ROOT))
|
||||||
|
|
||||||
|
# Import after adding to path
|
||||||
|
from code.tagging.theme_enrichment import run_enrichment_pipeline # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Run the theme enrichment pipeline from the command line.

    Parses CLI flags, resolves the write mode and the minimum-examples
    threshold (``--min`` overrides ``$EDITORIAL_MIN_EXAMPLES``, default 5),
    then calls ``run_enrichment_pipeline``.

    Returns:
        0 on success, 1 if the pipeline reported lint errors or raised,
        130 if interrupted with Ctrl-C.
    """
    parser = argparse.ArgumentParser(
        description='Consolidated theme metadata enrichment pipeline',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Dry run (no changes written):
  python code/scripts/enrich_themes.py --dry-run

  # Write changes:
  python code/scripts/enrich_themes.py --write

  # Enforce minimum examples (errors if insufficient):
  python code/scripts/enrich_themes.py --write --enforce-min

  # Strict validation for cornerstone themes:
  python code/scripts/enrich_themes.py --write --strict

Note: This replaces running 7 separate scripts (autofill, pad, cleanup, purge,
augment, suggestions, lint) with a single 5-10x faster operation.
"""
    )

    parser.add_argument(
        '--write',
        action='store_true',
        help='Write changes to disk (default: dry run)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Dry run mode: show what would be changed without writing'
    )
    parser.add_argument(
        '--min',
        '--min-examples',
        type=int,
        default=None,
        metavar='N',
        help='Minimum number of example commanders (default: $EDITORIAL_MIN_EXAMPLES or 5)'
    )
    parser.add_argument(
        '--enforce-min',
        action='store_true',
        help='Treat minimum examples violations as errors'
    )
    parser.add_argument(
        '--strict',
        action='store_true',
        help='Enable strict validation (cornerstone themes must have examples)'
    )
    # Declared so argparse accepts it: previously '--debug' was only looked up
    # in sys.argv, and parse_args() rejected it as an unrecognized argument,
    # which made the re-raise branch below unreachable.
    parser.add_argument(
        '--debug',
        action='store_true',
        help='Re-raise unexpected errors to show the full traceback'
    )

    args = parser.parse_args()

    # Determine write mode; --dry-run wins if both flags are given.
    if args.dry_run:
        write = False
    elif args.write:
        write = True
    else:
        # Default to dry run if neither specified
        write = False
        print("Note: Running in dry-run mode (use --write to save changes)\n")

    # Get minimum examples threshold
    if args.min is not None:
        min_examples = args.min
    else:
        raw_min = os.environ.get('EDITORIAL_MIN_EXAMPLES', '5')
        try:
            min_examples = int(raw_min)
        except ValueError:
            # Report a bad environment value as a usage error, not a traceback.
            parser.error(f"EDITORIAL_MIN_EXAMPLES must be an integer, got {raw_min!r}")

    print("Theme Enrichment Pipeline")
    print("========================")
    print(f"Mode: {'WRITE' if write else 'DRY RUN'}")
    print(f"Min examples: {min_examples}")
    print(f"Enforce min: {args.enforce_min}")
    print(f"Strict: {args.strict}")
    print()

    try:
        stats = run_enrichment_pipeline(
            root=ROOT,
            min_examples=min_examples,
            write=write,
            enforce_min=args.enforce_min,
            strict=args.strict,
            progress_callback=None,  # Use default print
        )

        # Return non-zero if there are lint errors
        if stats.lint_errors > 0:
            print(f"\n❌ Enrichment completed with {stats.lint_errors} error(s)")
            return 1

        print("\n✅ Enrichment completed successfully")
        return 0

    except KeyboardInterrupt:
        print("\n\nInterrupted by user")
        return 130
    except Exception as e:
        print(f"\n❌ Error: {e}", file=sys.stderr)
        if args.debug:
            raise
        return 1
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
raise SystemExit(main())
|
||||||
|
|
@ -123,6 +123,9 @@ def main():
|
||||||
enforced_set = set(enforced_synergies)
|
enforced_set = set(enforced_synergies)
|
||||||
inferred_synergies = [s for s in synergy_list if s not in curated_set and s not in enforced_set]
|
inferred_synergies = [s for s in synergy_list if s not in curated_set and s not in enforced_set]
|
||||||
|
|
||||||
|
example_cards_value = entry.get('example_cards', [])
|
||||||
|
example_commanders_value = entry.get('example_commanders', [])
|
||||||
|
|
||||||
doc = {
|
doc = {
|
||||||
'id': slug,
|
'id': slug,
|
||||||
'display_name': theme_name,
|
'display_name': theme_name,
|
||||||
|
|
@ -132,13 +135,40 @@ def main():
|
||||||
'inferred_synergies': inferred_synergies,
|
'inferred_synergies': inferred_synergies,
|
||||||
'primary_color': entry.get('primary_color'),
|
'primary_color': entry.get('primary_color'),
|
||||||
'secondary_color': entry.get('secondary_color'),
|
'secondary_color': entry.get('secondary_color'),
|
||||||
|
'example_cards': example_cards_value,
|
||||||
|
'example_commanders': example_commanders_value,
|
||||||
|
'synergy_example_cards': entry.get('synergy_example_cards', []),
|
||||||
|
'synergy_commanders': entry.get('synergy_commanders', []),
|
||||||
|
'deck_archetype': entry.get('deck_archetype'),
|
||||||
|
'popularity_hint': entry.get('popularity_hint'),
|
||||||
|
'popularity_bucket': entry.get('popularity_bucket'),
|
||||||
|
'editorial_quality': entry.get('editorial_quality'),
|
||||||
|
'description': entry.get('description'),
|
||||||
'notes': ''
|
'notes': ''
|
||||||
}
|
}
|
||||||
# Drop None color keys for cleanliness
|
# Drop None/empty keys for cleanliness
|
||||||
if doc['primary_color'] is None:
|
if doc['primary_color'] is None:
|
||||||
doc.pop('primary_color')
|
doc.pop('primary_color')
|
||||||
if doc.get('secondary_color') is None:
|
if doc.get('secondary_color') is None:
|
||||||
doc.pop('secondary_color')
|
doc.pop('secondary_color')
|
||||||
|
if not doc.get('example_cards'):
|
||||||
|
doc.pop('example_cards')
|
||||||
|
if not doc.get('example_commanders'):
|
||||||
|
doc.pop('example_commanders')
|
||||||
|
if not doc.get('synergy_example_cards'):
|
||||||
|
doc.pop('synergy_example_cards')
|
||||||
|
if not doc.get('synergy_commanders'):
|
||||||
|
doc.pop('synergy_commanders')
|
||||||
|
if doc.get('deck_archetype') is None:
|
||||||
|
doc.pop('deck_archetype')
|
||||||
|
if doc.get('popularity_hint') is None:
|
||||||
|
doc.pop('popularity_hint')
|
||||||
|
if doc.get('popularity_bucket') is None:
|
||||||
|
doc.pop('popularity_bucket')
|
||||||
|
if doc.get('editorial_quality') is None:
|
||||||
|
doc.pop('editorial_quality')
|
||||||
|
if doc.get('description') is None:
|
||||||
|
doc.pop('description')
|
||||||
with path.open('w', encoding='utf-8') as f:
|
with path.open('w', encoding='utf-8') as f:
|
||||||
yaml.safe_dump(doc, f, sort_keys=False, allow_unicode=True)
|
yaml.safe_dump(doc, f, sort_keys=False, allow_unicode=True)
|
||||||
exported += 1
|
exported += 1
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,13 @@ from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, Iterable, List, Optional, Sequence
|
from typing import Dict, Iterable, List, Optional, Sequence
|
||||||
|
|
||||||
|
try:
|
||||||
|
import pandas as pd
|
||||||
|
HAS_PANDAS = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_PANDAS = False
|
||||||
|
pd = None # type: ignore
|
||||||
|
|
||||||
ROOT = Path(__file__).resolve().parents[2]
|
ROOT = Path(__file__).resolve().parents[2]
|
||||||
CODE_ROOT = ROOT / "code"
|
CODE_ROOT = ROOT / "code"
|
||||||
if str(CODE_ROOT) not in sys.path:
|
if str(CODE_ROOT) not in sys.path:
|
||||||
|
|
@ -29,6 +36,9 @@ try:
|
||||||
except Exception: # pragma: no cover - fallback for adhoc execution
|
except Exception: # pragma: no cover - fallback for adhoc execution
|
||||||
DEFAULT_CSV_DIRECTORY = "csv_files"
|
DEFAULT_CSV_DIRECTORY = "csv_files"
|
||||||
|
|
||||||
|
# Parquet support requires pandas (imported at top of file, uses pyarrow under the hood)
|
||||||
|
HAS_PARQUET_SUPPORT = HAS_PANDAS
|
||||||
|
|
||||||
DEFAULT_OUTPUT_PATH = ROOT / "config" / "themes" / "theme_catalog.csv"
|
DEFAULT_OUTPUT_PATH = ROOT / "config" / "themes" / "theme_catalog.csv"
|
||||||
HEADER_COMMENT_PREFIX = "# theme_catalog"
|
HEADER_COMMENT_PREFIX = "# theme_catalog"
|
||||||
|
|
||||||
|
|
@ -87,7 +97,68 @@ def parse_theme_tags(value: object) -> List[str]:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def _load_theme_counts_from_parquet(
    parquet_path: Path,
    theme_variants: Dict[str, set[str]]
) -> Counter[str]:
    """Count theme occurrences in the ``themeTags`` column of a parquet file.

    Each theme is counted at most once per row. Every display spelling seen
    for a theme is recorded in ``theme_variants`` under its canonical key.

    Args:
        parquet_path: Parquet file to read.
        theme_variants: Mapping of canonical key to display variants; mutated
            in place.

    Returns:
        Counter keyed by canonical theme key. Empty when pandas is
        unavailable, the file does not exist, or the column cannot be read.
    """
    tally: Counter[str] = Counter()

    if pd is None or not parquet_path.exists():
        return tally

    # Only the themeTags column is requested; any read failure yields an
    # empty result rather than an error.
    try:
        frame = pd.read_parquet(parquet_path, columns=["themeTags"])
    except Exception:
        return tally

    for cell in frame["themeTags"].tolist():
        if cell is None:
            continue
        if isinstance(cell, float) and pd.isna(cell):
            continue

        parsed = parse_theme_tags(cell)
        if not parsed:
            continue

        # Keys already counted for this row, so repeats within a row count once.
        row_keys: set[str] = set()
        for tag in parsed:
            label = normalize_theme_display(tag)
            if not label:
                continue
            canon = canonical_key(label)
            if canon not in row_keys:
                row_keys.add(canon)
                tally[canon] += 1
                theme_variants[canon].add(label)

    return tally
|
||||||
|
|
||||||
|
|
||||||
def _load_theme_counts(csv_path: Path, theme_variants: Dict[str, set[str]]) -> Counter[str]:
|
def _load_theme_counts(csv_path: Path, theme_variants: Dict[str, set[str]]) -> Counter[str]:
|
||||||
|
"""Load theme counts from CSV file (fallback method).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
csv_path: Path to CSV file
|
||||||
|
theme_variants: Dict to accumulate theme name variants
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Counter of theme occurrences
|
||||||
|
"""
|
||||||
counts: Counter[str] = Counter()
|
counts: Counter[str] = Counter()
|
||||||
if not csv_path.exists():
|
if not csv_path.exists():
|
||||||
return counts
|
return counts
|
||||||
|
|
@ -146,24 +217,67 @@ def build_theme_catalog(
|
||||||
commander_filename: str = "commander_cards.csv",
|
commander_filename: str = "commander_cards.csv",
|
||||||
cards_filename: str = "cards.csv",
|
cards_filename: str = "cards.csv",
|
||||||
logs_directory: Optional[Path] = None,
|
logs_directory: Optional[Path] = None,
|
||||||
|
use_parquet: bool = True,
|
||||||
) -> CatalogBuildResult:
|
) -> CatalogBuildResult:
|
||||||
|
"""Build theme catalog from card data.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
csv_directory: Directory containing CSV files (fallback)
|
||||||
|
output_path: Where to write the catalog CSV
|
||||||
|
generated_at: Optional timestamp for generation
|
||||||
|
commander_filename: Name of commander CSV file
|
||||||
|
cards_filename: Name of cards CSV file
|
||||||
|
logs_directory: Optional directory to copy output to
|
||||||
|
use_parquet: If True, try to use all_cards.parquet first (default: True)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
CatalogBuildResult with generated rows and metadata
|
||||||
|
"""
|
||||||
csv_directory = csv_directory.resolve()
|
csv_directory = csv_directory.resolve()
|
||||||
output_path = output_path.resolve()
|
output_path = output_path.resolve()
|
||||||
|
|
||||||
theme_variants: Dict[str, set[str]] = defaultdict(set)
|
theme_variants: Dict[str, set[str]] = defaultdict(set)
|
||||||
|
|
||||||
commander_counts = _load_theme_counts(csv_directory / commander_filename, theme_variants)
|
# Try to use parquet file first (much faster)
|
||||||
|
used_parquet = False
|
||||||
|
if use_parquet and HAS_PARQUET_SUPPORT:
|
||||||
|
try:
|
||||||
|
# Use dedicated parquet files (matches CSV structure exactly)
|
||||||
|
parquet_dir = csv_directory.parent / "card_files"
|
||||||
|
|
||||||
|
# Load commander counts directly from commander_cards.parquet
|
||||||
|
commander_parquet = parquet_dir / "commander_cards.parquet"
|
||||||
|
commander_counts = _load_theme_counts_from_parquet(
|
||||||
|
commander_parquet, theme_variants=theme_variants
|
||||||
|
)
|
||||||
|
|
||||||
|
# CSV method doesn't load non-commander cards, so we don't either
|
||||||
|
card_counts = Counter()
|
||||||
|
|
||||||
|
used_parquet = True
|
||||||
|
print("✓ Loaded theme data from parquet files")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"⚠ Failed to load from parquet: {e}")
|
||||||
|
print(" Falling back to CSV files...")
|
||||||
|
used_parquet = False
|
||||||
|
|
||||||
|
# Fallback to CSV files if parquet not available or failed
|
||||||
|
if not used_parquet:
|
||||||
|
commander_counts = _load_theme_counts(csv_directory / commander_filename, theme_variants)
|
||||||
|
|
||||||
card_counts: Counter[str] = Counter()
|
card_counts: Counter[str] = Counter()
|
||||||
cards_path = csv_directory / cards_filename
|
cards_path = csv_directory / cards_filename
|
||||||
if cards_path.exists():
|
if cards_path.exists():
|
||||||
card_counts = _load_theme_counts(cards_path, theme_variants)
|
card_counts = _load_theme_counts(cards_path, theme_variants)
|
||||||
else:
|
else:
|
||||||
# Fallback: scan all *_cards.csv except commander
|
# Fallback: scan all *_cards.csv except commander
|
||||||
for candidate in csv_directory.glob("*_cards.csv"):
|
for candidate in csv_directory.glob("*_cards.csv"):
|
||||||
if candidate.name == commander_filename:
|
if candidate.name == commander_filename:
|
||||||
continue
|
continue
|
||||||
card_counts += _load_theme_counts(candidate, theme_variants)
|
card_counts += _load_theme_counts(candidate, theme_variants)
|
||||||
|
|
||||||
|
print("✓ Loaded theme data from CSV files")
|
||||||
|
|
||||||
keys = sorted(set(card_counts.keys()) | set(commander_counts.keys()))
|
keys = sorted(set(card_counts.keys()) | set(commander_counts.keys()))
|
||||||
generated_at_iso = _derive_generated_at(generated_at)
|
generated_at_iso = _derive_generated_at(generated_at)
|
||||||
|
|
|
||||||
|
|
@ -1,305 +0,0 @@
|
||||||
"""Catalog diff helper for verifying multi-face merge output.
|
|
||||||
|
|
||||||
This utility regenerates the card CSV catalog (optionally writing compatibility
|
|
||||||
snapshots) and then compares the merged outputs against the baseline snapshots.
|
|
||||||
It is intended to support the MDFC rollout checklist by providing a concise summary
|
|
||||||
of how many rows were merged, which cards collapsed into a single record, and
|
|
||||||
whether any tag unions diverge from expectations.
|
|
||||||
|
|
||||||
Example usage (from repo root, inside virtualenv):
|
|
||||||
|
|
||||||
python -m code.scripts.preview_dfc_catalog_diff --compat-snapshot --output logs/dfc_catalog_diff.json
|
|
||||||
|
|
||||||
The script prints a human readable summary to stdout and optionally writes a JSON
|
|
||||||
artifact for release/staging review.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import ast
|
|
||||||
import importlib
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
from collections import Counter
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Dict, Iterable, List, Sequence
|
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
from settings import COLORS, CSV_DIRECTORY
|
|
||||||
|
|
||||||
# Default directory of the unmerged compatibility snapshots; can be overridden
# through the DFC_COMPAT_DIR environment variable (and by --compat-dir).
DEFAULT_COMPAT_DIR = Path(os.getenv("DFC_COMPAT_DIR", "csv_files/compat_faces"))
# Directory the merged per-color "<color>_cards.csv" files are read from.
CSV_ROOT = Path(CSV_DIRECTORY)
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_list_cell(value: Any) -> List[str]:
|
|
||||||
"""Convert serialized list cells ("['A', 'B']") into Python lists."""
|
|
||||||
if isinstance(value, list):
|
|
||||||
return [str(item) for item in value]
|
|
||||||
if value is None:
|
|
||||||
return []
|
|
||||||
if isinstance(value, float) and pd.isna(value): # type: ignore[arg-type]
|
|
||||||
return []
|
|
||||||
text = str(value).strip()
|
|
||||||
if not text:
|
|
||||||
return []
|
|
||||||
try:
|
|
||||||
parsed = ast.literal_eval(text)
|
|
||||||
except (SyntaxError, ValueError):
|
|
||||||
return [text]
|
|
||||||
if isinstance(parsed, list):
|
|
||||||
return [str(item) for item in parsed]
|
|
||||||
return [str(parsed)]
|
|
||||||
|
|
||||||
|
|
||||||
def _load_catalog(path: Path) -> pd.DataFrame:
|
|
||||||
if not path.exists():
|
|
||||||
raise FileNotFoundError(f"Catalog file missing: {path}")
|
|
||||||
df = pd.read_csv(path)
|
|
||||||
for column in ("themeTags", "keywords", "creatureTypes"):
|
|
||||||
if column in df.columns:
|
|
||||||
df[column] = df[column].apply(_parse_list_cell)
|
|
||||||
return df
|
|
||||||
|
|
||||||
|
|
||||||
def _multi_face_names(df: pd.DataFrame) -> List[str]:
|
|
||||||
counts = Counter(df.get("name", []))
|
|
||||||
return [name for name, count in counts.items() if isinstance(name, str) and count > 1]
|
|
||||||
|
|
||||||
|
|
||||||
def _collect_tags(series: Iterable[List[str]]) -> List[str]:
|
|
||||||
tags: List[str] = []
|
|
||||||
for value in series:
|
|
||||||
if isinstance(value, list):
|
|
||||||
tags.extend(str(item) for item in value)
|
|
||||||
return sorted(set(tags))
|
|
||||||
|
|
||||||
|
|
||||||
def _summarize_color(
    color: str,
    merged: pd.DataFrame,
    baseline: pd.DataFrame,
    sample_size: int,
) -> Dict[str, Any]:
    """Compare the merged catalog of one color against its unmerged baseline.

    Args:
        color: Color identifier of the catalogs (not used in the computation).
        merged: Catalog produced with the multi-face merge applied.
        baseline: Unmerged compatibility snapshot for the same color.
        sample_size: Maximum number of names kept in each sample list.

    Returns:
        A dict with the row counts of both frames and their delta, the number
        of names that appear more than once in the baseline
        (``multi_face_groups``), and name lists truncated to ``sample_size``:
        ``collapsed_sample``, ``tag_union_mismatches``, ``missing_after_merge``,
        ``removed_names`` and ``added_names``.
    """
    # Keep only real string names on BOTH sides.  A missing name is read from
    # CSV as float NaN; leaving it in the merged set would make the sorted()
    # call for ``added_names`` fail on a float-vs-str comparison.
    merged_names = {name for name in merged.get("name", []) if isinstance(name, str)}
    baseline_name_set = {
        name for name in baseline.get("name", []) if isinstance(name, str)
    }

    multi_face = _multi_face_names(baseline)
    collapsed: List[str] = []
    tag_mismatches: List[str] = []
    missing_after_merge: List[str] = []

    for name in multi_face:
        group = baseline[baseline["name"] == name]
        merged_row = merged[merged["name"] == name]
        if merged_row.empty:
            missing_after_merge.append(name)
            continue
        # The merged record is expected to carry the union of the tags of all
        # baseline rows sharing this name; only the first merged row is checked.
        expected_tags = _collect_tags(group["themeTags"]) if "themeTags" in group else []
        merged_tags = (
            _collect_tags(merged_row.iloc[[0]]["themeTags"])
            if "themeTags" in merged_row
            else []
        )
        if expected_tags != merged_tags:
            tag_mismatches.append(name)
        collapsed.append(name)

    removed_names = sorted(baseline_name_set - merged_names)
    added_names = sorted(merged_names - baseline_name_set)

    return {
        "rows_merged": len(merged),
        "rows_baseline": len(baseline),
        "row_delta": len(merged) - len(baseline),
        "multi_face_groups": len(multi_face),
        "collapsed_sample": collapsed[:sample_size],
        "tag_union_mismatches": tag_mismatches[:sample_size],
        "missing_after_merge": missing_after_merge[:sample_size],
        "removed_names": removed_names[:sample_size],
        "added_names": added_names[:sample_size],
    }
|
|
||||||
|
|
||||||
|
|
||||||
def _refresh_catalog(colors: Sequence[str], compat_snapshot: bool) -> None:
|
|
||||||
os.environ.pop("ENABLE_DFC_MERGE", None)
|
|
||||||
os.environ["DFC_COMPAT_SNAPSHOT"] = "1" if compat_snapshot else "0"
|
|
||||||
importlib.invalidate_caches()
|
|
||||||
# Reload tagger to pick up the new env var
|
|
||||||
tagger = importlib.import_module("code.tagging.tagger")
|
|
||||||
tagger = importlib.reload(tagger) # type: ignore[assignment]
|
|
||||||
|
|
||||||
for color in colors:
|
|
||||||
tagger.load_dataframe(color)
|
|
||||||
|
|
||||||
|
|
||||||
def generate_diff(
    colors: Sequence[str],
    compat_dir: Path,
    sample_size: int,
) -> Dict[str, Any]:
    """Build the merged-vs-baseline diff summary for every requested color.

    For each color the merged catalog ``<color>_cards.csv`` (under ``CSV_ROOT``)
    and the baseline ``<color>_cards_unmerged.csv`` (under ``compat_dir``) are
    loaded and summarised.

    Args:
        colors: Color identifiers to process.
        compat_dir: Directory holding the unmerged baseline snapshots.
        sample_size: Maximum length of each per-color sample list.

    Returns:
        ``{"overall": ..., "per_color": ...}``.  The overall mismatch and
        missing totals are the lengths of the per-color sample lists, so each
        color contributes at most ``sample_size`` to them.

    Raises:
        FileNotFoundError: If a merged or baseline CSV is missing.
    """
    per_color: Dict[str, Any] = {}
    rows_merged = 0
    rows_baseline = 0
    multi_face_groups = 0
    mismatch_total = 0
    missing_total = 0

    for color in colors:
        merged_df = _load_catalog(CSV_ROOT / f"{color}_cards.csv")
        baseline_df = _load_catalog(compat_dir / f"{color}_cards_unmerged.csv")
        color_summary = _summarize_color(color, merged_df, baseline_df, sample_size)
        per_color[color] = color_summary

        rows_merged += color_summary["rows_merged"]
        rows_baseline += color_summary["rows_baseline"]
        multi_face_groups += color_summary["multi_face_groups"]
        mismatch_total += len(color_summary["tag_union_mismatches"])
        missing_total += len(color_summary["missing_after_merge"])

    overall = {
        "total_rows_merged": rows_merged,
        "total_rows_baseline": rows_baseline,
        "total_multi_face_groups": multi_face_groups,
        "colors": len(colors),
        "tag_union_mismatches": mismatch_total,
        "missing_after_merge": missing_total,
        "row_delta_total": rows_merged - rows_baseline,
    }
    return {"overall": overall, "per_color": per_color}
|
|
||||||
|
|
||||||
|
|
||||||
def _build_diff_arg_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the DFC catalog diff preview CLI."""
    parser = argparse.ArgumentParser(description="Preview merged vs baseline DFC catalog diff")
    parser.add_argument(
        "--skip-refresh",
        action="store_true",
        help="Skip rebuilding the catalog in compatibility mode (requires existing compat snapshots)",
    )
    parser.add_argument(
        "--mode",
        default="",
        help="[Deprecated] Legacy ENABLE_DFC_MERGE value (compat|1|0 etc.)",
    )
    parser.add_argument(
        "--compat-snapshot",
        dest="compat_snapshot",
        action="store_true",
        help="Write compatibility snapshots before diffing (default: off unless legacy --mode compat)",
    )
    parser.add_argument(
        "--no-compat-snapshot",
        dest="compat_snapshot",
        action="store_false",
        help="Skip compatibility snapshots even if legacy --mode compat is supplied",
    )
    # None means "neither flag given", which lets the legacy --mode decide.
    parser.set_defaults(compat_snapshot=None)
    parser.add_argument(
        "--colors",
        nargs="*",
        help="Optional subset of colors to diff (defaults to full COLORS list)",
    )
    parser.add_argument(
        "--compat-dir",
        type=Path,
        default=DEFAULT_COMPAT_DIR,
        help="Directory containing unmerged compatibility snapshots (default: %(default)s)",
    )
    parser.add_argument(
        "--output",
        type=Path,
        help="Optional JSON file to write with the diff summary",
    )
    parser.add_argument(
        "--sample-size",
        type=int,
        default=10,
        help="Number of sample entries to include per section (default: %(default)s)",
    )
    return parser


def _print_diff_summary(diff: Dict[str, Any]) -> None:
    """Print the overall totals followed by one short section per color."""
    overall = diff["overall"]
    print("\n=== DFC Catalog Diff Summary ===")
    print(
        f"Merged rows: {overall['total_rows_merged']:,} | Baseline rows: {overall['total_rows_baseline']:,} | "
        f"Δ rows: {overall['row_delta_total']:,}"
    )
    print(
        f"Multi-face groups: {overall['total_multi_face_groups']:,} | "
        f"Tag union mismatches: {overall['tag_union_mismatches']} | Missing after merge: {overall['missing_after_merge']}"
    )

    for color, summary in diff["per_color"].items():
        print(f"\n[{color}] baseline={summary['rows_baseline']} merged={summary['rows_merged']} Δ={summary['row_delta']}")
        if summary["multi_face_groups"]:
            print(f" multi-face groups: {summary['multi_face_groups']}")
        if summary["collapsed_sample"]:
            # Only the first three collapsed names are shown on the console.
            sample = ", ".join(summary["collapsed_sample"][:3])
            print(f" collapsed sample: {sample}")
        if summary["tag_union_mismatches"]:
            print(f" TAG MISMATCH sample: {', '.join(summary['tag_union_mismatches'])}")
        if summary["missing_after_merge"]:
            print(f" MISSING sample: {', '.join(summary['missing_after_merge'])}")
        if summary["removed_names"]:
            print(f" removed sample: {', '.join(summary['removed_names'])}")
        if summary["added_names"]:
            print(f" added sample: {', '.join(summary['added_names'])}")


def main(argv: List[str]) -> int:
    """Run the DFC catalog diff preview.

    Optionally refreshes the catalog, computes the merged-vs-baseline diff,
    prints a human readable summary and, when ``--output`` is given, writes the
    summary as JSON.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        0 on success, 2 when a required catalog or snapshot CSV is missing,
        3 when the JSON summary could not be written.
    """
    args = _build_diff_arg_parser().parse_args(argv)

    colors = tuple(args.colors) if args.colors else tuple(COLORS)
    compat_dir = args.compat_dir

    mode = str(args.mode or "").strip().lower()
    if mode and mode not in {"compat", "dual", "both", "1", "on", "true", "0", "off", "false", "disabled"}:
        print(
            f"ℹ Legacy --mode value '{mode}' detected; merge remains enabled. Use --compat-snapshot as needed.",
            flush=True,
        )

    if args.compat_snapshot is None:
        # No explicit flag: fall back to what the legacy --mode value implied.
        compat_snapshot = mode in {"compat", "dual", "both"}
    else:
        compat_snapshot = args.compat_snapshot
        if mode:
            print(
                "ℹ Ignoring deprecated --mode value because --compat-snapshot/--no-compat-snapshot was supplied.",
                flush=True,
            )

    if mode in {"0", "off", "false", "disabled"}:
        print(
            "⚠ ENABLE_DFC_MERGE=off is deprecated; the merge remains enabled regardless of the value.",
            flush=True,
        )

    if not args.skip_refresh:
        start = time.perf_counter()
        _refresh_catalog(colors, compat_snapshot)
        duration = time.perf_counter() - start
        snapshot_msg = "with compat snapshot" if compat_snapshot else "merged-only"
        print(f"✔ Refreshed catalog in {duration:.1f}s ({snapshot_msg})")
    else:
        print("ℹ Using existing catalog outputs (refresh skipped)")

    try:
        diff = generate_diff(colors, compat_dir, args.sample_size)
    except FileNotFoundError as exc:
        # Both lines describe the same failure, so both go to stderr; this keeps
        # stdout free of error text when the summary is piped elsewhere.
        print(f"ERROR: {exc}", file=sys.stderr)
        print("Run without --skip-refresh (or ensure compat snapshots exist).", file=sys.stderr)
        return 2

    _print_diff_summary(diff)

    if args.output:
        payload = {
            "captured_at": int(time.time()),
            "mode": args.mode,
            "colors": colors,
            "compat_dir": str(compat_dir),
            "summary": diff,
        }
        try:
            args.output.parent.mkdir(parents=True, exist_ok=True)
            args.output.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8")
            print(f"\n📄 Wrote JSON summary to {args.output}")
        except Exception as exc:  # pragma: no cover
            print(f"Failed to write output file {args.output}: {exc}", file=sys.stderr)
            return 3

    return 0
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: pass the CLI arguments (without the program name) to
# main() and use its return value as the process exit status.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main(sys.argv[1:]))
|
|
||||||
|
|
@ -1,105 +0,0 @@
|
||||||
"""CLI utility: snapshot preview metrics and emit summary/top slow themes.
|
|
||||||
|
|
||||||
Usage (from repo root virtualenv):
|
|
||||||
python -m code.scripts.preview_metrics_snapshot --limit 10 --output logs/preview_metrics_snapshot.json
|
|
||||||
|
|
||||||
Fetches /themes/metrics (requires WEB_THEME_PICKER_DIAGNOSTICS=1) and writes a compact JSON plus
|
|
||||||
human-readable summary to stdout.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Dict
|
|
||||||
|
|
||||||
import urllib.request
|
|
||||||
import urllib.error
|
|
||||||
|
|
||||||
# Metrics endpoint queried when --url is not supplied on the command line.
DEFAULT_URL = "http://localhost:8000/themes/metrics"
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_metrics(url: str) -> Dict[str, Any]:
    """Fetch *url* with an ``Accept: application/json`` header and decode it.

    The body is read with a 10 second timeout and decoded as UTF-8, replacing
    undecodable bytes.

    Args:
        url: Address of the metrics endpoint.

    Returns:
        The parsed JSON document.

    Raises:
        SystemExit: If the body is not valid JSON; the message includes the
            first 400 characters of the raw body.
    """
    request = urllib.request.Request(url, headers={"Accept": "application/json"})
    with urllib.request.urlopen(request, timeout=10) as response:  # nosec B310 (local trusted)
        body = response.read().decode("utf-8", "replace")

    try:
        parsed = json.loads(body)
    except json.JSONDecodeError as err:  # pragma: no cover - unlikely if server OK
        raise SystemExit(f"Invalid JSON from metrics endpoint: {err}\nRaw: {body[:400]}")
    return parsed  # type: ignore[return-value]
|
|
||||||
|
|
||||||
|
|
||||||
def summarize(metrics: Dict[str, Any], top_n: int) -> Dict[str, Any]:
    """Condense a metrics payload into headline numbers and the slowest themes.

    Args:
        metrics: Decoded metrics document; its ``"preview"`` mapping is used
            (anything that is not a dict is treated as having no preview data).
        top_n: Number of themes to keep in ``top_slowest``.

    Returns:
        A dict with the preview-level counters copied from the payload (``None``
        when absent) and ``top_slowest``: the ``top_n`` per-theme entries with
        the highest numeric ``avg_ms``, slowest first.
    """
    preview = (metrics.get("preview") or {}) if isinstance(metrics, dict) else {}
    per_theme = preview.get("per_theme") or {}

    # Compute top slow themes by avg_ms; entries without a numeric average are ignored.
    timed = [
        (slug, float(info.get("avg_ms")), info)
        for slug, info in per_theme.items()
        if isinstance(info, dict) and isinstance(info.get("avg_ms"), (int, float))
    ]
    slowest = sorted(timed, key=lambda entry: entry[1], reverse=True)[:top_n]

    headline_keys = (
        "preview_requests",
        "preview_cache_hits",
        "preview_avg_build_ms",
        "preview_p95_build_ms",
        "preview_ttl_seconds",
        "editorial_curated_vs_sampled_pct",
    )
    summary: Dict[str, Any] = {key: preview.get(key) for key in headline_keys}
    summary["top_slowest"] = [
        {
            "slug": slug,
            "avg_ms": avg,
            "p95_ms": info.get("p95_ms"),
            "builds": info.get("builds"),
            "requests": info.get("requests"),
            "avg_curated_pct": info.get("avg_curated_pct"),
        }
        for slug, avg, info in slowest
    ]
    return summary
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv: list[str]) -> int:
    """Fetch the preview metrics, summarise them and report the snapshot.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        0 on success, 2 when the metrics endpoint could not be fetched,
        3 when the snapshot file could not be written.
    """
    parser = argparse.ArgumentParser(description="Snapshot preview metrics")
    parser.add_argument("--url", default=DEFAULT_URL, help="Metrics endpoint URL (default: %(default)s)")
    parser.add_argument("--limit", type=int, default=10, help="Top N slow themes to include (default: %(default)s)")
    parser.add_argument("--output", type=Path, help="Optional output JSON file for snapshot")
    parser.add_argument("--quiet", action="store_true", help="Suppress stdout summary (still writes file if --output)")
    options = parser.parse_args(argv)

    try:
        raw_metrics = fetch_metrics(options.url)
    except urllib.error.URLError as err:
        print(f"ERROR: Failed fetching metrics endpoint: {err}", file=sys.stderr)
        return 2

    summary = summarize(raw_metrics, options.limit)
    snapshot = {
        "captured_at": int(time.time()),
        "source": options.url,
        "summary": summary,
    }

    destination = options.output
    if destination:
        try:
            destination.parent.mkdir(parents=True, exist_ok=True)
            serialized = json.dumps(snapshot, indent=2, sort_keys=True)
            destination.write_text(serialized, encoding="utf-8")
        except Exception as err:  # pragma: no cover
            print(f"ERROR: writing snapshot file failed: {err}", file=sys.stderr)
            return 3

    if options.quiet:
        return 0

    print("Preview Metrics Snapshot:")
    print(json.dumps(summary, indent=2))
    return 0
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: pass the CLI arguments (without the program name) to
# main() and use its return value as the process exit status.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main(sys.argv[1:]))
|
|
||||||
|
|
@ -1,349 +0,0 @@
|
||||||
"""Ad-hoc performance benchmark for theme preview build latency (Phase A validation).
|
|
||||||
|
|
||||||
Runs warm-up plus measured request loops against several theme slugs and prints
|
|
||||||
aggregate latency stats (p50/p90/p95, cache hit ratio evolution). Intended to
|
|
||||||
establish or validate that a refactor did not introduce a >5% p95 regression.
|
|
||||||
|
|
||||||
Usage (ensure server running locally – commonly :8080 in docker compose):
|
|
||||||
python -m code.scripts.preview_perf_benchmark --themes 8 --loops 40 \
|
|
||||||
--url http://localhost:8080 --warm 1 --limit 12
|
|
||||||
|
|
||||||
Theme slug discovery hierarchy (when --theme not provided):
|
|
||||||
1. Try /themes/index.json (legacy / planned static index)
|
|
||||||
2. Fallback to /themes/api/themes (current API) and take the first N ids
|
|
||||||
The discovered slugs are sorted deterministically then truncated to N.
|
|
||||||
|
|
||||||
NOTE: This is intentionally minimal (no external deps). For stable comparisons
|
|
||||||
run with identical parameters pre/post-change and commit the JSON output under
|
|
||||||
logs/perf/.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import statistics
|
|
||||||
import time
|
|
||||||
from typing import Any, Dict, List
|
|
||||||
import urllib.request
|
|
||||||
import urllib.error
|
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
|
|
||||||
def _fetch_json(url: str) -> Dict[str, Any]:
|
|
||||||
req = urllib.request.Request(url, headers={"Accept": "application/json"})
|
|
||||||
with urllib.request.urlopen(req, timeout=15) as resp: # nosec B310 local dev
|
|
||||||
data = resp.read().decode("utf-8", "replace")
|
|
||||||
return json.loads(data) # type: ignore[return-value]
|
|
||||||
|
|
||||||
|
|
||||||
def _fetch_json_with_retry(url: str, attempts: int = 3, delay: float = 0.6) -> Dict[str, Any]:
|
|
||||||
last_error: Exception | None = None
|
|
||||||
for attempt in range(1, attempts + 1):
|
|
||||||
try:
|
|
||||||
return _fetch_json(url)
|
|
||||||
except Exception as exc: # pragma: no cover - network variability
|
|
||||||
last_error = exc
|
|
||||||
if attempt < attempts:
|
|
||||||
print(json.dumps({ # noqa: T201
|
|
||||||
"event": "preview_perf_fetch_retry",
|
|
||||||
"url": url,
|
|
||||||
"attempt": attempt,
|
|
||||||
"max_attempts": attempts,
|
|
||||||
"error": str(exc),
|
|
||||||
}))
|
|
||||||
time.sleep(delay * attempt)
|
|
||||||
else:
|
|
||||||
raise
|
|
||||||
raise last_error # pragma: no cover - defensive; should be unreachable
|
|
||||||
|
|
||||||
|
|
||||||
def select_theme_slugs(base_url: str, count: int) -> List[str]:
    """Discover up to *count* theme slugs to benchmark.

    The legacy static index (``/themes/index.json``) is tried first; the live
    listing (``/themes/api/themes``) is only queried when the index produced no
    slugs.  The result is de-duplicated, sorted and truncated to ``count``.

    Raises:
        SystemExit: If no slug could be discovered; the message lists the
            errors collected from the failed sources.
    """
    # (endpoint, key of the entry list, identifier keys in lookup order, error label)
    sources = (
        ("/themes/index.json", "themes", ("slug", "id", "theme_id"), "index.json"),
        ("/themes/api/themes", "items", ("id", "slug", "theme_id"), "api/themes"),
    )
    failures: List[str] = []
    discovered: List[str] = []

    for endpoint, list_key, id_keys, label in sources:
        if discovered:
            # An earlier source already produced slugs; skip the fallback.
            break
        first_key, second_key, third_key = id_keys
        try:
            document = _fetch_json(f"{base_url.rstrip('/')}{endpoint}")
            for entry in document.get(list_key) or []:
                if not isinstance(entry, dict):
                    continue
                ident = entry.get(first_key) or entry.get(second_key) or entry.get(third_key)
                if isinstance(ident, str):
                    discovered.append(ident)
        except Exception as err:  # pragma: no cover - network variability
            failures.append(f"{label} failed: {err}")

    chosen = sorted(set(discovered))[:count]
    if not chosen:
        raise SystemExit("No theme slugs discovered; cannot benchmark (" + "; ".join(failures) + ")")
    return chosen
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_all_theme_slugs(base_url: str, page_limit: int = 200) -> List[str]:
    """Collect every theme slug from the paginated ``/themes/api/themes`` listing.

    Pages are requested with ``limit=page_limit`` starting at offset 0 and
    followed through the ``next_offset`` field until it is missing, falsy or
    equal to the current offset.  Each page is attempted up to five times with
    a growing pause in between.

    Returns:
        The sorted list of unique slugs.

    Raises:
        SystemExit: If a page still cannot be fetched after all attempts.
    """
    max_page_attempts = 5
    base_pause = 1.2
    unique_slugs: set[str] = set()
    offset = 0

    while True:
        page_url = f"{base_url.rstrip('/')}/themes/api/themes?limit={page_limit}&offset={offset}"
        page: Dict[str, Any] | None = None
        last_error: Exception | None = None

        for attempt in range(1, max_page_attempts + 1):
            try:
                page = _fetch_json_with_retry(page_url, attempts=4, delay=0.75)
            except Exception as exc:  # pragma: no cover - network variability
                last_error = exc
                if attempt >= max_page_attempts:
                    raise SystemExit(f"Failed fetching themes page offset={offset}: {exc}")
                print(json.dumps({  # noqa: T201
                    "event": "preview_perf_page_retry",
                    "offset": offset,
                    "attempt": attempt,
                    "max_attempts": max_page_attempts,
                    "error": str(exc),
                }))
                time.sleep(base_pause * attempt)
            else:
                break

        if page is None:  # pragma: no cover - defensive
            raise SystemExit(f"Failed fetching themes page offset={offset}: {last_error}")

        for entry in page.get("items") or []:
            if not isinstance(entry, dict):
                continue
            ident = entry.get("id") or entry.get("slug") or entry.get("theme_id")
            if isinstance(ident, str):
                unique_slugs.add(ident)

        following = page.get("next_offset")
        # Stop on the last page, or if the server reports no progress.
        if not following or following == offset:
            break
        offset = int(following)

    return sorted(unique_slugs)
|
|
||||||
|
|
||||||
|
|
||||||
def percentile(values: List[float], pct: float) -> float:
    """Return the *pct* quantile of *values* using linear interpolation.

    Args:
        values: Samples in any order; the list is not modified.
        pct: Quantile as a fraction between 0.0 and 1.0 inclusive
            (0.95 for the 95th percentile).

    Returns:
        ``0.0`` for an empty input, otherwise the value at position
        ``(len(values) - 1) * pct`` of the sorted samples, interpolated between
        the two neighbouring samples when that position is fractional.

    Raises:
        ValueError: If ``values`` is non-empty and ``pct`` is outside [0, 1].
    """
    if not values:
        return 0.0
    # Fractions above 1 used to index past the end (IndexError) and negative
    # ones silently extrapolated below the minimum; reject both explicitly.
    if not 0.0 <= pct <= 1.0:
        raise ValueError(f"pct must be between 0.0 and 1.0, got {pct}")

    ordered = sorted(values)
    position = (len(ordered) - 1) * pct
    lower = int(position)
    upper = min(lower + 1, len(ordered) - 1)
    if lower == upper:
        # Only reachable at the last sample (pct == 1.0 or a single value).
        return ordered[lower]
    return ordered[lower] * (upper - position) + ordered[upper] * (position - lower)
|
|
||||||
|
|
||||||
|
|
||||||
def run_loop(base_url: str, slugs: List[str], loops: int, limit: int, warm: bool, path_template: str) -> Dict[str, Any]:
    """Issue *loops* preview requests round-robin over *slugs* and time them.

    Args:
        base_url: Server base URL; a trailing slash is ignored.
        slugs: Theme slugs to cycle through; must not be empty when
            ``loops`` is positive.
        loops: Total number of requests to issue.
        limit: Value substituted for ``{limit}`` in the request path.
        warm: Flag copied into the result to label the run.
        path_template: Format string for the request path; may contain
            ``{slug}`` and ``{limit}``.  If formatting fails, the default
            preview path is used instead.

    Returns:
        A dict with the run parameters, per-slug success counts, elapsed
        seconds, latency statistics in milliseconds and the raw latencies under
        ``"_latencies"``.  Failed requests are reported as JSON event lines and
        excluded from the statistics.

    Raises:
        ValueError: If ``slugs`` is empty while ``loops`` is positive.
    """
    if loops > 0 and not slugs:
        # Previously surfaced as a ZeroDivisionError from the modulo below.
        raise ValueError("slugs must not be empty when loops > 0")

    root = base_url.rstrip('/')
    latencies: List[float] = []
    per_slug_counts = {s: 0 for s in slugs}

    # perf_counter is monotonic and high resolution, so measured intervals are
    # not distorted by wall-clock adjustments during the run.
    run_started = time.perf_counter()
    for i in range(loops):
        slug = slugs[i % len(slugs)]
        # path_template may contain {slug} and {limit}
        try:
            rel = path_template.format(slug=slug, limit=limit)
        except Exception:
            rel = f"/themes/api/theme/{slug}/preview?limit={limit}"
        if not rel.startswith('/'):
            rel = '/' + rel
        url = f"{root}{rel}"

        request_started = time.perf_counter()
        try:
            _fetch_json(url)
        except Exception as e:
            print(json.dumps({"event": "perf_benchmark_error", "slug": slug, "error": str(e)}))  # noqa: T201
            continue
        latencies.append((time.perf_counter() - request_started) * 1000.0)
        per_slug_counts[slug] += 1
    elapsed = time.perf_counter() - run_started

    return {
        "warm": warm,
        "loops": loops,
        "slugs": slugs,
        "per_slug_requests": per_slug_counts,
        "elapsed_s": round(elapsed, 3),
        "p50_ms": round(percentile(latencies, 0.50), 2),
        "p90_ms": round(percentile(latencies, 0.90), 2),
        "p95_ms": round(percentile(latencies, 0.95), 2),
        "avg_ms": round(statistics.mean(latencies), 2) if latencies else 0.0,
        "count": len(latencies),
        "_latencies": latencies,  # internal (removed in final result unless explicitly retained)
    }
|
|
||||||
|
|
||||||
|
|
||||||
def _stats_from_latencies(latencies: List[float]) -> Dict[str, Any]:
|
|
||||||
if not latencies:
|
|
||||||
return {"count": 0, "p50_ms": 0.0, "p90_ms": 0.0, "p95_ms": 0.0, "avg_ms": 0.0}
|
|
||||||
return {
|
|
||||||
"count": len(latencies),
|
|
||||||
"p50_ms": round(percentile(latencies, 0.50), 2),
|
|
||||||
"p90_ms": round(percentile(latencies, 0.90), 2),
|
|
||||||
"p95_ms": round(percentile(latencies, 0.95), 2),
|
|
||||||
"avg_ms": round(statistics.mean(latencies), 2),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv: List[str]) -> int:
    """Command-line entry point for the theme preview benchmark.

    Parses ``argv``, resolves the theme slugs to exercise, then runs one of
    two execution paths:

    1. Subset mode or single-pass ``--all``: optional warm-up cycles followed
       by one measured run.
    2. Multi-pass ``--all --passes N`` (N > 1): one run per pass with per-pass
       statistics plus combined statistics (no separate warm-up cycles).

    JSON events are printed to stdout; the final result is additionally
    written to ``--output`` when given.

    Returns:
        0 on success, 2 when slug selection aborts with ``SystemExit``,
        3 when the output file cannot be written.
    """
    parser = argparse.ArgumentParser(description="Theme preview performance benchmark")
    add = parser.add_argument
    add("--url", default="http://localhost:8000", help="Base server URL (default: %(default)s)")
    add("--themes", type=int, default=6, help="Number of theme slugs to exercise (default: %(default)s)")
    add("--loops", type=int, default=60, help="Total request iterations (default: %(default)s)")
    add("--limit", type=int, default=12, help="Preview size (default: %(default)s)")
    add("--path-template", default="/themes/api/theme/{slug}/preview?limit={limit}", help="Format string for preview request path (default: %(default)s)")
    add("--theme", action="append", dest="explicit_theme", help="Explicit theme slug(s); overrides automatic selection")
    add("--warm", type=int, default=1, help="Number of warm-up loops (full cycles over selected slugs) (default: %(default)s)")
    add("--output", type=Path, help="Optional JSON output path (committed under logs/perf)")
    add("--all", action="store_true", help="Exercise ALL themes (ignores --themes; loops auto-set to passes*total_slugs unless --loops-explicit)")
    add("--passes", type=int, default=1, help="When using --all, number of passes over the full theme set (default: %(default)s)")
    # Hidden flag: argparse cannot report whether --loops was given explicitly,
    # so callers who want their --loops honoured together with --all pass this.
    add("--loops-explicit", action="store_true", help=argparse.SUPPRESS)
    add("--extract-warm-baseline", type=Path, help="If multi-pass (--all --passes >1), write a warm-only baseline JSON (final pass stats) to this path")
    args = parser.parse_args(argv)

    try:
        # Explicit slugs win; otherwise fetch everything (--all) or a sample.
        slugs = args.explicit_theme or (
            fetch_all_theme_slugs(args.url) if args.all else select_theme_slugs(args.url, args.themes)
        )
    except SystemExit as e:  # pragma: no cover - dependency on live server
        print(str(e), file=sys.stderr)
        return 2

    total_slugs = len(slugs)
    mode = "subset"
    if args.all:
        mode = "all"
        if not args.loops_explicit:
            # Derive loops = passes * total_slugs
            args.loops = max(1, args.passes) * total_slugs

    start_event = {
        "event": "preview_perf_start",
        "mode": mode,
        "total_slugs": total_slugs,
        "planned_loops": args.loops,
        "passes": args.passes if args.all else None,
    }
    print(json.dumps(start_event))  # noqa: T201

    if args.all and args.passes > 1:
        # Multi-pass mode: every pass is one full cycle over all slugs.
        copied_keys = ("warm", "elapsed_s", "p50_ms", "p90_ms", "p95_ms", "avg_ms", "count")
        pass_results: List[Dict[str, Any]] = []
        combined_latencies: List[float] = []
        started = time.time()
        for pass_no in range(1, args.passes + 1):
            run = run_loop(args.url, slugs, len(slugs), args.limit, warm=(pass_no == 1), path_template=args.path_template)
            combined_latencies.extend(run.pop("_latencies", []))
            pass_results.append({"pass": pass_no, **{key: run[key] for key in copied_keys}})
        total_elapsed = round(time.time() - started, 3)
        aggregate = _stats_from_latencies(combined_latencies)
        first_pass, last_pass = pass_results[0], pass_results[-1]
        result = {
            "mode": mode,
            "total_slugs": total_slugs,
            "passes": args.passes,
            "slugs": slugs,
            "combined": {**aggregate, "elapsed_s": total_elapsed},
            "passes_results": pass_results,
            "cold_pass_p95_ms": first_pass["p95_ms"],
            "warm_pass_p95_ms": last_pass["p95_ms"],
            "cold_pass_p50_ms": first_pass["p50_ms"],
            "warm_pass_p50_ms": last_pass["p50_ms"],
        }
        print(json.dumps({"event": "preview_perf_result", **result}, indent=2))  # noqa: T201

        # Optional warm baseline extraction (final pass only)
        if args.extract_warm_baseline:
            try:
                wb = pass_results[-1]
                warm_obj = {
                    "event": "preview_perf_warm_baseline",
                    "mode": mode,
                    "total_slugs": total_slugs,
                    "warm_baseline": True,
                    "source_pass": wb["pass"],
                    **{key: wb[key] for key in ("p50_ms", "p90_ms", "p95_ms", "avg_ms", "count")},
                    "slugs": slugs,
                }
                target = args.extract_warm_baseline
                target.parent.mkdir(parents=True, exist_ok=True)
                target.write_text(json.dumps(warm_obj, indent=2, sort_keys=True), encoding="utf-8")
                written_event = {
                    "event": "preview_perf_warm_baseline_written",
                    "path": str(target),
                    "p95_ms": wb["p95_ms"],
                }
                print(json.dumps(written_event))  # noqa: T201
            except Exception as e:  # pragma: no cover
                print(json.dumps({"event": "preview_perf_warm_baseline_error", "error": str(e)}))  # noqa: T201
    else:
        # Warm-up cycles first (if requested); their results are discarded.
        for _ in range(args.warm):
            run_loop(args.url, slugs, len(slugs), args.limit, warm=True, path_template=args.path_template)
        result = run_loop(args.url, slugs, args.loops, args.limit, warm=False, path_template=args.path_template)
        result.pop("_latencies", None)
        result.update(slugs=slugs, mode=mode, total_slugs=total_slugs)
        if args.all:
            result["passes"] = args.passes
        print(json.dumps({"event": "preview_perf_result", **result}, indent=2))  # noqa: T201

    if not args.output:
        return 0
    try:
        args.output.parent.mkdir(parents=True, exist_ok=True)
        # Both execution paths leave the final object in ``result``.
        args.output.write_text(json.dumps(result, indent=2, sort_keys=True), encoding="utf-8")
    except Exception as e:  # pragma: no cover
        print(f"ERROR: failed writing output file: {e}", file=sys.stderr)
        return 3
    return 0
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__": # pragma: no cover
|
|
||||||
raise SystemExit(main(sys.argv[1:]))
|
|
||||||
|
|
@ -1,106 +0,0 @@
|
||||||
"""CI helper: run a warm-pass benchmark candidate (single pass over all themes)
|
|
||||||
then compare against the committed warm baseline with threshold enforcement.
|
|
||||||
|
|
||||||
Intended usage (example):
|
|
||||||
python -m code.scripts.preview_perf_ci_check --url http://localhost:8080 \
|
|
||||||
--baseline logs/perf/theme_preview_warm_baseline.json --p95-threshold 5
|
|
||||||
|
|
||||||
Exit codes:
|
|
||||||
0 success (within threshold)
|
|
||||||
2 regression (p95 delta > threshold)
|
|
||||||
3 setup / usage error
|
|
||||||
|
|
||||||
Notes:
|
|
||||||
- Uses --all --passes 1 to create a fresh candidate snapshot that approximates
|
|
||||||
a warmed steady-state (server should have background refresh / typical load).
|
|
||||||
- If you prefer multi-pass then warm-only selection, adjust logic accordingly.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import urllib.error
|
|
||||||
import urllib.request
|
|
||||||
from pathlib import Path
|
|
||||||
def _wait_for_service(base_url: str, attempts: int = 12, delay: float = 1.5) -> bool:
|
|
||||||
health_url = base_url.rstrip("/") + "/healthz"
|
|
||||||
last_error: Exception | None = None
|
|
||||||
for attempt in range(1, attempts + 1):
|
|
||||||
try:
|
|
||||||
with urllib.request.urlopen(health_url, timeout=5) as resp: # nosec B310 local CI
|
|
||||||
if 200 <= resp.status < 300:
|
|
||||||
return True
|
|
||||||
except urllib.error.HTTPError as exc:
|
|
||||||
last_error = exc
|
|
||||||
if 400 <= exc.code < 500 and exc.code != 429:
|
|
||||||
# Treat permanent client errors (other than rate limit) as fatal
|
|
||||||
break
|
|
||||||
except Exception as exc: # pragma: no cover - network variability
|
|
||||||
last_error = exc
|
|
||||||
time.sleep(delay * attempt)
|
|
||||||
print(json.dumps({
|
|
||||||
"event": "ci_perf_error",
|
|
||||||
"stage": "startup",
|
|
||||||
"message": "Service health check failed",
|
|
||||||
"url": health_url,
|
|
||||||
"attempts": attempts,
|
|
||||||
"error": str(last_error) if last_error else None,
|
|
||||||
}))
|
|
||||||
return False
|
|
||||||
|
|
||||||
def run(cmd: list[str]) -> subprocess.CompletedProcess:
    """Execute ``cmd`` and hand back the completed process.

    stdout/stderr are captured as text and a non-zero exit status does not
    raise; callers inspect ``returncode`` themselves.
    """
    options = {"capture_output": True, "text": True, "check": False}
    return subprocess.run(cmd, **options)
|
|
||||||
|
|
||||||
def main(argv: list[str]) -> int:
    """Run the preview performance regression gate.

    Steps: verify the baseline file exists, wait for the service health
    check, run the benchmark module as a subprocess to produce a candidate
    result, then run the compare module against the baseline.

    Returns:
        0 when the comparison passes, 2 when the compare step reports a
        regression (exit code 2), 3 for any setup, benchmark or compare error.
    """
    parser = argparse.ArgumentParser(description="Preview performance CI regression gate")
    parser.add_argument("--url", default="http://localhost:8080", help="Base URL of running web service")
    parser.add_argument("--baseline", type=Path, required=True, help="Path to committed warm baseline JSON")
    parser.add_argument("--p95-threshold", type=float, default=5.0, help="Max allowed p95 regression percent (default: %(default)s)")
    parser.add_argument("--candidate-output", type=Path, default=Path("logs/perf/theme_preview_ci_candidate.json"), help="Where to write candidate benchmark JSON")
    parser.add_argument("--multi-pass", action="store_true", help="Run a 2-pass all-themes benchmark and compare warm pass only (optional enhancement)")
    args = parser.parse_args(argv)

    def _error(**fields) -> int:
        # Emit a ci_perf_error event and yield the setup/usage exit code.
        print(json.dumps({"event": "ci_perf_error", **fields}))
        return 3

    if not args.baseline.exists():
        return _error(message="Baseline not found", path=str(args.baseline))

    if not _wait_for_service(args.url):
        return 3

    # One pass keeps CI fast; with --multi-pass a second pass over all themes
    # is requested from the benchmark.
    passes = "2" if args.multi_pass else "1"
    bench_cmd = [
        sys.executable, "-m", "code.scripts.preview_perf_benchmark",
        "--url", args.url,
        "--all",
        "--passes", passes,
        "--output", str(args.candidate_output),
    ]
    bench_proc = run(bench_cmd)
    if bench_proc.returncode != 0:
        return _error(stage="benchmark", code=bench_proc.returncode, stderr=bench_proc.stderr)
    print(bench_proc.stdout)

    if not args.candidate_output.exists():
        return _error(message="Candidate output missing")

    compare_cmd = [
        sys.executable, "-m", "code.scripts.preview_perf_compare",
        "--baseline", str(args.baseline),
        "--candidate", str(args.candidate_output),
        "--warm-only",
        "--p95-threshold", str(args.p95_threshold),
    ]
    cmp_proc = run(compare_cmd)
    print(cmp_proc.stdout)
    status = cmp_proc.returncode
    if status == 2:
        # The compare step already printed JSON with the failure status
        return 2
    if status != 0:
        return _error(stage="compare", code=status, stderr=cmp_proc.stderr)
    return 0
|
|
||||||
|
|
||||||
if __name__ == "__main__": # pragma: no cover
|
|
||||||
raise SystemExit(main(sys.argv[1:]))
|
|
||||||
|
|
@ -1,115 +0,0 @@
|
||||||
"""Compare two preview benchmark JSON result files and emit delta stats.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
python -m code.scripts.preview_perf_compare --baseline logs/perf/theme_preview_baseline_all_pass1_20250923.json --candidate logs/perf/new_run.json
|
|
||||||
|
|
||||||
Outputs JSON with percentage deltas for p50/p90/p95/avg (positive = regression/slower).
|
|
||||||
If multi-pass structures are present (combined & passes_results) those are included.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Dict
|
|
||||||
|
|
||||||
|
|
||||||
def load(path: Path) -> Dict[str, Any]:
    """Load a benchmark result file and attach uniform core statistics.

    The parsed JSON gains a ``_core_stats`` entry used for comparison:

    * Multi-pass results (those with a ``combined`` section) use a copy of
      that section; any of ``p50_ms``/``p90_ms``/``p95_ms``/``avg_ms`` that is
      absent (or ``None``) there falls back to the top-level value.
    * Other results use the top-level ``p50_ms``, ``p90_ms``, ``p95_ms``,
      ``avg_ms`` and ``count`` values (``None`` when absent).

    Args:
        path: Path of the UTF-8 encoded JSON result file.

    Returns:
        The parsed result dict including ``_core_stats``.
    """
    data = json.loads(path.read_text(encoding="utf-8"))
    if "combined" in data:
        core = data["combined"].copy()
        for key in ("p50_ms", "p90_ms", "p95_ms", "avg_ms"):
            # Compare against None rather than truthiness: a measured 0.0 is
            # a valid statistic and must not be replaced by the fallback.
            if core.get(key) is None:
                core[key] = data.get(key)
        data["_core_stats"] = core
    else:
        data["_core_stats"] = {
            key: data.get(key) for key in ("p50_ms", "p90_ms", "p95_ms", "avg_ms", "count")
        }
    return data
|
|
||||||
|
|
||||||
|
|
||||||
def pct_delta(new: float, old: float) -> float:
    """Return the percentage change from ``old`` to ``new``.

    The result is rounded to two decimals and is positive when ``new`` is
    larger. A zero ``old`` value yields ``0.0`` instead of dividing by zero.
    """
    if old != 0:
        relative_change = (new - old) / old
        return round(relative_change * 100.0, 2)
    return 0.0
|
|
||||||
|
|
||||||
|
|
||||||
def compare(baseline: Dict[str, Any], candidate: Dict[str, Any]) -> Dict[str, Any]:
    """Build a delta report between two loaded benchmark results.

    Both inputs must carry a ``_core_stats`` mapping. For each of ``p50_ms``,
    ``p90_ms``, ``p95_ms`` and ``avg_ms`` present (not ``None``) on both
    sides, the report holds the baseline value, the candidate value and the
    percentage delta. When both results contain ``passes_results``, the
    cold/warm pass p95 and p50 values of each side are included under
    ``passes``.
    """
    base_stats = baseline["_core_stats"]
    cand_stats = candidate["_core_stats"]
    report: Dict[str, Any] = {
        "baseline_count": base_stats.get("count"),
        "candidate_count": cand_stats.get("count"),
    }

    for metric in ("p50_ms", "p90_ms", "p95_ms", "avg_ms"):
        if base_stats.get(metric) is None or cand_stats.get(metric) is None:
            continue
        old_value, new_value = base_stats[metric], cand_stats[metric]
        report[metric] = {
            "baseline": old_value,
            "candidate": new_value,
            "delta_pct": pct_delta(new_value, old_value),
        }

    # Per-pass details are only reported when both sides are multi-pass runs.
    if "passes_results" in baseline and "passes_results" in candidate:
        def _pass_summary(run: Dict[str, Any]) -> Dict[str, Any]:
            return {
                "cold_p95": run.get("cold_pass_p95_ms"),
                "warm_p95": run.get("warm_pass_p95_ms"),
                "cold_p50": run.get("cold_pass_p50_ms"),
                "warm_p50": run.get("warm_pass_p50_ms"),
            }

        report["passes"] = {
            "baseline": _pass_summary(baseline),
            "candidate": _pass_summary(candidate),
        }
    return report
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv: list[str]) -> int:
    """Compare a baseline and a candidate benchmark file from the command line.

    Prints a ``preview_perf_compare`` JSON payload. With ``--warm-only`` and
    warm-pass statistics present in both files, the warm-pass p50/p95 replace
    the core values before comparison (p90, avg and count stay as loaded).

    Returns:
        2 when ``--p95-threshold`` is given and the p95 delta exceeds it,
        otherwise 0.

    Raises:
        SystemExit: If the baseline or candidate file does not exist.
    """
    parser = argparse.ArgumentParser(description="Compare two preview benchmark JSON result files")
    parser.add_argument("--baseline", required=True, type=Path, help="Baseline JSON path")
    parser.add_argument("--candidate", required=True, type=Path, help="Candidate JSON path")
    parser.add_argument("--p95-threshold", type=float, default=None, help="Fail (exit 2) if p95 regression exceeds this percent (positive delta)")
    parser.add_argument("--warm-only", action="store_true", help="When both results have passes, compare warm pass p95/p50 instead of combined/core")
    args = parser.parse_args(argv)

    if not args.baseline.exists():
        raise SystemExit(f"Baseline not found: {args.baseline}")
    if not args.candidate.exists():
        raise SystemExit(f"Candidate not found: {args.candidate}")

    baseline = load(args.baseline)
    candidate = load(args.candidate)

    def _warm_core(run):
        # p90/avg/count are not tracked per pass; retain the loaded core values.
        loaded = run.get("_core_stats", {})
        return {
            "p50_ms": run.get("warm_pass_p50_ms"),
            "p90_ms": loaded.get("p90_ms"),
            "p95_ms": run.get("warm_pass_p95_ms"),
            "avg_ms": loaded.get("avg_ms"),
            "count": loaded.get("count"),
        }

    both_have_warm = "warm_pass_p95_ms" in baseline and "warm_pass_p95_ms" in candidate
    if args.warm_only and both_have_warm:
        baseline["_core_stats"] = _warm_core(baseline)
        candidate["_core_stats"] = _warm_core(candidate)

    comparison = compare(baseline, candidate)
    payload = {"event": "preview_perf_compare", **comparison}

    regressed = False
    if args.p95_threshold is not None and "p95_ms" in comparison:
        delta = comparison["p95_ms"]["delta_pct"]
        payload["threshold"] = {"p95_threshold": args.p95_threshold, "p95_delta_pct": delta}
        regressed = delta is not None and delta > args.p95_threshold

    payload["result"] = "fail" if regressed else "pass"
    print(json.dumps(payload, indent=2))  # noqa: T201
    return 2 if regressed else 0
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__": # pragma: no cover
|
|
||||||
raise SystemExit(main(__import__('sys').argv[1:]))
|
|
||||||
|
|
@ -1,91 +0,0 @@
|
||||||
"""Generate warm preview traffic to populate theme preview cache & metrics.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
python -m code.scripts.warm_preview_traffic --count 25 --repeats 2 \
|
|
||||||
--base-url http://localhost:8000 --delay 0.05
|
|
||||||
|
|
||||||
Requirements:
|
|
||||||
- FastAPI server running locally exposing /themes endpoints
|
|
||||||
- WEB_THEME_PICKER_DIAGNOSTICS=1 so /themes/metrics is accessible
|
|
||||||
|
|
||||||
Strategy:
|
|
||||||
1. Fetch /themes/fragment/list?limit=COUNT to obtain HTML table.
|
|
||||||
2. Extract theme slugs via regex on data-theme-id attributes.
|
|
||||||
3. Issue REPEATS preview fragment requests per slug in order.
|
|
||||||
4. Print simple timing / status summary.
|
|
||||||
|
|
||||||
This script intentionally uses stdlib only (urllib, re, time) to avoid extra deps.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
import urllib.request
|
|
||||||
import urllib.error
|
|
||||||
from typing import List
|
|
||||||
|
|
||||||
LIST_PATH = "/themes/fragment/list"
|
|
||||||
PREVIEW_PATH = "/themes/fragment/preview/{slug}"
|
|
||||||
|
|
||||||
|
|
||||||
def fetch(url: str) -> str:
    """Request ``url`` and return the response body as text.

    The request carries a ``warm-preview/1`` User-Agent and a 15 second
    timeout. The body is decoded as UTF-8 with undecodable bytes replaced.
    """
    request = urllib.request.Request(url, headers={"User-Agent": "warm-preview/1"})
    response = urllib.request.urlopen(request, timeout=15)  # nosec B310 (local trusted)
    with response:
        payload = response.read()
    return payload.decode("utf-8", "replace")
|
|
||||||
|
|
||||||
|
|
||||||
def extract_slugs(html: str, limit: int) -> List[str]:
    """Collect distinct theme slugs from ``data-theme-id`` attributes.

    Slugs are whitespace-stripped, de-duplicated and returned in document
    order. Scanning stops once the number collected reaches ``limit``; the
    check runs after each addition.
    """
    found: List[str] = []
    for raw in re.findall(r'data-theme-id="([^"]+)"', html):
        slug = raw.strip()
        if not slug or slug in found:
            continue
        found.append(slug)
        if len(found) >= limit:
            break
    return found
|
|
||||||
|
|
||||||
|
|
||||||
def warm(base_url: str, count: int, repeats: int, delay: float) -> None:
    """Issue preview requests for up to ``count`` themes, ``repeats`` times.

    Fetches the theme list fragment, extracts the slugs from it, then
    requests the preview fragment of every slug once per pass. Individual
    preview failures are reported and skipped; progress and a final
    throughput summary are printed.

    Args:
        base_url: Server base URL, used as a prefix without modification.
        count: Number of list entries to request and slugs to extract.
        repeats: Number of passes over the extracted slugs.
        delay: Seconds to sleep after each preview request (falsy disables).

    Raises:
        SystemExit: If the list request fails with ``URLError`` or no slugs
            could be extracted.
    """
    list_url = f"{base_url}{LIST_PATH}?limit={count}&offset=0"
    print(f"[warm] Fetching list: {list_url}")
    try:
        listing = fetch(list_url)
    except urllib.error.URLError as e:  # pragma: no cover
        raise SystemExit(f"Failed fetching list: {e}")

    slugs = extract_slugs(listing, count)
    if not slugs:
        raise SystemExit("No theme slugs extracted – cannot warm.")
    print(f"[warm] Extracted {len(slugs)} slugs: {', '.join(slugs[:8])}{'...' if len(slugs)>8 else ''}")

    succeeded = 0
    began = time.time()
    for pass_no in range(1, repeats + 1):
        print(f"[warm] Pass {pass_no}/{repeats}")
        for slug in slugs:
            preview_url = base_url + PREVIEW_PATH.format(slug=slug)
            try:
                fetch(preview_url)
                succeeded += 1
            except Exception as e:  # pragma: no cover
                print(f" [warn] Failed {slug}: {e}")
            if delay:
                time.sleep(delay)

    took = time.time() - began
    rate = succeeded / took if took > 0 else 0
    print(f"[warm] Completed {succeeded} preview requests in {took:.2f}s ({rate:.1f} rps)")
    print("[warm] Done. Now run metrics snapshot to capture warm p95.")
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv: list[str]) -> int:
    """Parse the command line and run the warm-up traffic generator.

    The base URL has trailing slashes removed before use. Always returns 0;
    failures surface as ``SystemExit`` raised by ``warm``.
    """
    parser = argparse.ArgumentParser(description="Generate warm preview traffic")
    parser.add_argument("--base-url", default="http://localhost:8000", help="Base URL (default: %(default)s)")
    parser.add_argument("--count", type=int, default=25, help="Number of distinct theme slugs to warm (default: %(default)s)")
    parser.add_argument("--repeats", type=int, default=2, help="Repeat passes over slugs (default: %(default)s)")
    parser.add_argument("--delay", type=float, default=0.05, help="Delay between requests in seconds (default: %(default)s)")
    options = parser.parse_args(argv)

    base_url = options.base_url.rstrip("/")
    warm(base_url, options.count, options.repeats, options.delay)
    return 0
|
|
||||||
|
|
||||||
if __name__ == "__main__": # pragma: no cover
|
|
||||||
import sys
|
|
||||||
raise SystemExit(main(sys.argv[1:]))
|
|
||||||
425
code/tagging/tag_index.py
Normal file
425
code/tagging/tag_index.py
Normal file
|
|
@ -0,0 +1,425 @@
|
||||||
|
"""Fast tag indexing for reverse lookups and bulk operations.
|
||||||
|
|
||||||
|
Provides a reverse index (tag → cards) for efficient tag-based queries.
|
||||||
|
Typical queries complete in <1ms after index is built.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Build index from all_cards
|
||||||
|
index = TagIndex()
|
||||||
|
index.build()
|
||||||
|
|
||||||
|
# Query cards with specific tag
|
||||||
|
cards = index.get_cards_with_tag("ramp") # Returns set of card names
|
||||||
|
|
||||||
|
# Query cards with multiple tags (AND logic)
|
||||||
|
cards = index.get_cards_with_all_tags(["tokens", "sacrifice"])
|
||||||
|
|
||||||
|
# Query cards with any of several tags (OR logic)
|
||||||
|
cards = index.get_cards_with_any_tags(["lifegain", "lifelink"])
|
||||||
|
|
||||||
|
# Get tags for a specific card
|
||||||
|
tags = index.get_tags_for_card("Sol Ring")
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, List, Set, Optional
|
||||||
|
|
||||||
|
from code.logging_util import get_logger
|
||||||
|
from code.services.all_cards_loader import AllCardsLoader
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
# Default cache path for persisted index
|
||||||
|
DEFAULT_CACHE_PATH = Path("card_files/.tag_index_metadata.json")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class IndexStats:
    """Statistics about the tag index."""
    # Number of cards that received at least one tag in the index
    total_cards: int
    # Number of distinct tags in the reverse index
    total_tags: int
    # Number of (card, tag) pairs recorded while building
    total_mappings: int
    # Elapsed build time in seconds
    build_time_seconds: float
    # Unix timestamp of when the index was built
    indexed_at: float
    # Modification time (Unix timestamp) of the source file; 0 when unavailable
    all_cards_mtime: float
|
||||||
|
|
||||||
|
|
||||||
|
class TagIndex:
|
||||||
|
"""Fast reverse index for tag-based card queries.
|
||||||
|
|
||||||
|
Builds two indexes:
|
||||||
|
- tag → set(card names) - Reverse index for fast tag queries
|
||||||
|
- card → list(tags) - Forward index for card tag lookups
|
||||||
|
|
||||||
|
Performance:
|
||||||
|
- Index build: <5s for 50k cards
|
||||||
|
- Query time: <1ms per lookup
|
||||||
|
- Memory: ~50-100MB for 30k cards
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, cache_path: Optional[Path] = None):
    """Create an empty index; nothing is loaded until ``build`` is called.

    Args:
        cache_path: Where the index is persisted. Falls back to
            ``DEFAULT_CACHE_PATH`` when omitted.
    """
    # Forward (card -> tags) and reverse (tag -> cards) lookups start empty.
    self._card_to_tags: Dict[str, List[str]] = {}
    self._tag_to_cards: Dict[str, Set[str]] = {}
    self._stats: Optional[IndexStats] = None
    self._cache_path = cache_path if cache_path else DEFAULT_CACHE_PATH
    self._loader = AllCardsLoader()
|
||||||
|
|
||||||
|
def build(self, force_rebuild: bool = False) -> IndexStats:
    """Build the tag index from all_cards.

    Loads all_cards through the loader and fills both the forward
    (card -> tags) and reverse (tag -> cards) indexes, then saves the result
    to the cache. If a cached index can be loaded, it is used instead.

    Args:
        force_rebuild: If True, rebuild even if a cache could be loaded.

    Returns:
        IndexStats with build metrics. When the loaded data has no
        ``themeTags`` column the index is left empty and all-zero stats are
        returned (nothing is written to the cache in that case).
    """
    # Check if we can use cached index
    if not force_rebuild and self._try_load_from_cache():
        logger.info(f"Loaded tag index from cache: {self._stats.total_cards} cards, {self._stats.total_tags} tags")
        return self._stats

    logger.info("Building tag index from all_cards...")
    start_time = time.perf_counter()

    # Load all cards
    df = self._loader.load()

    # Reset before inspecting the data so that a rebuild which finds no
    # themeTags column cannot leave entries from an earlier build behind
    # while the stats claim the index is empty.
    self._tag_to_cards.clear()
    self._card_to_tags.clear()

    if "themeTags" not in df.columns:
        logger.warning("themeTags column not found in all_cards")
        self._stats = IndexStats(
            total_cards=0,
            total_tags=0,
            total_mappings=0,
            build_time_seconds=0,
            indexed_at=time.time(),
            all_cards_mtime=0,
        )
        return self._stats

    # Walk the two needed columns in lockstep instead of materialising a
    # Series per row with iterrows(); a missing name column means every
    # row is skipped, as before.
    names = df["name"] if "name" in df.columns else [None] * len(df)
    total_mappings = 0
    for name, raw_tags in zip(names, df["themeTags"]):
        if not name:
            continue

        tags = self._normalize_tags(raw_tags)
        if not tags:
            continue

        # Store forward mapping (card → tags)
        self._card_to_tags[name] = tags

        # Build reverse mapping (tag → cards)
        for tag in tags:
            self._tag_to_cards.setdefault(tag, set()).add(name)
            total_mappings += 1

    build_time = time.perf_counter() - start_time

    # Record the source file mtime alongside the stats (0 if the file is gone)
    all_cards_mtime = 0
    if os.path.exists(self._loader.file_path):
        all_cards_mtime = os.path.getmtime(self._loader.file_path)

    self._stats = IndexStats(
        total_cards=len(self._card_to_tags),
        total_tags=len(self._tag_to_cards),
        total_mappings=total_mappings,
        build_time_seconds=build_time,
        indexed_at=time.time(),
        all_cards_mtime=all_cards_mtime,
    )

    logger.info(
        f"Built tag index: {self._stats.total_cards} cards, "
        f"{self._stats.total_tags} unique tags, "
        f"{self._stats.total_mappings} mappings in {build_time:.2f}s"
    )

    # Save to cache
    self._save_to_cache()

    return self._stats
|
||||||
|
|
||||||
|
def _normalize_tags(self, tags: object) -> List[str]:
|
||||||
|
"""Normalize tags from various formats to list of strings.
|
||||||
|
|
||||||
|
Handles:
|
||||||
|
- List of strings/objects
|
||||||
|
- String representations like "['tag1', 'tag2']"
|
||||||
|
- Comma-separated strings
|
||||||
|
- Empty/None values
|
||||||
|
"""
|
||||||
|
if not tags:
|
||||||
|
return []
|
||||||
|
|
||||||
|
if isinstance(tags, list):
|
||||||
|
# Already a list - normalize to strings
|
||||||
|
return [str(t).strip() for t in tags if t and str(t).strip()]
|
||||||
|
|
||||||
|
if isinstance(tags, str):
|
||||||
|
# Handle empty or list repr
|
||||||
|
if not tags or tags == "[]":
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Try parsing as list repr
|
||||||
|
if tags.startswith("["):
|
||||||
|
import ast
|
||||||
|
try:
|
||||||
|
parsed = ast.literal_eval(tags)
|
||||||
|
if isinstance(parsed, list):
|
||||||
|
return [str(t).strip() for t in parsed if t and str(t).strip()]
|
||||||
|
except (ValueError, SyntaxError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Fall back to comma-separated
|
||||||
|
return [t.strip() for t in tags.split(",") if t.strip()]
|
||||||
|
|
||||||
|
return []
|
||||||
|
|
||||||
|
def get_cards_with_tag(self, tag: str) -> Set[str]:
|
||||||
|
"""Get all card names that have a specific tag.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tag: Theme tag to search for (case-sensitive)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Set of card names with the tag (empty if tag not found)
|
||||||
|
|
||||||
|
Performance: O(1) lookup after index is built
|
||||||
|
"""
|
||||||
|
return self._tag_to_cards.get(tag, set()).copy()
|
||||||
|
|
||||||
|
def get_cards_with_all_tags(self, tags: List[str]) -> Set[str]:
|
||||||
|
"""Get cards that have ALL specified tags (AND logic).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tags: List of tags (card must have all of them)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Set of card names with all tags (empty if no matches)
|
||||||
|
|
||||||
|
Performance: O(k) where k is number of tags
|
||||||
|
"""
|
||||||
|
if not tags:
|
||||||
|
return set()
|
||||||
|
|
||||||
|
# Start with cards for first tag
|
||||||
|
result = self.get_cards_with_tag(tags[0])
|
||||||
|
|
||||||
|
# Intersect with cards for each additional tag
|
||||||
|
for tag in tags[1:]:
|
||||||
|
result &= self.get_cards_with_tag(tag)
|
||||||
|
if not result:
|
||||||
|
# Short-circuit if no cards remain
|
||||||
|
break
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
def get_cards_with_any_tags(self, tags: List[str]) -> Set[str]:
|
||||||
|
"""Get cards that have ANY of the specified tags (OR logic).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tags: List of tags (card needs at least one)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Set of card names with at least one tag
|
||||||
|
|
||||||
|
Performance: O(k) where k is number of tags
|
||||||
|
"""
|
||||||
|
result: Set[str] = set()
|
||||||
|
for tag in tags:
|
||||||
|
result |= self.get_cards_with_tag(tag)
|
||||||
|
return result
|
||||||
|
|
||||||
|
def get_tags_for_card(self, card_name: str) -> List[str]:
|
||||||
|
"""Get all tags for a specific card.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
card_name: Name of the card
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of theme tags for the card (empty if not found)
|
||||||
|
|
||||||
|
Performance: O(1) lookup
|
||||||
|
"""
|
||||||
|
return self._card_to_tags.get(card_name, []).copy()
|
||||||
|
|
||||||
|
def get_all_tags(self) -> List[str]:
    """Return every tag known to the index.

    Returns:
        All tag names from the tag-to-cards mapping, sorted ascending.
    """
    return sorted(self._tag_to_cards)
|
||||||
|
|
||||||
|
def get_tag_stats(self, tag: str) -> Dict[str, int]:
    """Return simple statistics for one tag.

    Args:
        tag: Tag to report on.

    Returns:
        A dict with a single ``'card_count'`` key holding the number of
        cards that carry the tag (0 when the tag is unknown).
    """
    tagged_cards = self._tag_to_cards.get(tag, set())
    count = len(tagged_cards)
    return {"card_count": count}
|
||||||
|
|
||||||
|
def get_popular_tags(self, limit: int = 50) -> List[tuple[str, int]]:
    """Return the tags with the most cards.

    Args:
        limit: Maximum number of entries to return.

    Returns:
        ``(tag, card_count)`` tuples ordered by ``card_count`` descending.
        Tags with equal counts keep the order in which they appear in the
        underlying mapping.
    """
    ranked = sorted(
        ((name, len(members)) for name, members in self._tag_to_cards.items()),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:limit]
|
||||||
|
|
||||||
|
def _save_to_cache(self) -> None:
    """Persist the index and its statistics to the JSON cache file.

    Does nothing when no statistics are present (``self._stats`` is falsy).
    Any failure while building or writing the payload is logged as a
    warning and otherwise ignored, so a cache problem never propagates to
    the caller.
    """
    stats = self._stats
    if not stats:
        return

    stat_fields = (
        "total_cards",
        "total_tags",
        "total_mappings",
        "build_time_seconds",
        "indexed_at",
        "all_cards_mtime",
    )

    try:
        payload = {
            "stats": {field: getattr(stats, field) for field in stat_fields},
            # Sets are not JSON-serializable; store each as a list.
            "tag_to_cards": {
                name: list(members)
                for name, members in self._tag_to_cards.items()
            },
            "card_to_tags": self._card_to_tags,
        }

        target = self._cache_path
        target.parent.mkdir(parents=True, exist_ok=True)
        with target.open("w", encoding="utf-8") as handle:
            json.dump(payload, handle, indent=2)

        logger.debug(f"Saved tag index cache to {self._cache_path}")

    except Exception as e:
        logger.warning(f"Failed to save tag index cache: {e}")
|
||||||
|
|
||||||
|
def _try_load_from_cache(self) -> bool:
|
||||||
|
"""Try to load index from cache file.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if cache loaded successfully and is up-to-date
|
||||||
|
"""
|
||||||
|
if not self._cache_path.exists():
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
with self._cache_path.open("r", encoding="utf-8") as f:
|
||||||
|
cache_data = json.load(f)
|
||||||
|
|
||||||
|
# Check if cache is up-to-date
|
||||||
|
stats_data = cache_data.get("stats", {})
|
||||||
|
cached_mtime = stats_data.get("all_cards_mtime", 0)
|
||||||
|
|
||||||
|
current_mtime = 0
|
||||||
|
if os.path.exists(self._loader.file_path):
|
||||||
|
current_mtime = os.path.getmtime(self._loader.file_path)
|
||||||
|
|
||||||
|
if current_mtime > cached_mtime:
|
||||||
|
logger.debug("Tag index cache outdated (all_cards modified)")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Load indexes
|
||||||
|
self._tag_to_cards = {
|
||||||
|
tag: set(cards)
|
||||||
|
for tag, cards in cache_data.get("tag_to_cards", {}).items()
|
||||||
|
}
|
||||||
|
self._card_to_tags = cache_data.get("card_to_tags", {})
|
||||||
|
|
||||||
|
# Restore stats
|
||||||
|
self._stats = IndexStats(**stats_data)
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to load tag index cache: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def clear_cache(self) -> None:
    """Remove the on-disk cache file, if there is one.

    A debug message is logged only when a file was actually deleted.
    """
    cache_file = self._cache_path
    if not cache_file.exists():
        return

    cache_file.unlink()
    logger.debug(f"Deleted tag index cache: {self._cache_path}")
|
||||||
|
|
||||||
|
def get_stats(self) -> Optional[IndexStats]:
    """Return the statistics currently held by the index.

    Returns:
        The stored ``IndexStats`` object, or None when no statistics have
        been set on this instance yet.
    """
    current_stats = self._stats
    return current_stats
|
||||||
|
|
||||||
|
|
||||||
|
# Global index instance
|
||||||
|
_global_index: Optional[TagIndex] = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_tag_index(force_rebuild: bool = False) -> TagIndex:
    """Return the shared module-level tag index, creating it on demand.

    A new ``TagIndex`` is created and built on the first call, or whenever
    ``force_rebuild`` is true. An existing instance whose ``_stats`` is
    still None is built in place before being returned.

    Args:
        force_rebuild: If True, replace the shared instance and pass
            ``force_rebuild=True`` to its ``build()`` call.

    Returns:
        The shared ``TagIndex`` instance.
    """
    global _global_index

    if force_rebuild or _global_index is None:
        # Publish the new instance before building, so a failing build
        # still leaves an (unbuilt) instance behind for the next call.
        _global_index = TagIndex()
        _global_index.build(force_rebuild=force_rebuild)
        return _global_index

    if _global_index._stats is None:
        # Instance exists but has not been built yet.
        _global_index.build()

    return _global_index
|
||||||
|
|
||||||
|
|
||||||
|
def clear_global_index() -> None:
    """Drop the shared tag index and delete its on-disk cache.

    If a shared instance exists (and is truthy), its ``clear_cache()`` is
    called first; the module-level reference is then reset to None.
    """
    global _global_index

    current = _global_index
    if current:
        current.clear_cache()
    _global_index = None
|
||||||
229
code/tagging/tag_loader.py
Normal file
229
code/tagging/tag_loader.py
Normal file
|
|
@ -0,0 +1,229 @@
|
||||||
|
"""Efficient tag loading using consolidated all_cards file.
|
||||||
|
|
||||||
|
Provides batch tag loading functions that leverage the all_cards.parquet file
|
||||||
|
instead of reading individual card CSV files. This is 10-50x faster for bulk
|
||||||
|
operations like deck building.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Load tags for multiple cards at once
|
||||||
|
tags_dict = load_tags_for_cards(["Sol Ring", "Lightning Bolt", "Counterspell"])
|
||||||
|
# Returns: {"Sol Ring": ["artifacts", "ramp"], "Lightning Bolt": ["burn"], ...}
|
||||||
|
|
||||||
|
# Load tags for a single card
|
||||||
|
tags = load_tags_for_card("Sol Ring")
|
||||||
|
# Returns: ["artifacts", "ramp"]
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
from code.logging_util import get_logger
|
||||||
|
from code.services.all_cards_loader import AllCardsLoader
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
# Global loader instance for caching
|
||||||
|
_loader_instance: Optional[AllCardsLoader] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_loader() -> AllCardsLoader:
    """Return the shared ``AllCardsLoader``, creating it on first use."""
    global _loader_instance

    if _loader_instance is not None:
        return _loader_instance

    _loader_instance = AllCardsLoader()
    return _loader_instance
|
||||||
|
|
||||||
|
|
||||||
|
def clear_cache() -> None:
    """Forget the shared loader so the next lookup creates a fresh one.

    Useful after the all_cards data has been updated on disk.
    """
    global _loader_instance
    _loader_instance = None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_theme_tags(raw: object) -> List[str]:
    """Normalize one raw ``themeTags`` cell into a list of tag strings.

    Accepted shapes:
      * list / tuple / set / frozenset of tags
      * array-like objects exposing ``tolist()`` (presumably how
        parquet-backed frames deliver list columns - confirm against the
        loader)
      * a string holding either a Python list repr (``"['a', 'b']"``) or
        comma-separated tags
    Anything else (None, NaN, numbers, ...) yields an empty list.
    """
    if isinstance(raw, str):
        if not raw or raw == "[]":
            return []
        if raw.startswith("["):
            # List representation like "['tag1', 'tag2']"
            import ast

            try:
                parsed = ast.literal_eval(raw)
            except (ValueError, SyntaxError):
                # Not a valid literal after all - fall back to comma split.
                return [t.strip() for t in raw.split(",") if t.strip()]
            if isinstance(parsed, list):
                return [str(t).strip() for t in parsed if t]
            return []
        # Comma-separated tags
        return [t.strip() for t in raw.split(",") if t.strip()]

    if isinstance(raw, (list, tuple, set, frozenset)):
        return [str(t).strip() for t in raw if t]

    # Array-likes: convert to a plain list before truth-testing elements.
    to_list = getattr(raw, "tolist", None)
    if callable(to_list):
        converted = to_list()
        if isinstance(converted, list):
            return [str(t).strip() for t in converted if t]

    return []


def load_tags_for_cards(card_names: List[str]) -> Dict[str, List[str]]:
    """Load theme tags for multiple cards in one batch operation.

    All names are resolved with a single ``get_by_names`` call on the
    shared loader instead of one lookup per card.

    Args:
        card_names: List of card names to load tags for

    Returns:
        Dictionary mapping card name to list of theme tags. Every requested
        name is present in the result; cards that are not found, have no
        tags, or could not be loaded because of an error map to an empty
        list.

    Example:
        >>> tags = load_tags_for_cards(["Sol Ring", "Lightning Bolt"])
        >>> tags["Sol Ring"]
        ["artifacts", "ramp"]
    """
    if not card_names:
        return {}

    loader = _get_loader()

    try:
        # Batch lookup - single query for all cards
        df = loader.get_by_names(card_names)

        if df.empty:
            logger.debug(f"No cards found for {len(card_names)} names")
            return {name: [] for name in card_names}

        if "themeTags" not in df.columns:
            logger.warning("themeTags column not found in all_cards")
            return {name: [] for name in card_names}

        result: Dict[str, List[str]] = {}
        for _, row in df.iterrows():
            name = row.get("name")
            if not name:
                continue
            result[name] = _parse_theme_tags(row.get("themeTags", []))

        # Fill in missing cards with empty lists
        for name in card_names:
            result.setdefault(name, [])

        return result

    except FileNotFoundError:
        logger.warning("all_cards file not found, returning empty tags")
        return {name: [] for name in card_names}
    except Exception as e:
        # Best effort: tag loading must not break the caller.
        logger.error(f"Error loading tags for cards: {e}")
        return {name: [] for name in card_names}
|
||||||
|
|
||||||
|
|
||||||
|
def load_tags_for_card(card_name: str) -> List[str]:
    """Load theme tags for a single card.

    Thin wrapper around ``load_tags_for_cards``; prefer the batch function
    when tags for several cards are needed.

    Args:
        card_name: Name of the card

    Returns:
        List of theme tags for the card (empty if not found)

    Example:
        >>> tags = load_tags_for_card("Sol Ring")
        >>> "artifacts" in tags
        True
    """
    tags_by_name = load_tags_for_cards([card_name])
    if card_name in tags_by_name:
        return tags_by_name[card_name]
    return []
|
||||||
|
|
||||||
|
|
||||||
|
def get_cards_with_tag(tag: str, limit: Optional[int] = None) -> List[str]:
    """Get the names of all cards that have a specific tag.

    Args:
        tag: Theme tag to search for
        limit: Maximum number of cards to return (None = no limit)

    Returns:
        List of card names with the tag. Empty when the filtered frame has
        no ``name`` column or when the lookup raises.

    Example:
        >>> cards = get_cards_with_tag("ramp", limit=10)
        >>> len(cards) <= 10
        True
    """
    loader = _get_loader()

    try:
        matches = loader.filter_by_themes([tag], mode="any")
        if "name" not in matches.columns:
            return []

        names = matches["name"].tolist()
        if limit is None or len(names) <= limit:
            return names
        return names[:limit]

    except Exception as e:
        logger.error(f"Error getting cards with tag '{tag}': {e}")
        return []
|
||||||
|
|
||||||
|
|
||||||
|
def get_cards_with_all_tags(tags: List[str], limit: Optional[int] = None) -> List[str]:
    """Get the names of all cards that have ALL of the specified tags.

    Args:
        tags: List of theme tags (card must have all of them)
        limit: Maximum number of cards to return (None = no limit)

    Returns:
        List of card names with all specified tags. Empty when the filtered
        frame has no ``name`` column or when the lookup raises.

    Example:
        >>> cards = get_cards_with_all_tags(["ramp", "artifacts"])
        >>> # Returns cards that have both ramp AND artifacts tags
    """
    loader = _get_loader()

    try:
        matches = loader.filter_by_themes(tags, mode="all")
        if "name" not in matches.columns:
            return []

        names = matches["name"].tolist()
        if limit is None or len(names) <= limit:
            return names
        return names[:limit]

    except Exception as e:
        logger.error(f"Error getting cards with all tags {tags}: {e}")
        return []
|
||||||
|
|
||||||
|
|
||||||
|
def is_use_all_cards_enabled() -> bool:
    """Report whether all_cards-based tag loading is switched on.

    Reads the ``USE_ALL_CARDS_FOR_TAGS`` environment variable; when it is
    unset the feature counts as enabled.

    Returns:
        True if the (case-insensitive) value is one of ``1``, ``true``,
        ``yes`` or ``on``; False for any other value.
    """
    raw_setting = os.environ.get("USE_ALL_CARDS_FOR_TAGS", "true")
    truthy_values = ("1", "true", "yes", "on")
    return raw_setting.lower() in truthy_values
|
||||||
602
code/tagging/theme_enrichment.py
Normal file
602
code/tagging/theme_enrichment.py
Normal file
|
|
@ -0,0 +1,602 @@
|
||||||
|
"""Consolidated theme metadata enrichment pipeline.
|
||||||
|
|
||||||
|
Replaces 7 separate subprocess scripts with a single efficient in-memory pipeline:
|
||||||
|
1. autofill_min_examples - Add placeholder examples
|
||||||
|
2. pad_min_examples - Pad to minimum threshold
|
||||||
|
3. cleanup_placeholder_examples - Remove placeholders when real examples added
|
||||||
|
4. purge_anchor_placeholders - Purge legacy anchor placeholders
|
||||||
|
5. augment_theme_yaml_from_catalog - Add descriptions/popularity from catalog
|
||||||
|
6. generate_theme_editorial_suggestions - Generate editorial suggestions
|
||||||
|
7. lint_theme_editorial - Validate metadata
|
||||||
|
|
||||||
|
Performance improvement: 5-10x faster by loading all YAMLs once, processing in memory,
|
||||||
|
writing once at the end.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import string
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Callable, Dict, List, Optional, Set
|
||||||
|
|
||||||
|
try:
|
||||||
|
import yaml # type: ignore
|
||||||
|
except ImportError: # pragma: no cover
|
||||||
|
yaml = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ThemeData:
    """One theme YAML file held in memory while the pipeline runs."""

    # Location of the YAML file this record was read from / is written to.
    path: Path
    # Parsed YAML mapping; pipeline steps edit this dict in place.
    data: Dict[str, Any]
    # Set to True by any step that changed ``data``; only modified themes
    # are written back to disk.
    modified: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class EnrichmentStats:
    """Counters collected during one enrichment pipeline run.

    Each counter is the number of themes touched by the corresponding
    step; the lint fields hold the number of validation messages.
    """

    autofilled: int = 0
    padded: int = 0
    cleaned: int = 0
    purged: int = 0
    augmented: int = 0
    suggestions_added: int = 0
    lint_errors: int = 0
    lint_warnings: int = 0
    total_themes: int = 0

    def __str__(self) -> str:
        """Render a one-line, human-readable summary of all counters."""
        headline = f"Enrichment complete: {self.total_themes} themes processed"
        step_counts = (
            f"autofilled:{self.autofilled} padded:{self.padded} cleaned:{self.cleaned} "
            f"purged:{self.purged} augmented:{self.augmented} suggestions:{self.suggestions_added}"
        )
        lint_summary = f"lint: {self.lint_errors} errors, {self.lint_warnings} warnings"
        return " | ".join((headline, step_counts, lint_summary))
|
||||||
|
|
||||||
|
|
||||||
|
class ThemeEnrichmentPipeline:
|
||||||
|
"""Consolidated theme metadata enrichment pipeline."""
|
||||||
|
|
||||||
|
def __init__(
    self,
    root: Optional[Path] = None,
    min_examples: int = 5,
    progress_callback: Optional[Callable[[str], None]] = None,
):
    """Set up paths, options and empty state for a pipeline run.

    Args:
        root: Project root directory. When omitted, the directory two
            levels above this file's own directory is used.
        min_examples: Minimum number of example commanders required
        progress_callback: Optional callback for progress updates (for web UI)
    """
    project_root = Path(__file__).resolve().parents[2] if root is None else root
    themes_dir = project_root / 'config' / 'themes'

    # Locations derived from the project root.
    self.root = project_root
    self.catalog_dir = themes_dir / 'catalog'
    self.theme_json = themes_dir / 'theme_list.json'
    self.csv_dir = project_root / 'csv_files'

    # Run options.
    self.min_examples = min_examples
    self.progress_callback = progress_callback

    # Mutable run state.
    self.themes: Dict[Path, ThemeData] = {}
    self.stats = EnrichmentStats()

    # Lazily populated caches.
    self._catalog_map: Optional[Dict[str, Dict[str, Any]]] = None
    self._card_suggestions: Optional[Dict[str, Any]] = None
|
||||||
|
|
||||||
|
def _emit(self, message: str) -> None:
|
||||||
|
"""Emit progress message via callback or print."""
|
||||||
|
if self.progress_callback:
|
||||||
|
try:
|
||||||
|
self.progress_callback(message)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
print(message, flush=True)
|
||||||
|
|
||||||
|
def load_all_themes(self) -> None:
    """Read every ``*.yml`` file in the catalog directory into memory (Step 0).

    Files are processed in sorted path order. A file that cannot be read
    or parsed only produces a warning; a file whose top-level YAML value
    is not a mapping is ignored silently. Entries already present in
    ``self.themes`` are replaced per path, never cleared up front.
    """
    catalog = self.catalog_dir
    if not catalog.exists():
        self._emit("Warning: Catalog directory does not exist")
        return

    yaml_files = sorted(catalog.glob('*.yml'))
    self.stats.total_themes = len(yaml_files)

    for yaml_file in yaml_files:
        try:
            if yaml is None:
                raise RuntimeError("PyYAML not installed")
            parsed = yaml.safe_load(yaml_file.read_text(encoding='utf-8'))
            if not isinstance(parsed, dict):
                continue
            self.themes[yaml_file] = ThemeData(path=yaml_file, data=parsed)
        except Exception as e:
            self._emit(f"Warning: Failed to load {yaml_file.name}: {e}")

    self._emit(f"Loaded {len(self.themes)} theme files")
|
||||||
|
|
||||||
|
def _is_deprecated_alias(self, theme_data: Dict[str, Any]) -> bool:
|
||||||
|
"""Check if theme is a deprecated alias placeholder."""
|
||||||
|
notes = theme_data.get('notes')
|
||||||
|
return isinstance(notes, str) and 'Deprecated alias file' in notes
|
||||||
|
|
||||||
|
def _is_placeholder(self, entry: str) -> bool:
|
||||||
|
"""Check if an example entry is a placeholder.
|
||||||
|
|
||||||
|
Matches:
|
||||||
|
- "Theme Anchor"
|
||||||
|
- "Theme Anchor B"
|
||||||
|
- "Theme Anchor C"
|
||||||
|
etc.
|
||||||
|
"""
|
||||||
|
pattern = re.compile(r" Anchor( [A-Z])?$")
|
||||||
|
return bool(pattern.search(entry))
|
||||||
|
|
||||||
|
# Step 1: Autofill minimal placeholders
|
||||||
|
def autofill_placeholders(self) -> None:
    """Add placeholder examples for themes with zero examples.

    For each eligible theme a placeholder list is generated from its
    display name plus its first two synergies (``"<name> Anchor"``),
    ``editorial_quality`` defaults to ``'draft'``, and the theme is marked
    modified.

    Themes are skipped when they are deprecated aliases, have no
    ``display_name``, or already carry a non-empty ``example_commanders``
    list - real examples and previously generated placeholders alike are
    left untouched, so re-running the step neither rewrites existing lists
    nor inflates the ``autofilled`` counter.
    """
    for theme in self.themes.values():
        data = theme.data

        if self._is_deprecated_alias(data):
            continue

        display = data.get('display_name')
        if not display:
            continue

        # Any existing example list (real or placeholder-only) is kept
        # as-is to avoid overwriting it.
        examples = data.get('example_commanders') or []
        if isinstance(examples, list) and examples:
            continue

        synergies = data.get('synergies') or []
        if not isinstance(synergies, list):
            synergies = []

        # Generate placeholders from display name + first 2 synergies.
        placeholders = [f"{display} Anchor"]
        placeholders.extend(
            f"{s} Anchor"
            for s in synergies[:2]
            if isinstance(s, str) and s and s != display
        )

        data['example_commanders'] = placeholders
        if not data.get('editorial_quality'):
            data['editorial_quality'] = 'draft'

        theme.modified = True
        self.stats.autofilled += 1
|
||||||
|
|
||||||
|
# Step 2: Pad to minimum examples
|
||||||
|
def pad_examples(self) -> None:
    """Pad example lists to minimum threshold with placeholders.

    Only lists that consist purely of placeholder strings are padded;
    lists containing any real example (or any non-string entry) are left
    alone. Extra placeholders are taken first from the synergies beyond
    the first two (``"<synergy> Anchor"``), then from lettered variants of
    the display name (``"<name> Anchor B"``, ``"... C"``, ...). A candidate
    is never added twice, even when the synergy list repeats a name.
    """
    for theme in self.themes.values():
        data = theme.data

        if self._is_deprecated_alias(data):
            continue

        display = data.get('display_name')
        if not display:
            continue

        examples = data.get('example_commanders') or []
        if not isinstance(examples, list):
            continue

        need = self.min_examples - len(examples)
        if need <= 0:
            continue

        # Only pad pure placeholder sets (heuristic: don't mix real +
        # placeholders). Non-string entries count as "not a placeholder"
        # instead of crashing the regex check.
        if any(not (isinstance(e, str) and self._is_placeholder(e)) for e in examples):
            continue

        synergies = data.get('synergies')
        if not isinstance(synergies, list):
            synergies = []

        # Candidate order: 1. synergies beyond the first 2,
        # 2. generic letter suffixes starting from 'B'.
        candidates = [
            f"{syn} Anchor"
            for syn in synergies[2:]
            if isinstance(syn, str) and syn and syn != display
        ]
        candidates.extend(
            f"{display} Anchor {suffix}" for suffix in string.ascii_uppercase[1:]
        )

        used = set(examples)
        new_placeholders = []
        for cand in candidates:
            if len(new_placeholders) >= need:
                break
            if cand in used:
                continue
            used.add(cand)  # guards against repeated synergy names
            new_placeholders.append(cand)

        if new_placeholders:
            data['example_commanders'] = examples + new_placeholders
            if not data.get('editorial_quality'):
                data['editorial_quality'] = 'draft'
            theme.modified = True
            self.stats.padded += 1
|
||||||
|
|
||||||
|
# Step 3: Cleanup placeholders when real examples exist
|
||||||
|
def cleanup_placeholders(self) -> None:
    """Remove placeholders when real examples have been added.

    A theme is cleaned only when its ``example_commanders`` list contains
    both placeholder strings and real (non-placeholder) strings; the list
    is then replaced by the real strings alone. Lists made up solely of
    placeholders, or solely of real examples, are left untouched.

    Note: when a list is cleaned, non-string entries are dropped together
    with the placeholders.
    """
    for theme in self.themes.values():
        data = theme.data

        if self._is_deprecated_alias(data):
            continue

        if not data.get('display_name'):
            continue

        examples = data.get('example_commanders')
        if not isinstance(examples, list) or not examples:
            continue

        text_entries = [e for e in examples if isinstance(e, str)]
        real = [e for e in text_entries if not self._is_placeholder(e)]

        # Act only on mixed lists: at least one real example AND at least
        # one placeholder (i.e. something was filtered out).
        if not real or len(real) == len(text_entries):
            continue

        data['example_commanders'] = real
        theme.modified = True
        self.stats.cleaned += 1
|
||||||
|
|
||||||
|
# Step 4: Purge legacy anchor placeholders
|
||||||
|
def purge_anchors(self) -> None:
    """Strip every anchor placeholder from all example lists.

    Unlike the cleanup step, this removes placeholders unconditionally:
    a list consisting only of placeholders ends up empty. Themes without
    any placeholder are left untouched. When a list is purged, non-string
    entries are dropped as well.
    """
    anchor_suffix = re.compile(r" Anchor( [A-Z])?$")

    for theme in self.themes.values():
        current = theme.data.get('example_commanders')
        if not isinstance(current, list) or not current:
            continue

        text_entries = [item for item in current if isinstance(item, str)]
        kept = [item for item in text_entries if not anchor_suffix.search(item)]
        if len(kept) == len(text_entries):
            # No placeholder found in this list.
            continue

        theme.data['example_commanders'] = kept
        theme.modified = True
        self.stats.purged += 1
|
||||||
|
|
||||||
|
# Step 5: Augment from catalog
|
||||||
|
def _load_catalog_map(self) -> Dict[str, Dict[str, Any]]:
|
||||||
|
"""Load theme_list.json catalog into memory."""
|
||||||
|
if self._catalog_map is not None:
|
||||||
|
return self._catalog_map
|
||||||
|
|
||||||
|
if not self.theme_json.exists():
|
||||||
|
self._emit("Warning: theme_list.json not found")
|
||||||
|
self._catalog_map = {}
|
||||||
|
return self._catalog_map
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = json.loads(self.theme_json.read_text(encoding='utf-8') or '{}')
|
||||||
|
themes = data.get('themes') or []
|
||||||
|
self._catalog_map = {}
|
||||||
|
for t in themes:
|
||||||
|
if isinstance(t, dict) and t.get('theme'):
|
||||||
|
self._catalog_map[str(t['theme'])] = t
|
||||||
|
except Exception as e:
|
||||||
|
self._emit(f"Warning: Failed to parse theme_list.json: {e}")
|
||||||
|
self._catalog_map = {}
|
||||||
|
|
||||||
|
return self._catalog_map
|
||||||
|
|
||||||
|
def augment_from_catalog(self) -> None:
    """Backfill missing metadata fields from ``theme_list.json``.

    For every non-deprecated theme whose display name has a catalog
    entry, the fields ``description``, ``popularity_bucket``,
    ``popularity_hint`` and ``deck_archetype`` are copied over - but only
    when the key is absent from the theme and the catalog value is
    truthy. Existing values are never overwritten.
    """
    catalog = self._load_catalog_map()
    if not catalog:
        return

    # Order matters: it determines the order keys are inserted in.
    backfill_keys = ('description', 'popularity_bucket', 'popularity_hint', 'deck_archetype')

    for theme in self.themes.values():
        data = theme.data

        if self._is_deprecated_alias(data):
            continue

        name = str(data.get('display_name') or '').strip()
        if not name:
            continue

        entry = catalog.get(name)
        if not entry:
            continue

        to_copy = [key for key in backfill_keys if key not in data and entry.get(key)]
        if not to_copy:
            continue

        for key in to_copy:
            data[key] = entry[key]
        theme.modified = True
        self.stats.augmented += 1
|
||||||
|
|
||||||
|
# Step 6: Generate editorial suggestions (simplified - full implementation would scan CSVs)
|
||||||
|
def generate_suggestions(self) -> None:
    """Generate editorial suggestions for missing example_cards/commanders.

    Runs ``code/scripts/generate_theme_editorial_suggestions.py`` as a
    subprocess (with ``--apply``) and, on success, reloads all theme files
    from disk. The step is skipped when the environment variable
    ``SKIP_EDITORIAL_SUGGESTIONS`` is ``1``/``true``/``yes`` or when the
    script does not exist. Failures and timeouts are reported via
    ``_emit`` and never raised.

    NOTE(review): the reload replaces the in-memory entry for every file
    that loads successfully, so in-memory edits made by earlier steps and
    not yet written appear to be discarded for those files - confirm this
    is intended.
    """
    import os
    import subprocess

    # Allow callers to opt out of the (slow) generator via the environment.
    skip_flag = os.environ.get('SKIP_EDITORIAL_SUGGESTIONS', '').lower()
    if skip_flag in ('1', 'true', 'yes'):
        self._emit("Skipping editorial suggestions generation (SKIP_EDITORIAL_SUGGESTIONS=1)")
        return

    generator = self.root / 'code' / 'scripts' / 'generate_theme_editorial_suggestions.py'
    if not generator.exists():
        self._emit("Editorial suggestions script not found; skipping")
        return

    # --apply writes missing fields; the limits keep the batch reasonable.
    command = [sys.executable, str(generator), '--apply', '--limit-yaml', '1000', '--top', '8']

    try:
        self._emit("Generating example_cards and example_commanders from CSV data...")
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=300,  # 5 minute timeout
            cwd=str(self.root),
        )
        if completed.returncode != 0:
            self._emit(f"Editorial suggestions script failed (exit {completed.returncode}): {completed.stderr[:200]}")
        else:
            # Reload themes to pick up the generated examples
            self.load_all_themes()
            self._emit("Editorial suggestions generated successfully")
    except subprocess.TimeoutExpired:
        self._emit("Editorial suggestions generation timed out (skipping)")
    except Exception as e:
        self._emit(f"Failed to generate editorial suggestions: {e}")
|
||||||
|
|
||||||
|
# Step 7: Lint/validate
|
||||||
|
# Deck archetype names accepted by validate(); anything else is reported
# as a warning.
ALLOWED_ARCHETYPES: Set[str] = {
    'Lands',
    'Graveyard',
    'Planeswalkers',
    'Tokens',
    'Counters',
    'Spells',
    'Artifacts',
    'Enchantments',
    'Politics',
    'Combo',
    'Aggro',
    'Control',
    'Midrange',
    'Stax',
    'Ramp',
    'Toolbox',
}

# Themes that must have examples when validate() runs in strict mode.
CORNERSTONE: Set[str] = {
    'Landfall',
    'Reanimate',
    'Superfriends',
    'Tokens Matter',
    '+1/+1 Counters',
}
|
||||||
|
|
||||||
|
def validate(self, enforce_min: bool = False, strict: bool = False) -> None:
    """Lint the loaded theme metadata and report findings.

    Deprecated aliases, themes without a display name and repeated
    display names are skipped. Findings are counted into
    ``stats.lint_errors`` / ``stats.lint_warnings`` and then emitted,
    all errors first, then all warnings.

    Args:
        enforce_min: Report lists shorter than ``min_examples`` as errors
            instead of warnings (empty lists are not reported).
        strict: Additionally require cornerstone themes to have both
            example commanders and example cards.
    """
    errors: List[str] = []
    warnings: List[str] = []
    seen_display: Set[str] = set()

    for theme in self.themes.values():
        data = theme.data

        if self._is_deprecated_alias(data):
            continue

        name = str(data.get('display_name') or '').strip()
        if not name or name in seen_display:
            # Unnamed theme, or a duplicate of one already checked.
            continue
        seen_display.add(name)

        commanders = data.get('example_commanders') or []
        cards = data.get('example_cards') or []

        # Type checks: a wrong type is an error and is then treated as empty.
        if not isinstance(commanders, list):
            errors.append(f"{name}: example_commanders not a list")
            commanders = []
        if not isinstance(cards, list):
            errors.append(f"{name}: example_cards not a list")
            cards = []

        commander_count = len(commanders)
        card_count = len(cards)

        # Upper bounds
        if commander_count > 12:
            warnings.append(f"{name}: example_commanders has {commander_count} entries (>12)")
        if card_count > 20:
            warnings.append(f"{name}: example_cards has {card_count} entries (>20)")

        # Lower bound (only for non-empty lists)
        if 0 < commander_count < self.min_examples:
            msg = f"{name}: only {commander_count} example_commanders (<{self.min_examples} minimum)"
            sink = errors if enforce_min else warnings
            sink.append(msg)

        # Cornerstone themes should have examples (if strict)
        if strict and name in self.CORNERSTONE:
            if not commanders:
                errors.append(f"{name}: cornerstone theme missing example_commanders")
            if not cards:
                errors.append(f"{name}: cornerstone theme missing example_cards")

        # Deck archetype validation
        archetype = data.get('deck_archetype')
        if archetype and archetype not in self.ALLOWED_ARCHETYPES:
            warnings.append(f"{name}: unknown deck_archetype '{archetype}'")

    self.stats.lint_errors = len(errors)
    self.stats.lint_warnings = len(warnings)

    for err in errors:
        self._emit(f"ERROR: {err}")
    for warn in warnings:
        self._emit(f"WARNING: {warn}")
|
||||||
|
|
||||||
|
def write_all_themes(self) -> None:
    """Persist every theme flagged as modified back to its YAML file.

    Each modified theme is serialized with ``yaml.safe_dump`` (key order
    preserved, unicode allowed) and written as UTF-8 to ``theme.path``.
    A failure on one file is reported through ``self._emit`` and the loop
    carries on with the remaining themes.

    Raises:
        RuntimeError: If PyYAML could not be imported.
    """
    if yaml is None:
        raise RuntimeError("PyYAML not installed; cannot write themes")

    saved = 0
    for entry in self.themes.values():
        # Untouched themes are left alone on disk.
        if not entry.modified:
            continue
        try:
            serialized = yaml.safe_dump(entry.data, sort_keys=False, allow_unicode=True)
            entry.path.write_text(serialized, encoding='utf-8')
        except Exception as exc:
            # Best effort: report the failure and keep writing the rest.
            self._emit(f"Error writing {entry.path.name}: {exc}")
        else:
            saved += 1

    self._emit(f"Wrote {saved} modified theme files")
|
||||||
|
|
||||||
|
def run_all(
    self,
    write: bool = True,
    enforce_min: bool = False,
    strict_lint: bool = False,
    run_purge: bool = False,
) -> EnrichmentStats:
    """Execute every enrichment stage in order and return the statistics.

    Args:
        write: Persist modified themes when True; False performs a dry run.
        enforce_min: Forwarded to ``validate`` as ``enforce_min``.
        strict_lint: Forwarded to ``validate`` as ``strict``.
        run_purge: Run the purge stage, which removes ALL anchor
            placeholders. Off by default.

    Returns:
        The pipeline's ``stats`` object after all stages have run.
    """
    self._emit("Starting theme enrichment pipeline...")

    # Stage 0: every later stage works on the loaded themes.
    self.load_all_themes()

    # Stage 4 is opt-in: purging strips ALL anchors, even from lists that
    # contain nothing but placeholders, so it is meant only for a one-time
    # migration away from the placeholder system.
    if run_purge:
        purge_stage = ("Step 4/7: Purging legacy anchors...", self.purge_anchors)
    else:
        purge_stage = ("Step 4/7: Skipping purge (preserving placeholders)...", None)

    stages = (
        ("Step 1/7: Autofilling placeholders...", self.autofill_placeholders),
        ("Step 2/7: Padding to minimum examples...", self.pad_examples),
        ("Step 3/7: Cleaning up placeholders...", self.cleanup_placeholders),
        purge_stage,
        ("Step 5/7: Augmenting from catalog...", self.augment_from_catalog),
        ("Step 6/7: Generating suggestions...", self.generate_suggestions),
    )
    for banner, action in stages:
        self._emit(banner)
        if action is not None:
            action()

    # Stage 7: validation takes the caller's strictness settings.
    self._emit("Step 7/7: Validating metadata...")
    self.validate(enforce_min=enforce_min, strict=strict_lint)

    if not write:
        self._emit("Dry run: no files written")
    else:
        self._emit("Writing changes to disk...")
        self.write_all_themes()

    self._emit(str(self.stats))
    return self.stats
|
||||||
|
|
||||||
|
|
||||||
|
def run_enrichment_pipeline(
    root: Optional[Path] = None,
    min_examples: int = 5,
    write: bool = True,
    enforce_min: bool = False,
    strict: bool = False,
    run_purge: bool = False,
    progress_callback: Optional[Callable[[str], None]] = None,
) -> EnrichmentStats:
    """Build a ``ThemeEnrichmentPipeline`` and run it end to end.

    Args:
        root: Project root directory, passed to the pipeline constructor.
        min_examples: Minimum number of example commanders.
        write: Write changes to disk when True; False is a dry run.
        enforce_min: Treat minimum-example violations as errors.
        strict: Enforce strict validation rules (passed on as ``strict_lint``).
        run_purge: Run the purge step, which removes ALL placeholders.
        progress_callback: Optional callable receiving progress messages.

    Returns:
        The ``EnrichmentStats`` produced by ``run_all``.
    """
    runner = ThemeEnrichmentPipeline(
        root=root,
        min_examples=min_examples,
        progress_callback=progress_callback,
    )
    # Note the rename: this function's ``strict`` is ``strict_lint`` on run_all.
    run_options = {
        "write": write,
        "enforce_min": enforce_min,
        "strict_lint": strict,
        "run_purge": run_purge,
    }
    return runner.run_all(**run_options)
|
||||||
429
code/tests/test_tag_index.py
Normal file
429
code/tests/test_tag_index.py
Normal file
|
|
@ -0,0 +1,429 @@
|
||||||
|
"""Tests for tag index functionality."""
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
|
||||||
|
from code.tagging.tag_index import (
|
||||||
|
TagIndex,
|
||||||
|
IndexStats,
|
||||||
|
get_tag_index,
|
||||||
|
clear_global_index,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestTagIndexBuild:
    """Exercise construction of the tag index."""

    def test_build_index(self):
        """A fresh build returns populated, well-typed statistics."""
        summary = TagIndex().build()

        assert isinstance(summary, IndexStats)
        assert summary.total_cards > 0
        assert summary.total_tags > 0
        assert summary.total_mappings > 0
        assert summary.build_time_seconds >= 0

    def test_build_index_performance(self):
        """Building stays under the five second budget."""
        tag_index = TagIndex()

        began = time.perf_counter()
        summary = tag_index.build()
        duration = time.perf_counter() - began

        # Both the wall clock and the self-reported time must be < 5s.
        assert duration < 5.0
        assert summary.build_time_seconds < 5.0

    def test_force_rebuild(self):
        """``force_rebuild=True`` yields a newer ``indexed_at`` stamp."""
        tag_index = TagIndex()

        first_stamp = tag_index.build().indexed_at

        # Small pause so the two timestamps can differ.
        time.sleep(0.1)

        second_stamp = tag_index.build(force_rebuild=True).indexed_at

        assert second_stamp > first_stamp
|
||||||
|
|
||||||
|
|
||||||
|
class TestSingleTagQueries:
    """Lookups that involve exactly one tag or one card."""

    def test_get_cards_with_tag(self):
        """An existing tag maps to a non-empty set of cards."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if not known_tags:
            return  # nothing indexed, nothing to check

        matches = tag_index.get_cards_with_tag(known_tags[0])

        assert isinstance(matches, set)
        assert len(matches) > 0

    def test_get_cards_with_nonexistent_tag(self):
        """An unknown tag yields an empty set."""
        tag_index = TagIndex()
        tag_index.build()

        matches = tag_index.get_cards_with_tag("ThisTagDoesNotExist12345")

        assert matches == set()

    def test_get_tags_for_card(self):
        """A card found through a tag reports at least one tag itself."""
        tag_index = TagIndex()
        tag_index.build()

        # Locate some card that is known to exist via the first tag.
        if tag_index.get_all_tags():
            sample_cards = tag_index.get_cards_with_tag(tag_index.get_all_tags()[0])
        else:
            sample_cards = set()

        if not sample_cards:
            return

        chosen = next(iter(sample_cards))
        found_tags = tag_index.get_tags_for_card(chosen)

        assert isinstance(found_tags, list)
        assert len(found_tags) > 0

    def test_get_tags_for_nonexistent_card(self):
        """An unknown card yields an empty tag list."""
        tag_index = TagIndex()
        tag_index.build()

        found_tags = tag_index.get_tags_for_card("This Card Does Not Exist 12345")

        assert found_tags == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestMultiTagQueries:
    """Queries that combine several tags."""

    def test_get_cards_with_all_tags(self):
        """AND semantics: the result equals the intersection of both tags."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if len(known_tags) < 2:
            return

        first_tag, second_tag = known_tags[:2]

        with_first = tag_index.get_cards_with_tag(first_tag)
        with_second = tag_index.get_cards_with_tag(second_tag)
        with_both = tag_index.get_cards_with_all_tags([first_tag, second_tag])

        # Contained in each single-tag result ...
        assert with_both.issubset(with_first)
        assert with_both.issubset(with_second)

        # ... and exactly their intersection.
        assert with_both == (with_first & with_second)

    def test_get_cards_with_any_tags(self):
        """OR semantics: the result equals the union of both tags."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if len(known_tags) < 2:
            return

        first_tag, second_tag = known_tags[:2]

        with_first = tag_index.get_cards_with_tag(first_tag)
        with_second = tag_index.get_cards_with_tag(second_tag)
        with_either = tag_index.get_cards_with_any_tags([first_tag, second_tag])

        # Contains each single-tag result ...
        assert with_first.issubset(with_either)
        assert with_second.issubset(with_either)

        # ... and is exactly their union.
        assert with_either == (with_first | with_second)

    def test_get_cards_with_empty_tag_list(self):
        """An empty tag list matches nothing for both AND and OR."""
        tag_index = TagIndex()
        tag_index.build()

        matched_all = tag_index.get_cards_with_all_tags([])
        matched_any = tag_index.get_cards_with_any_tags([])

        assert matched_all == set()
        assert matched_any == set()

    def test_get_cards_with_nonexistent_tags(self):
        """Unknown tags match nothing for both AND and OR."""
        tag_index = TagIndex()
        tag_index.build()

        bogus = ["FakeTag1", "FakeTag2"]

        matched_all = tag_index.get_cards_with_all_tags(bogus)
        matched_any = tag_index.get_cards_with_any_tags(bogus)

        assert matched_all == set()
        assert matched_any == set()
|
||||||
|
|
||||||
|
|
||||||
|
class TestIndexStats:
    """Statistics and metadata exposed by the index."""

    def test_get_stats(self):
        """Stats are ``None`` before a build and match the build result after."""
        tag_index = TagIndex()

        # Nothing has been built yet.
        assert tag_index.get_stats() is None

        built = tag_index.build()
        fetched = tag_index.get_stats()

        assert fetched is not None
        assert fetched.total_cards == built.total_cards
        assert fetched.total_tags == built.total_tags

    def test_get_all_tags(self):
        """All tags come back as a non-empty, sorted list."""
        tag_index = TagIndex()
        tag_index.build()

        listing = tag_index.get_all_tags()

        assert isinstance(listing, list)
        assert len(listing) > 0
        # Ordering must already be sorted.
        assert listing == sorted(listing)

    def test_get_tag_stats(self):
        """Per-tag stats expose a positive ``card_count``."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if not known_tags:
            return

        details = tag_index.get_tag_stats(known_tags[0])

        assert "card_count" in details
        assert details["card_count"] > 0

    def test_get_popular_tags(self):
        """Popular tags respect the limit and are ordered by descending count."""
        tag_index = TagIndex()
        tag_index.build()

        ranking = tag_index.get_popular_tags(limit=10)

        assert isinstance(ranking, list)
        assert len(ranking) <= 10

        if len(ranking) > 1:
            # Entries are (tag, count) pairs; counts must not increase.
            tallies = [tally for _, tally in ranking]
            assert tallies == sorted(tallies, reverse=True)
|
||||||
|
|
||||||
|
|
||||||
|
class TestCaching:
    """Persistence of the index to a cache file."""

    def test_save_and_load_cache(self, tmp_path):
        """A second index pointed at the same cache file reuses the saved data."""
        cache_file = tmp_path / ".tag_index_test.json"

        # First build creates the cache file.
        original = TagIndex(cache_path=cache_file).build()

        assert cache_file.exists()

        # Second build is expected to be served from that file.
        reloaded = TagIndex(cache_path=cache_file).build()

        assert reloaded.total_cards == original.total_cards
        assert reloaded.total_tags == original.total_tags
        assert reloaded.indexed_at == original.indexed_at

    def test_cache_invalidation(self, tmp_path):
        """A cache whose recorded ``all_cards_mtime`` is stale gets rebuilt."""
        cache_file = tmp_path / ".tag_index_test.json"

        before = TagIndex(cache_path=cache_file).build()

        # Rewrite the cache with an ancient mtime to make it look outdated.
        payload = json.loads(cache_file.read_text())
        payload["stats"]["all_cards_mtime"] = 0
        cache_file.write_text(json.dumps(payload))

        after = TagIndex(cache_path=cache_file).build()

        # A rebuild produces a newer timestamp than the first build.
        assert after.indexed_at > before.indexed_at

    def test_clear_cache(self, tmp_path):
        """``clear_cache`` removes the cache file from disk."""
        cache_file = tmp_path / ".tag_index_test.json"

        tag_index = TagIndex(cache_path=cache_file)
        tag_index.build()

        assert cache_file.exists()

        tag_index.clear_cache()

        assert not cache_file.exists()
|
||||||
|
|
||||||
|
|
||||||
|
class TestGlobalIndex:
    """Behaviour of the module-level index accessor."""

    def test_get_tag_index(self):
        """The accessor hands back a ``TagIndex`` that already has stats."""
        clear_global_index()

        shared = get_tag_index()

        assert isinstance(shared, TagIndex)
        assert shared.get_stats() is not None

    def test_get_tag_index_singleton(self):
        """Two consecutive calls return the very same object."""
        clear_global_index()

        first = get_tag_index()
        second = get_tag_index()

        assert first is second

    def test_clear_global_index(self):
        """After clearing, the accessor creates a new object."""
        before = get_tag_index()

        clear_global_index()

        after = get_tag_index()

        assert before is not after
|
||||||
|
|
||||||
|
|
||||||
|
class TestEdgeCases:
    """Unusual inputs must not break the index."""

    def test_cards_with_no_tags(self):
        """Building and reading stats works even if some cards lack tags."""
        tag_index = TagIndex()
        tag_index.build()

        # Only checks that stats are available after the build.
        summary = tag_index.get_stats()
        assert summary is not None

    def test_special_characters_in_tags(self):
        """A tag containing ``&`` can be queried without raising."""
        tag_index = TagIndex()
        tag_index.build()

        matches = tag_index.get_cards_with_tag("Life & Death")
        assert isinstance(matches, set)

    def test_case_sensitive_tags(self):
        """Original, upper- and lower-case spellings all return sets."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if not known_tags:
            return

        base = known_tags[0]
        spellings = (base, base.upper(), base.lower())
        outcomes = [tag_index.get_cards_with_tag(spelling) for spelling in spellings]

        # The contents may differ between spellings depending on the data;
        # only the return type is checked here.
        for outcome in outcomes:
            assert isinstance(outcome, set)

    def test_duplicate_tags_handled(self):
        """Repeating a tag in an AND query equals the single-tag query."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if not known_tags:
            return

        base = known_tags[0]

        doubled = tag_index.get_cards_with_all_tags([base, base])
        single = tag_index.get_cards_with_tag(base)

        assert doubled == single
|
||||||
|
|
||||||
|
|
||||||
|
class TestPerformance:
    """Timing budgets for index queries."""

    def test_query_performance(self):
        """A single-tag lookup averages under 1 ms across 100 runs."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if not known_tags:
            return

        probe = known_tags[0]

        began = time.perf_counter()
        for _ in range(100):
            tag_index.get_cards_with_tag(probe)
        duration = time.perf_counter() - began

        per_query_ms = (duration / 100) * 1000

        assert per_query_ms < 1.0

    def test_multi_tag_query_performance(self):
        """A three-tag AND lookup averages under 5 ms across 100 runs."""
        tag_index = TagIndex()
        tag_index.build()

        known_tags = tag_index.get_all_tags()
        if len(known_tags) < 3:
            return

        probes = known_tags[:3]

        began = time.perf_counter()
        for _ in range(100):
            tag_index.get_cards_with_all_tags(probes)
        duration = time.perf_counter() - began

        per_query_ms = (duration / 100) * 1000

        assert per_query_ms < 5.0
|
||||||
259
code/tests/test_tag_loader.py
Normal file
259
code/tests/test_tag_loader.py
Normal file
|
|
@ -0,0 +1,259 @@
|
||||||
|
"""Tests for batch tag loading from all_cards."""
|
||||||
|
from code.tagging.tag_loader import (
|
||||||
|
load_tags_for_cards,
|
||||||
|
load_tags_for_card,
|
||||||
|
get_cards_with_tag,
|
||||||
|
get_cards_with_all_tags,
|
||||||
|
clear_cache,
|
||||||
|
is_use_all_cards_enabled,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestBatchTagLoading:
    """Loading tags for one or many cards in a single call."""

    def test_load_tags_for_multiple_cards(self):
        """Each requested card gets its own list entry in the result."""
        requested = ["Sol Ring", "Lightning Bolt", "Counterspell"]
        loaded = load_tags_for_cards(requested)

        assert isinstance(loaded, dict)
        assert len(loaded) == 3

        # Every name is present, whether or not it has tags.
        for name in requested:
            assert name in loaded
            assert isinstance(loaded[name], list)

    def test_load_tags_for_empty_list(self):
        """No cards in, empty dict out."""
        assert load_tags_for_cards([]) == {}

    def test_load_tags_for_single_card(self):
        """The single-card helper returns a list."""
        found = load_tags_for_card("Sol Ring")

        # Specific tags are not asserted because the data may vary.
        assert isinstance(found, list)

    def test_load_tags_for_nonexistent_card(self):
        """An unknown card gives an empty list instead of an error."""
        found = load_tags_for_card("This Card Does Not Exist 12345")

        assert found == []

    def test_load_tags_batch_includes_missing_cards(self):
        """Unknown names in a batch appear with an empty tag list."""
        requested = ["Sol Ring", "Fake Card Name 999", "Lightning Bolt"]
        loaded = load_tags_for_cards(requested)

        assert len(loaded) == 3
        assert "Fake Card Name 999" in loaded
        assert loaded["Fake Card Name 999"] == []

    def test_load_tags_handles_list_format(self):
        """Loaded tags are whitespace-free strings."""
        loaded = load_tags_for_cards(["Sol Ring"])

        if "Sol Ring" not in loaded or not loaded["Sol Ring"]:
            return

        found = loaded["Sol Ring"]
        assert all(isinstance(item, str) for item in found)
        # No leading or trailing whitespace may survive parsing.
        assert all(item == item.strip() for item in found)

    def test_load_tags_handles_string_format(self):
        """Every card maps to a list that contains no blank tags."""
        requested = ["Sol Ring", "Lightning Bolt"]
        loaded = load_tags_for_cards(requested)

        for name in requested:
            found = loaded[name]
            # A list is required even when it is empty.
            assert isinstance(found, list)
            # Neither empty nor whitespace-only entries are allowed.
            assert "" not in found
            assert all(item.strip() for item in found)
|
||||||
|
|
||||||
|
|
||||||
|
class TestTagQueries:
    """Finding cards by the tags they carry."""

    def test_get_cards_with_tag(self):
        """A tag query returns a list (possibly empty, data dependent)."""
        found = get_cards_with_tag("ramp", limit=10)

        assert isinstance(found, list)

    def test_get_cards_with_tag_limit(self):
        """The result never exceeds the requested limit."""
        found = get_cards_with_tag("ramp", limit=5)

        assert len(found) <= 5

    def test_get_cards_with_nonexistent_tag(self):
        """An unknown tag gives an empty list instead of an error."""
        found = get_cards_with_tag("ThisTagDoesNotExist12345")

        assert found == []

    def test_get_cards_with_all_tags(self):
        """A multi-tag query returns a list bounded by the limit."""
        found = get_cards_with_all_tags(["artifacts", "ramp"], limit=10)

        assert isinstance(found, list)
        assert len(found) <= 10

    def test_get_cards_with_all_tags_no_matches(self):
        """Two unknown tags together match no cards."""
        bogus = [
            "ThisTagDoesNotExist1",
            "ThisTagDoesNotExist2",
        ]
        found = get_cards_with_all_tags(bogus)

        assert found == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestCacheManagement:
    """Clearing and reusing the loader's cache."""

    def test_clear_cache(self):
        """Loading keeps working after the cache has been cleared."""
        # Populate whatever the loader caches.
        load_tags_for_card("Sol Ring")

        clear_cache()

        reloaded = load_tags_for_card("Sol Ring")
        assert isinstance(reloaded, list)

    def test_cache_persistence(self):
        """Two identical batch requests give equal results."""
        names = ["Sol Ring", "Lightning Bolt"]

        first_pass = load_tags_for_cards(["Sol Ring", "Lightning Bolt"])
        second_pass = load_tags_for_cards(list(names))

        assert first_pass == second_pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestFeatureFlag:
    """Checks on the all_cards feature flag accessor."""

    def test_is_use_all_cards_enabled_default(self):
        """The flag accessor returns a plain ``bool``."""
        flag = is_use_all_cards_enabled()

        # The default is expected to be True, but the environment may
        # override it, so only the type is asserted.
        assert isinstance(flag, bool)
|
||||||
|
|
||||||
|
|
||||||
|
class TestEdgeCases:
    """Unusual inputs for the batch tag loader."""

    def test_load_tags_with_special_characters(self):
        """Names containing punctuation are returned like any other name."""
        requested = [
            "Urza's Saga",
            "Keeper of the Accord",
            "Esper Sentinel",
        ]
        loaded = load_tags_for_cards(requested)

        assert len(loaded) == 3
        for name in requested:
            assert name in loaded

    def test_load_tags_preserves_card_name_case(self):
        """Mixed-case requests still produce at least one entry."""
        requested = ["Sol Ring", "LIGHTNING BOLT", "counterspell"]
        loaded = load_tags_for_cards(requested)

        # Exact case matching depends on the all_cards data, so the check
        # is deliberately loose.
        assert "Sol Ring" in loaded or len(loaded) >= 1

    def test_load_tags_deduplicates(self):
        """A card's tag list contains no repeated entries."""
        found = load_tags_for_card("Sol Ring")

        if not found:
            return
        assert len(found) == len(set(found))

    def test_large_batch_performance(self):
        """A 100-name batch finishes in under one second."""
        import time

        # 100 requests made of two repeated names.
        requested = ["Sol Ring"] * 50 + ["Lightning Bolt"] * 50

        began = time.perf_counter()
        loaded = load_tags_for_cards(requested)
        duration = time.perf_counter() - began

        assert duration < 1.0
        assert len(loaded) >= 1  # at least one card came back
|
||||||
|
|
||||||
|
|
||||||
|
class TestFormatVariations:
    """Checks that tags come out clean whatever format they were stored in."""

    def test_empty_tags_handled(self):
        """A card that may have no tags still yields a list, never ``None``."""
        # Basic lands usually carry no tags.
        found = load_tags_for_card("Plains")

        # ``[]`` is itself a list, so the type check covers the empty case.
        assert isinstance(found, list)

    def test_string_list_repr_parsed(self):
        """Tags stored as a list repr such as ``"['tag1', 'tag2']"`` are unwrapped.

        Exercised indirectly through ``load_tags_for_cards``; the test only
        inspects the returned tags for leftover repr syntax.
        """
        requested = ["Sol Ring", "Lightning Bolt", "Counterspell"]
        loaded = load_tags_for_cards(requested)

        for found in loaded.values():
            assert isinstance(found, list)
            for tag in found:
                # No stray brackets or double quotes from a list repr.
                assert "[" not in tag
                assert "]" not in tag
                assert '"' not in tag
                # A leftover repr quote sits at an edge of the tag
                # ("'ramp'"), whereas a legitimate apostrophe sits inside a
                # word ("Urza's"). Counting apostrophes got this backwards:
                # it rejected one apostrophe and accepted two.
                assert not tag.startswith("'")
                assert not tag.endswith("'")

    def test_comma_separated_parsed(self):
        """Comma-separated tag strings are split into individual tags."""
        loaded = load_tags_for_cards(["Sol Ring"])

        if not loaded.get("Sol Ring"):
            return

        # A properly split tag cannot contain a comma.
        for tag in loaded["Sol Ring"]:
            assert "," not in tag
|
||||||
370
code/tests/test_theme_enrichment.py
Normal file
370
code/tests/test_theme_enrichment.py
Normal file
|
|
@ -0,0 +1,370 @@
|
||||||
|
"""Tests for consolidated theme enrichment pipeline.
|
||||||
|
|
||||||
|
These tests verify that the new consolidated pipeline produces the same results
|
||||||
|
as the old 7-script approach, but much faster.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
try:
|
||||||
|
import yaml
|
||||||
|
except ImportError:
|
||||||
|
yaml = None
|
||||||
|
|
||||||
|
from code.tagging.theme_enrichment import (
|
||||||
|
ThemeEnrichmentPipeline,
|
||||||
|
EnrichmentStats,
|
||||||
|
run_enrichment_pipeline,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Skip all tests if PyYAML not available
|
||||||
|
pytestmark = pytest.mark.skipif(yaml is None, reason="PyYAML not installed")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def temp_catalog_dir(tmp_path: Path) -> Path:
    """Provide an empty ``config/themes/catalog`` directory under ``tmp_path``."""
    target = tmp_path.joinpath('config', 'themes', 'catalog')
    target.mkdir(parents=True)
    return target
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def temp_root(tmp_path: Path, temp_catalog_dir: Path) -> Path:
    """Provide a project root containing an empty ``theme_list.json``.

    Requesting ``temp_catalog_dir`` ensures the catalog directory exists
    under the same ``tmp_path`` before the root is handed out.
    """
    themes_dir = tmp_path / 'config' / 'themes'
    themes_dir.mkdir(parents=True, exist_ok=True)
    # Minimal theme list: valid JSON with no themes.
    (themes_dir / 'theme_list.json').write_text('{"themes": []}', encoding='utf-8')
    return tmp_path
|
||||||
|
|
||||||
|
|
||||||
|
def write_theme(catalog_dir: Path, filename: str, data: Dict[str, Any]) -> Path:
    """Serialize ``data`` as YAML into ``catalog_dir / filename`` and return the path."""
    target = catalog_dir / filename
    serialized = yaml.safe_dump(data, sort_keys=False, allow_unicode=True)
    target.write_text(serialized, encoding='utf-8')
    return target
|
||||||
|
|
||||||
|
|
||||||
|
def read_theme(path: Path) -> Dict[str, Any]:
    """Parse the UTF-8 YAML file at ``path`` and return its contents."""
    raw_text = path.read_text(encoding='utf-8')
    return yaml.safe_load(raw_text)
|
||||||
|
|
||||||
|
|
||||||
|
class TestThemeEnrichmentPipeline:
    """Tests for ThemeEnrichmentPipeline class.

    Each test seeds the temporary catalog with YAML theme files, runs one
    pipeline stage (or the whole pipeline) and checks the stats counters
    together with the resulting theme data.
    """

    def test_init(self, temp_root: Path):
        """Constructor keeps its arguments and starts without loaded themes."""
        expected_catalog = temp_root / 'config' / 'themes' / 'catalog'

        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)

        assert enricher.root == temp_root
        assert enricher.min_examples == 5
        assert enricher.catalog_dir == expected_catalog
        assert len(enricher.themes) == 0

    def test_load_themes_empty_dir(self, temp_root: Path):
        """Loading from a catalog directory without theme files finds nothing."""
        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()

        assert len(enricher.themes) == 0
        assert enricher.stats.total_themes == 0

    def test_load_themes_with_valid_files(self, temp_root: Path, temp_catalog_dir: Path):
        """Two valid theme files are both loaded and counted."""
        seeds = {
            'landfall.yml': {
                'display_name': 'Landfall',
                'synergies': ['Ramp', 'Tokens'],
                'example_commanders': []
            },
            'reanimate.yml': {
                'display_name': 'Reanimate',
                'synergies': ['Graveyard', 'Mill'],
                'example_commanders': ['Meren of Clan Nel Toth']
            },
        }
        for filename, payload in seeds.items():
            write_theme(temp_catalog_dir, filename, payload)

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()

        assert len(enricher.themes) == 2
        assert enricher.stats.total_themes == 2

    def test_autofill_placeholders_empty_examples(self, temp_root: Path, temp_catalog_dir: Path):
        """Autofill adds anchor placeholders to a theme that has no examples."""
        seed = {
            'display_name': 'Tokens Matter',
            'synergies': ['Sacrifice', 'Aristocrats'],
            'example_commanders': []
        }
        write_theme(temp_catalog_dir, 'tokens.yml', seed)

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()
        enricher.autofill_placeholders()

        assert enricher.stats.autofilled == 1
        entry = list(enricher.themes.values())[0]
        assert entry.modified
        examples = entry.data['example_commanders']
        # One placeholder for the theme itself plus one per synergy
        for placeholder in ('Tokens Matter Anchor', 'Sacrifice Anchor', 'Aristocrats Anchor'):
            assert placeholder in examples
        assert entry.data.get('editorial_quality') == 'draft'

    def test_autofill_skips_themes_with_examples(self, temp_root: Path, temp_catalog_dir: Path):
        """Autofill leaves a theme untouched when it already lists an example."""
        seed = {
            'display_name': 'Landfall',
            'synergies': ['Ramp'],
            'example_commanders': ['Tatyova, Benthic Druid']
        }
        write_theme(temp_catalog_dir, 'landfall.yml', seed)

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()
        enricher.autofill_placeholders()

        assert enricher.stats.autofilled == 0
        entry = list(enricher.themes.values())[0]
        assert not entry.modified

    def test_pad_examples_to_minimum(self, temp_root: Path, temp_catalog_dir: Path):
        """Padding extends a placeholder-only list up to ``min_examples``."""
        seed = {
            'display_name': 'Ramp',
            'synergies': ['Landfall', 'BigSpells', 'Hydras'],
            'example_commanders': ['Ramp Anchor', 'Landfall Anchor']
        }
        write_theme(temp_catalog_dir, 'ramp.yml', seed)

        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
        enricher.load_all_themes()
        enricher.pad_examples()

        assert enricher.stats.padded == 1
        entry = list(enricher.themes.values())[0]
        assert entry.modified
        examples = entry.data['example_commanders']
        assert len(examples) == 5
        # Should add synergies first (3rd synergy), then letter suffixes
        assert 'Hydras Anchor' in examples
        # Should also have letter suffixes for remaining slots
        assert any(
            suffix in name
            for name in examples
            for suffix in ('Anchor B', 'Anchor C')
        )

    def test_pad_skips_mixed_real_and_placeholder(self, temp_root: Path, temp_catalog_dir: Path):
        """Padding ignores lists that mix real commanders with placeholders."""
        seed = {
            'display_name': 'Tokens',
            'synergies': ['Sacrifice'],
            'example_commanders': ['Krenko, Mob Boss', 'Tokens Anchor']
        }
        write_theme(temp_catalog_dir, 'tokens.yml', seed)

        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
        enricher.load_all_themes()
        enricher.pad_examples()

        assert enricher.stats.padded == 0
        entry = list(enricher.themes.values())[0]
        assert not entry.modified

    def test_cleanup_removes_placeholders_when_real_present(self, temp_root: Path, temp_catalog_dir: Path):
        """Test cleanup removes placeholders when real examples are present.

        Note: cleanup only removes entries ending with ' Anchor' (no suffix).
        Purge step removes entries with ' Anchor' or ' Anchor X' pattern.
        """
        seed = {
            'display_name': 'Lifegain',
            'synergies': [],
            'example_commanders': [
                'Oloro, Ageless Ascetic',
                'Lifegain Anchor',  # Will be removed
                'Trelasarra, Moon Dancer',
            ]
        }
        write_theme(temp_catalog_dir, 'lifegain.yml', seed)

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()
        enricher.cleanup_placeholders()

        assert enricher.stats.cleaned == 1
        entry = list(enricher.themes.values())[0]
        assert entry.modified
        examples = entry.data['example_commanders']
        assert len(examples) == 2
        for kept in ('Oloro, Ageless Ascetic', 'Trelasarra, Moon Dancer'):
            assert kept in examples
        assert 'Lifegain Anchor' not in examples

    def test_purge_removes_all_anchors(self, temp_root: Path, temp_catalog_dir: Path):
        """Purge drops every anchor placeholder, even with no real examples."""
        seed = {
            'display_name': 'Counters',
            'synergies': [],
            'example_commanders': [
                'Counters Anchor',
                'Counters Anchor B',
                'Counters Anchor C'
            ]
        }
        write_theme(temp_catalog_dir, 'counters.yml', seed)

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()
        enricher.purge_anchors()

        assert enricher.stats.purged == 1
        entry = list(enricher.themes.values())[0]
        assert entry.modified
        assert entry.data['example_commanders'] == []

    def test_augment_from_catalog(self, temp_root: Path, temp_catalog_dir: Path):
        """Augmentation copies missing metadata fields from theme_list.json."""
        import json

        # Overwrite the catalog JSON created by the temp_root fixture
        catalog_entry = {
            'theme': 'Landfall',
            'description': 'Triggers from lands entering',
            'popularity_bucket': 'common',
            'popularity_hint': 'Very popular',
            'deck_archetype': 'Lands'
        }
        catalog_json = temp_root / 'config' / 'themes' / 'theme_list.json'
        catalog_json.write_text(json.dumps({'themes': [catalog_entry]}), encoding='utf-8')

        seed = {
            'display_name': 'Landfall',
            'synergies': ['Ramp'],
            'example_commanders': ['Tatyova, Benthic Druid']
        }
        write_theme(temp_catalog_dir, 'landfall.yml', seed)

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()
        enricher.augment_from_catalog()

        assert enricher.stats.augmented == 1
        entry = list(enricher.themes.values())[0]
        assert entry.modified
        expected_fields = (
            ('description', 'Triggers from lands entering'),
            ('popularity_bucket', 'common'),
            ('popularity_hint', 'Very popular'),
            ('deck_archetype', 'Lands'),
        )
        for field_name, expected_value in expected_fields:
            assert entry.data[field_name] == expected_value

    def test_validate_min_examples_warning(self, temp_root: Path, temp_catalog_dir: Path):
        """Too few examples only produce lint warnings when not enforced."""
        seed = {
            'display_name': 'Ramp',
            'synergies': [],
            'example_commanders': ['Ramp Commander']
        }
        write_theme(temp_catalog_dir, 'ramp.yml', seed)

        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
        enricher.load_all_themes()
        enricher.validate(enforce_min=False)

        assert enricher.stats.lint_warnings > 0
        assert enricher.stats.lint_errors == 0

    def test_validate_min_examples_error(self, temp_root: Path, temp_catalog_dir: Path):
        """Too few examples produce lint errors when the minimum is enforced."""
        seed = {
            'display_name': 'Ramp',
            'synergies': [],
            'example_commanders': ['Ramp Commander']
        }
        write_theme(temp_catalog_dir, 'ramp.yml', seed)

        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
        enricher.load_all_themes()
        enricher.validate(enforce_min=True)

        assert enricher.stats.lint_errors > 0

    def test_write_themes_dry_run(self, temp_root: Path, temp_catalog_dir: Path):
        """In-memory changes stay off disk while write_all_themes is not called."""
        seed = {
            'display_name': 'Tokens',
            'synergies': [],
            'example_commanders': []
        }
        theme_path = write_theme(temp_catalog_dir, 'tokens.yml', seed)
        content_before = theme_path.read_text(encoding='utf-8')

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()
        enricher.autofill_placeholders()
        # Don't call write_all_themes()

        # File should be unchanged
        content_after = theme_path.read_text(encoding='utf-8')
        assert content_after == content_before

    def test_write_themes_saves_changes(self, temp_root: Path, temp_catalog_dir: Path):
        """write_all_themes persists the autofilled placeholders to the file."""
        seed = {
            'display_name': 'Tokens',
            'synergies': ['Sacrifice'],
            'example_commanders': []
        }
        theme_path = write_theme(temp_catalog_dir, 'tokens.yml', seed)

        enricher = ThemeEnrichmentPipeline(root=temp_root)
        enricher.load_all_themes()
        enricher.autofill_placeholders()
        enricher.write_all_themes()

        # File should be updated
        persisted = read_theme(theme_path)
        assert len(persisted['example_commanders']) > 0
        assert 'Tokens Anchor' in persisted['example_commanders']

    def test_run_all_full_pipeline(self, temp_root: Path, temp_catalog_dir: Path):
        """run_all processes every theme and writes the results back to disk."""
        seeds = {
            'landfall.yml': {
                'display_name': 'Landfall',
                'synergies': ['Ramp', 'Lands'],
                'example_commanders': []
            },
            'reanimate.yml': {
                'display_name': 'Reanimate',
                'synergies': ['Graveyard'],
                'example_commanders': []
            },
        }
        for filename, payload in seeds.items():
            write_theme(temp_catalog_dir, filename, payload)

        enricher = ThemeEnrichmentPipeline(root=temp_root, min_examples=5)
        stats = enricher.run_all(write=True, enforce_min=False, strict_lint=False)

        assert stats.total_themes == 2
        assert stats.autofilled >= 2
        assert stats.padded >= 2

        # Verify files were updated
        landfall_on_disk = read_theme(temp_catalog_dir / 'landfall.yml')
        assert len(landfall_on_disk['example_commanders']) >= 5
        assert landfall_on_disk.get('editorial_quality') == 'draft'
|
||||||
|
|
||||||
|
|
||||||
|
def test_run_enrichment_pipeline_convenience_function(temp_root: Path, temp_catalog_dir: Path):
    """The module-level wrapper runs the pipeline and returns EnrichmentStats."""
    seed = {
        'display_name': 'Tokens',
        'synergies': ['Sacrifice'],
        'example_commanders': []
    }
    write_theme(temp_catalog_dir, 'tokens.yml', seed)

    options = dict(
        root=temp_root,
        min_examples=3,
        write=True,
        enforce_min=False,
        strict=False,
        progress_callback=None,
    )
    stats = run_enrichment_pipeline(**options)

    assert isinstance(stats, EnrichmentStats)
    assert stats.total_themes == 1
    assert stats.autofilled >= 1

    # Verify file was written
    tokens_on_disk = read_theme(temp_catalog_dir / 'tokens.yml')
    assert len(tokens_on_disk['example_commanders']) >= 3
|
||||||
214
code/tests/test_web_tag_endpoints.py
Normal file
214
code/tests/test_web_tag_endpoints.py
Normal file
|
|
@ -0,0 +1,214 @@
|
||||||
|
"""Tests for web tag search endpoints."""
|
||||||
|
import pytest
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def client():
    """Return a TestClient wrapping the web application."""
    # Deferred import: the original notes this avoids circular imports
    from code.web.app import app

    test_client = TestClient(app)
    return test_client
|
||||||
|
|
||||||
|
|
||||||
|
def test_theme_autocomplete_basic(client):
    """A "life" prefix yields an HTML fragment with suggestions and counts."""
    resp = client.get("/commanders/theme-autocomplete?theme=life&limit=5")

    assert resp.status_code == 200
    assert "text/html" in resp.headers["content-type"]

    body = resp.text
    # Suggestion markup, a tag starting with "life", and the card-count span
    for marker in ("autocomplete-item", "Life", "tag-count"):
        assert marker in body
|
||||||
|
|
||||||
|
|
||||||
|
def test_theme_autocomplete_min_length(client):
    """A one-character theme prefix is rejected by request validation."""
    too_short_url = "/commanders/theme-autocomplete?theme=a&limit=5"

    resp = client.get(too_short_url)

    # Should fail validation
    assert resp.status_code == 422
|
||||||
|
|
||||||
|
|
||||||
|
def test_theme_autocomplete_no_matches(client):
    """A prefix matching no tag still returns 200 with an empty-state fragment."""
    resp = client.get("/commanders/theme-autocomplete?theme=zzzzzzzzz&limit=5")

    assert resp.status_code == 200
    body = resp.text
    empty_markers = ("autocomplete-empty", "No matching themes")
    assert any(marker in body for marker in empty_markers)
|
||||||
|
|
||||||
|
|
||||||
|
def test_theme_autocomplete_limit(client):
    """Test that theme autocomplete respects limit parameter."""
    # A one-character prefix is below the endpoint's 2-character minimum, so
    # validation must reject it regardless of the limit. This matches
    # test_theme_autocomplete_min_length; accepting 200 as well would make the
    # assertion unable to catch a regression.
    too_short = client.get("/commanders/theme-autocomplete?theme=a&limit=3")
    assert too_short.status_code == 422

    # Try with valid length
    response = client.get("/commanders/theme-autocomplete?theme=to&limit=3")
    assert response.status_code == 200

    # Count items (rough check - should have at most 3)
    content = response.text
    item_count = content.count('class="autocomplete-item"')
    assert item_count <= 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_api_cards_by_tags_and_logic(client):
    """AND search returns the expected JSON envelope with a card list."""
    resp = client.get("/api/cards/by-tags?tags=tokens&logic=AND&limit=10")

    assert resp.status_code == 200
    payload = resp.json()

    # Envelope keys first, then the values the test pins
    for key in ("tags", "logic", "total_matches", "cards"):
        assert key in payload
    assert payload["logic"] == "AND"
    assert isinstance(payload["cards"], list)
|
||||||
|
|
||||||
|
|
||||||
|
def test_api_cards_by_tags_or_logic(client):
    """OR search over two tags echoes the logic and includes a cards entry."""
    resp = client.get("/api/cards/by-tags?tags=tokens,sacrifice&logic=OR&limit=10")

    assert resp.status_code == 200
    payload = resp.json()

    assert payload["logic"] == "OR"
    assert "cards" in payload
|
||||||
|
|
||||||
|
|
||||||
|
def test_api_cards_by_tags_invalid_logic(client):
    """An unsupported logic value yields a 400 response with an error field."""
    resp = client.get("/api/cards/by-tags?tags=tokens&logic=INVALID&limit=10")

    assert resp.status_code == 400
    payload = resp.json()
    assert "error" in payload
|
||||||
|
|
||||||
|
|
||||||
|
def test_api_cards_by_tags_empty_tags(client):
    """An empty tags value yields a 400 response with an error field."""
    resp = client.get("/api/cards/by-tags?tags=&logic=AND&limit=10")

    assert resp.status_code == 400
    payload = resp.json()
    assert "error" in payload
|
||||||
|
|
||||||
|
|
||||||
|
def test_api_tags_search(client):
    """Tag search echoes the query and returns prefix-matching tag entries."""
    resp = client.get("/api/cards/tags/search?q=life&limit=10")

    assert resp.status_code == 200
    payload = resp.json()

    assert "query" in payload
    assert payload["query"] == "life"
    assert "matches" in payload
    found = payload["matches"]
    assert isinstance(found, list)

    # Check match structure (only when the index returned something)
    if not found:
        return
    first = found[0]
    assert "tag" in first
    assert "card_count" in first
    assert first["tag"].lower().startswith("life")
|
||||||
|
|
||||||
|
|
||||||
|
def test_api_tags_search_min_length(client):
    """A one-character tag query is rejected by request validation."""
    too_short_url = "/api/cards/tags/search?q=a&limit=10"

    resp = client.get(too_short_url)

    # Should fail validation
    assert resp.status_code == 422
|
||||||
|
|
||||||
|
|
||||||
|
def test_api_tags_popular(client):
    """Popular tags come back counted, bounded by limit, most popular first."""
    resp = client.get("/api/cards/tags/popular?limit=20")

    assert resp.status_code == 200
    payload = resp.json()

    assert "count" in payload
    assert "tags" in payload
    ranked = payload["tags"]
    assert isinstance(ranked, list)
    assert payload["count"] == len(ranked)
    assert payload["count"] <= 20

    # Check tag structure
    if ranked:
        top = ranked[0]
        assert "tag" in top
        assert "card_count" in top
        assert isinstance(top["card_count"], int)

    # Tags should be sorted by card count (descending)
    if len(ranked) > 1:
        first_count = ranked[0]["card_count"]
        second_count = ranked[1]["card_count"]
        assert first_count >= second_count
|
||||||
|
|
||||||
|
|
||||||
|
def test_api_tags_popular_limit(client):
    """The popular tags endpoint returns no more entries than ``limit``."""
    resp = client.get("/api/cards/tags/popular?limit=5")

    assert resp.status_code == 200
    returned_tags = resp.json()["tags"]

    assert len(returned_tags) <= 5
|
||||||
|
|
||||||
|
|
||||||
|
def test_commanders_page_loads(client):
    """The commanders page renders as HTML and contains the theme filter."""
    resp = client.get("/commanders")

    assert resp.status_code == 200
    assert "text/html" in resp.headers["content-type"]

    page = resp.text
    # Should have the theme filter input and its suggestion container
    for element_id in ("commander-theme", "theme-suggestions"):
        assert element_id in page
|
||||||
|
|
||||||
|
|
||||||
|
def test_commanders_page_with_theme_filter(client):
    """The commanders page accepts a theme query parameter and echoes it."""
    resp = client.get("/commanders?theme=tokens")

    assert resp.status_code == 200
    page = resp.text

    # Should have the theme value in the input. The second operand also
    # matches whenever the first does, so effectively any occurrence of
    # "tokens" in the page satisfies this check.
    has_input_value = 'value="tokens"' in page
    assert has_input_value or "tokens" in page
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="Performance test - run manually")
def test_theme_autocomplete_performance(client):
    """Test that theme autocomplete responds quickly."""
    import time

    # perf_counter is the high-resolution clock meant for short intervals;
    # the wall clock (time.time) can be adjusted and may be too coarse for a
    # 50 ms budget.
    start = time.perf_counter()
    response = client.get("/commanders/theme-autocomplete?theme=to&limit=20")
    elapsed = time.perf_counter() - start

    assert response.status_code == 200
    assert elapsed < 0.05  # Should respond in <50ms
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="Performance test - run manually")
def test_api_tags_search_performance(client):
    """Test that tag search responds quickly."""
    import time

    # perf_counter is the high-resolution clock meant for short intervals;
    # the wall clock (time.time) can be adjusted and may be too coarse for a
    # 50 ms budget.
    start = time.perf_counter()
    response = client.get("/api/cards/tags/search?q=to&limit=20")
    elapsed = time.perf_counter() - start

    assert response.status_code == 200
    assert elapsed < 0.05  # Should respond in <50ms
|
||||||
|
|
@ -2205,6 +2205,7 @@ from .routes import themes as themes_routes # noqa: E402
|
||||||
from .routes import commanders as commanders_routes # noqa: E402
|
from .routes import commanders as commanders_routes # noqa: E402
|
||||||
from .routes import partner_suggestions as partner_suggestions_routes # noqa: E402
|
from .routes import partner_suggestions as partner_suggestions_routes # noqa: E402
|
||||||
from .routes import telemetry as telemetry_routes # noqa: E402
|
from .routes import telemetry as telemetry_routes # noqa: E402
|
||||||
|
from .routes import cards as cards_routes # noqa: E402
|
||||||
app.include_router(build_routes.router)
|
app.include_router(build_routes.router)
|
||||||
app.include_router(config_routes.router)
|
app.include_router(config_routes.router)
|
||||||
app.include_router(decks_routes.router)
|
app.include_router(decks_routes.router)
|
||||||
|
|
@ -2214,6 +2215,7 @@ app.include_router(themes_routes.router)
|
||||||
app.include_router(commanders_routes.router)
|
app.include_router(commanders_routes.router)
|
||||||
app.include_router(partner_suggestions_routes.router)
|
app.include_router(partner_suggestions_routes.router)
|
||||||
app.include_router(telemetry_routes.router)
|
app.include_router(telemetry_routes.router)
|
||||||
|
app.include_router(cards_routes.router)
|
||||||
|
|
||||||
# Warm validation cache early to reduce first-call latency in tests and dev
|
# Warm validation cache early to reduce first-call latency in tests and dev
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
186
code/web/routes/cards.py
Normal file
186
code/web/routes/cards.py
Normal file
|
|
@ -0,0 +1,186 @@
|
||||||
|
"""Card browsing and tag search API endpoints."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
from fastapi import APIRouter, Query
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
|
# Import tag index from M3
|
||||||
|
try:
|
||||||
|
from code.tagging.tag_index import get_tag_index
|
||||||
|
except ImportError:
|
||||||
|
from tagging.tag_index import get_tag_index
|
||||||
|
|
||||||
|
# Import all cards loader
|
||||||
|
try:
|
||||||
|
from code.services.all_cards_loader import AllCardsLoader
|
||||||
|
except ImportError:
|
||||||
|
from services.all_cards_loader import AllCardsLoader
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/api/cards", tags=["cards"])
|
||||||
|
|
||||||
|
# Cache for all_cards loader
|
||||||
|
_all_cards_loader: Optional[AllCardsLoader] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_all_cards_loader() -> AllCardsLoader:
    """Return the module-level AllCardsLoader, creating it on first call."""
    global _all_cards_loader
    loader = _all_cards_loader
    if loader is None:
        # First use: build the loader and remember it for later calls
        loader = _all_cards_loader = AllCardsLoader()
    return loader
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/by-tags")
async def search_by_tags(
    tags: str = Query(..., description="Comma-separated list of theme tags"),
    logic: str = Query("AND", description="Search logic: AND (intersection) or OR (union)"),
    limit: int = Query(100, ge=1, le=1000, description="Maximum number of results"),
) -> JSONResponse:
    """Search for cards by theme tags.

    Examples:
        /api/cards/by-tags?tags=tokens&logic=AND
        /api/cards/by-tags?tags=tokens,sacrifice&logic=AND
        /api/cards/by-tags?tags=lifegain,lifelink&logic=OR

    Args:
        tags: Comma-separated theme tags to search for
        logic: "AND" for cards with all tags, "OR" for cards with any tag
            (case-insensitive)
        limit: Maximum results to return

    Returns:
        JSON with matching cards and metadata. Responds with 400 when no
        usable tag is given or ``logic`` is neither AND nor OR, and with 500
        when the tag index or card data lookup raises.
    """
    # Validate the request before touching the tag index or card data, so a
    # malformed query is always answered with 400 and never pays for (or
    # fails inside) index loading.
    tag_list = [t.strip() for t in tags.split(",") if t.strip()]
    if not tag_list:
        return JSONResponse(
            status_code=400,
            content={"error": "No valid tags provided"}
        )

    logic_mode = logic.upper()
    if logic_mode not in ("AND", "OR"):
        return JSONResponse(
            status_code=400,
            content={"error": f"Invalid logic: {logic}. Use AND or OR."}
        )

    try:
        # Get tag index and find matching cards
        tag_index = get_tag_index()
        if logic_mode == "AND":
            card_names = tag_index.get_cards_with_all_tags(tag_list)
        else:
            card_names = tag_index.get_cards_with_any_tags(tag_list)

        # Load full card data and keep only rows whose name matched
        all_cards = _get_all_cards_loader().load()
        matching_cards = all_cards[all_cards["name"].isin(card_names)]

        # Limit results
        matching_cards = matching_cards.head(limit)

        # Convert to dict
        # NOTE(review): if card rows can contain NaN, JSON serialization may
        # reject them and this request would end in the 500 branch - confirm
        # against the card data schema.
        results = matching_cards.to_dict("records")

        return JSONResponse(content={
            "tags": tag_list,
            "logic": logic_mode,
            # Number of names reported by the tag index, before the limit
            "total_matches": len(card_names),
            "returned": len(results),
            "limit": limit,
            "cards": results
        })

    except Exception as e:
        # Best-effort endpoint: report the failure as JSON instead of raising
        return JSONResponse(
            status_code=500,
            content={"error": f"Search failed: {str(e)}"}
        )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/tags/search")
async def search_tags(
    q: str = Query(..., min_length=2, description="Tag prefix to search for"),
    limit: int = Query(10, ge=1, le=50, description="Maximum number of suggestions"),
) -> JSONResponse:
    """Suggest theme tags whose name starts with the given prefix.

    Examples:
        /api/cards/tags/search?q=life
        /api/cards/tags/search?q=token&limit=5

    Args:
        q: Tag prefix (minimum 2 characters), matched case-insensitively
        limit: Maximum suggestions to return

    Returns:
        JSON echoing the query plus up to ``limit`` matching tags with their
        card counts, in the order the tag index returns them; a 500 JSON
        error if the lookup raises.
    """
    try:
        # A high limit is used to fetch (per the original comment) all tags
        # with their counts; ordering is taken from get_popular_tags as-is.
        ranked_tags = get_tag_index().get_popular_tags(limit=10000)

        needle = q.lower()
        suggestions = [
            {"tag": name, "card_count": total}
            for name, total in ranked_tags
            if name.lower().startswith(needle)
        ][:limit]

        return JSONResponse(content={"query": q, "matches": suggestions})

    except Exception as exc:
        return JSONResponse(
            status_code=500,
            content={"error": f"Tag search failed: {str(exc)}"}
        )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/tags/popular")
async def get_popular_tags(
    limit: int = Query(50, ge=1, le=200, description="Number of popular tags to return"),
) -> JSONResponse:
    """Return the top theme tags as reported by the tag index.

    Examples:
        /api/cards/tags/popular
        /api/cards/tags/popular?limit=20

    Args:
        limit: Maximum tags to return

    Returns:
        JSON with the number of tags returned and a list of
        ``{"tag", "card_count"}`` entries; a 500 JSON error if the tag index
        lookup raises.
    """
    try:
        ranked_tags = get_tag_index().get_popular_tags(limit=limit)
        tag_entries = [
            {"tag": name, "card_count": total}
            for name, total in ranked_tags
        ]
        payload = {"count": len(ranked_tags), "tags": tag_entries}
        return JSONResponse(content=payload)

    except Exception as exc:
        return JSONResponse(
            status_code=500,
            content={"error": f"Failed to get popular tags: {str(exc)}"}
        )
|
||||||
|
|
@ -526,6 +526,52 @@ def _build_theme_info(records: Sequence[CommanderRecord]) -> dict[str, Commander
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/theme-autocomplete", response_class=HTMLResponse)
async def theme_autocomplete(
    request: Request,
    theme: str = Query(..., min_length=2, description="Theme prefix to search for"),
    limit: int = Query(20, ge=1, le=50),
) -> HTMLResponse:
    """HTMX endpoint for theme tag autocomplete.

    Args:
        request: Incoming request (unused here; kept for the route signature)
        theme: Tag prefix (minimum 2 characters), matched case-insensitively
        limit: Maximum number of suggestions to render

    Returns:
        HTML fragment with one ``autocomplete-item`` div per matching tag, an
        ``autocomplete-empty`` div when nothing matches, or an
        ``autocomplete-error`` div if the lookup raises. All interpolated
        values are HTML-escaped.
    """
    # Function-scope import: only ``escape`` is needed and the module's
    # top-level imports are left untouched.
    from html import escape

    try:
        # Import tag_index
        try:
            from code.tagging.tag_index import get_tag_index
        except ImportError:
            from tagging.tag_index import get_tag_index

        tag_index = get_tag_index()

        # Get all tags with counts - get_popular_tags returns all tags when given a high limit
        all_tags_with_counts = tag_index.get_popular_tags(limit=10000)

        # Filter by prefix (case-insensitive)
        prefix_lower = theme.lower()
        matches = [
            (tag, count)
            for tag, count in all_tags_with_counts
            if tag.lower().startswith(prefix_lower)
        ]

        # Already sorted by popularity from get_popular_tags
        matches = matches[:limit]

        # Generate HTML suggestions with ARIA attributes. Tag names are data,
        # not markup: escape them so characters such as & " < > cannot break
        # the data-value attribute or inject elements into the fragment.
        html_parts = []
        for tag, count in matches:
            safe_tag = escape(str(tag))
            safe_count = escape(str(count))
            html_parts.append(
                f'<div class="autocomplete-item" data-value="{safe_tag}" role="option">'
                f'{safe_tag} <span class="tag-count">({safe_count})</span></div>'
            )

        markup = "\n".join(html_parts) if html_parts else '<div class="autocomplete-empty">No matching themes</div>'

        return HTMLResponse(content=markup)

    except Exception as e:
        # Best-effort endpoint: surface the failure inside the fragment, but
        # never emit raw exception text as HTML.
        return HTMLResponse(content=f'<div class="autocomplete-error">Error: {escape(str(e))}</div>')
|
||||||
|
|
||||||
|
|
||||||
@router.get("/", response_class=HTMLResponse)
|
@router.get("/", response_class=HTMLResponse)
|
||||||
async def commanders_index(
|
async def commanders_index(
|
||||||
request: Request,
|
request: Request,
|
||||||
|
|
|
||||||
|
|
@ -153,40 +153,44 @@ def _display_tags_from_entry(entry: Dict[str, Any]) -> List[str]:
|
||||||
def _run_theme_metadata_enrichment(out_func=None) -> None:
|
def _run_theme_metadata_enrichment(out_func=None) -> None:
|
||||||
"""Run full metadata enrichment sequence after theme catalog/YAML generation.
|
"""Run full metadata enrichment sequence after theme catalog/YAML generation.
|
||||||
|
|
||||||
Idempotent: each script is safe to re-run; errors are swallowed (logged) to avoid
|
Uses consolidated ThemeEnrichmentPipeline for 5-10x faster processing.
|
||||||
|
Idempotent: safe to re-run; errors are swallowed (logged) to avoid
|
||||||
impacting primary setup/tagging pipeline. Designed to centralize logic so both
|
impacting primary setup/tagging pipeline. Designed to centralize logic so both
|
||||||
manual refresh (routes/themes.py) and automatic setup flows invoke identical steps.
|
manual refresh (routes/themes.py) and automatic setup flows invoke identical steps.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
import os
|
import os
|
||||||
import sys
|
from pathlib import Path
|
||||||
import subprocess
|
from code.tagging.theme_enrichment import run_enrichment_pipeline
|
||||||
root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))
|
|
||||||
scripts_dir = os.path.join(root, 'code', 'scripts')
|
root = Path(__file__).resolve().parents[3]
|
||||||
py = sys.executable
|
min_examples = int(os.environ.get('EDITORIAL_MIN_EXAMPLES', '5'))
|
||||||
steps: List[List[str]] = [
|
|
||||||
[py, os.path.join(scripts_dir, 'autofill_min_examples.py')],
|
|
||||||
[py, os.path.join(scripts_dir, 'pad_min_examples.py'), '--min', os.environ.get('EDITORIAL_MIN_EXAMPLES', '5')],
|
|
||||||
[py, os.path.join(scripts_dir, 'cleanup_placeholder_examples.py'), '--apply'],
|
|
||||||
[py, os.path.join(scripts_dir, 'purge_anchor_placeholders.py'), '--apply'],
|
|
||||||
# Augment YAML with description / popularity buckets from the freshly built catalog
|
|
||||||
[py, os.path.join(scripts_dir, 'augment_theme_yaml_from_catalog.py')],
|
|
||||||
[py, os.path.join(scripts_dir, 'generate_theme_editorial_suggestions.py'), '--apply', '--limit-yaml', '0'],
|
|
||||||
[py, os.path.join(scripts_dir, 'lint_theme_editorial.py')], # non-strict lint pass
|
|
||||||
]
|
|
||||||
def _emit(msg: str):
|
def _emit(msg: str):
|
||||||
try:
|
try:
|
||||||
if out_func:
|
if out_func:
|
||||||
out_func(msg)
|
out_func(msg)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
for cmd in steps:
|
|
||||||
|
# Run consolidated pipeline instead of 7 separate subprocess scripts
|
||||||
|
stats = run_enrichment_pipeline(
|
||||||
|
root=root,
|
||||||
|
min_examples=min_examples,
|
||||||
|
write=True,
|
||||||
|
enforce_min=False, # Non-strict lint pass
|
||||||
|
strict=False,
|
||||||
|
progress_callback=_emit,
|
||||||
|
)
|
||||||
|
|
||||||
|
_emit(f"Theme enrichment complete: {stats.total_themes} themes processed")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
if out_func:
|
||||||
try:
|
try:
|
||||||
subprocess.run(cmd, check=True)
|
out_func(f"[metadata_enrich] pipeline failed: {e}")
|
||||||
except Exception as e:
|
except Exception:
|
||||||
_emit(f"[metadata_enrich] step failed ({os.path.basename(cmd[1]) if len(cmd)>1 else cmd}): {e}")
|
pass
|
||||||
continue
|
|
||||||
except Exception:
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1144,6 +1148,13 @@ def _ensure_setup_ready(out, force: bool = False) -> None:
|
||||||
# Run metadata enrichment (best-effort) after export sequence.
|
# Run metadata enrichment (best-effort) after export sequence.
|
||||||
try:
|
try:
|
||||||
_run_theme_metadata_enrichment(out_func)
|
_run_theme_metadata_enrichment(out_func)
|
||||||
|
# Rebuild theme_list.json to pick up newly generated example_cards/commanders
|
||||||
|
# from the enrichment pipeline (which populates them from CSV data)
|
||||||
|
if use_merge and os.path.exists(build_script):
|
||||||
|
args = [_sys.executable, build_script]
|
||||||
|
if force:
|
||||||
|
args.append('--force')
|
||||||
|
_run(args, check=True)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -23,15 +23,23 @@
|
||||||
<span class="filter-label">Commander name</span>
|
<span class="filter-label">Commander name</span>
|
||||||
<input type="search" id="commander-search" name="q" value="{{ query }}" placeholder="Search commander names..." autocomplete="off" />
|
<input type="search" id="commander-search" name="q" value="{{ query }}" placeholder="Search commander names..." autocomplete="off" />
|
||||||
</label>
|
</label>
|
||||||
<label>
|
<div class="filter-field">
|
||||||
<span class="filter-label">Theme</span>
|
<label for="commander-theme" class="filter-label">Theme:</label>
|
||||||
<input type="search" id="commander-theme" name="theme" value="{{ theme_query }}" placeholder="Search themes..." list="theme-suggestions" autocomplete="off" />
|
<div class="autocomplete-container">
|
||||||
</label>
|
<input type="search" id="commander-theme" name="theme" value="{{ theme_query }}"
|
||||||
<datalist id="theme-suggestions">
|
placeholder="Search themes..." autocomplete="off"
|
||||||
{% for name in theme_options[:200] %}
|
role="combobox"
|
||||||
<option value="{{ name }}"></option>
|
aria-autocomplete="list"
|
||||||
{% endfor %}
|
aria-controls="theme-suggestions"
|
||||||
</datalist>
|
aria-expanded="false"
|
||||||
|
hx-get="/commanders/theme-autocomplete"
|
||||||
|
hx-trigger="keyup changed delay:300ms"
|
||||||
|
hx-target="#theme-suggestions"
|
||||||
|
hx-include="[name='theme']"
|
||||||
|
hx-swap="innerHTML" />
|
||||||
|
<div id="theme-suggestions" class="autocomplete-dropdown" role="listbox" aria-label="Theme suggestions"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
<label>
|
<label>
|
||||||
<span class="filter-label">Color identity</span>
|
<span class="filter-label">Color identity</span>
|
||||||
<select id="commander-color" name="color">
|
<select id="commander-color" name="color">
|
||||||
|
|
@ -185,6 +193,18 @@
|
||||||
.commander-thumb img { width:100%; }
|
.commander-thumb img { width:100%; }
|
||||||
.skeleton-thumb { width:min(70vw, 220px); height:calc(min(70vw, 220px) * 1.4); }
|
.skeleton-thumb { width:min(70vw, 220px); height:calc(min(70vw, 220px) * 1.4); }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Autocomplete dropdown styles */
|
||||||
|
.autocomplete-container { position:relative; width:100%; }
|
||||||
|
.autocomplete-dropdown { position:absolute; top:100%; left:0; right:0; z-index:1000; background:var(--panel); border:1px solid var(--border); border-radius:8px; margin-top:4px; max-height:280px; overflow-y:auto; box-shadow:0 4px 12px rgba(0,0,0,.25); display:none; }
|
||||||
|
.autocomplete-dropdown:not(:empty) { display:block; }
|
||||||
|
.autocomplete-item { padding:.5rem .75rem; cursor:pointer; border-bottom:1px solid var(--border); transition:background .15s ease; }
|
||||||
|
.autocomplete-item:last-child { border-bottom:none; }
|
||||||
|
.autocomplete-item:hover, .autocomplete-item:focus, .autocomplete-item.selected { background:rgba(148,163,184,.15); }
|
||||||
|
.autocomplete-item.selected { background:rgba(148,163,184,.25); border-left:3px solid var(--ring); padding-left:calc(.75rem - 3px); }
|
||||||
|
.autocomplete-item .tag-count { color:var(--muted); font-size:.85rem; float:right; }
|
||||||
|
.autocomplete-empty { padding:.75rem; text-align:center; color:var(--muted); font-size:.85rem; }
|
||||||
|
.autocomplete-error { padding:.75rem; text-align:center; color:#f87171; font-size:.85rem; }
|
||||||
</style>
|
</style>
|
||||||
<script>
|
<script>
|
||||||
(function(){
|
(function(){
|
||||||
|
|
@ -215,6 +235,107 @@
|
||||||
resetPage();
|
resetPage();
|
||||||
setLastTrigger('theme');
|
setLastTrigger('theme');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Autocomplete dropdown handling.
// Wires the server-rendered suggestion list (#theme-suggestions) to the theme
// input: mouse and keyboard selection, closing, and ARIA state updates.
const autocompleteDropdown = document.getElementById('theme-suggestions');
if (autocompleteDropdown) {
  // Index of the highlighted suggestion; -1 means nothing is highlighted.
  let selectedIndex = -1;

  // All suggestion rows currently rendered in the dropdown.
  const getItems = () => Array.from(autocompleteDropdown.querySelectorAll('.autocomplete-item'));

  // Empty the dropdown (the :not(:empty) CSS rule hides it) and drop the highlight.
  const closeDropdown = () => {
    autocompleteDropdown.innerHTML = '';
    selectedIndex = -1;
  };

  // Highlight the item at `index`, un-highlight the rest, and mirror the
  // state in aria-selected so assistive technology can follow the highlight.
  const selectItem = (index) => {
    getItems().forEach((item, i) => {
      const isActive = i === index;
      item.classList.toggle('selected', isActive);
      item.setAttribute('aria-selected', isActive ? 'true' : 'false');
      if (isActive) {
        item.scrollIntoView({ block: 'nearest', behavior: 'smooth' });
      }
    });
    selectedIndex = index;
  };

  // Copy an item's value into the theme field, close the dropdown and fire
  // the same input/submit events the form's other listeners react to.
  // Shared by the click and Enter paths so they cannot drift apart.
  const commitItem = (item) => {
    if (!item || !item.dataset.value) {
      return;
    }
    themeField.value = item.dataset.value;
    closeDropdown();
    themeField.dispatchEvent(new Event('input', { bubbles: true }));
    form.dispatchEvent(new Event('submit', { bubbles: true }));
  };

  // Reset selection when dropdown content changes (e.g. after an HTMX swap).
  const observer = new MutationObserver(() => {
    selectedIndex = -1;
    getItems().forEach((item) => {
      item.classList.remove('selected');
      item.setAttribute('aria-selected', 'false');
    });
    // Update aria-expanded based on dropdown content
    const hasContent = autocompleteDropdown.children.length > 0;
    themeField.setAttribute('aria-expanded', hasContent ? 'true' : 'false');
  });
  // Only child-list changes are observed, so the class/attribute updates made
  // by selectItem do not re-trigger this observer.
  observer.observe(autocompleteDropdown, { childList: true });

  // Single click handler: commit a clicked suggestion, or close the dropdown
  // when the click lands outside the autocomplete container.
  document.addEventListener('click', (e) => {
    const item = e.target.closest('.autocomplete-item');
    if (item && item.dataset.value) {
      commitItem(item);
      return;
    }
    if (!e.target.closest('.autocomplete-container')) {
      closeDropdown();
    }
  });

  // Keyboard navigation
  themeField.addEventListener('keydown', (e) => {
    const items = getItems();
    const hasItems = items.length > 0;

    if (e.key === 'Escape') {
      // Only swallow Escape when there is a dropdown to dismiss, so the
      // search input keeps its native Escape behaviour otherwise.
      if (autocompleteDropdown.children.length > 0) {
        closeDropdown();
        e.preventDefault();
      }
    } else if (e.key === 'ArrowDown' && hasItems) {
      e.preventDefault();
      // Wrap from the last item back to the first.
      const newIndex = selectedIndex < items.length - 1 ? selectedIndex + 1 : 0;
      selectItem(newIndex);
    } else if (e.key === 'ArrowUp' && hasItems) {
      e.preventDefault();
      // Wrap from the first item (or no selection) to the last.
      const newIndex = selectedIndex > 0 ? selectedIndex - 1 : items.length - 1;
      selectItem(newIndex);
    } else if (e.key === 'Enter' && selectedIndex >= 0 && hasItems) {
      // Enter with a highlighted suggestion picks it instead of submitting
      // whatever partial text is in the field.
      e.preventDefault();
      commitItem(items[selectedIndex]);
    }
  });

  // Mouse hover to highlight items
  autocompleteDropdown.addEventListener('mouseover', (e) => {
    const item = e.target.closest('.autocomplete-item');
    if (!item) {
      return;
    }
    const index = getItems().indexOf(item);
    // Skip re-selecting the already highlighted row; mouseover fires often
    // and each selection restarts a smooth scroll.
    if (index >= 0 && index !== selectedIndex) {
      selectItem(index);
    }
  });
}
|
||||||
}
|
}
|
||||||
form.addEventListener('submit', () => {
|
form.addEventListener('submit', () => {
|
||||||
if (!form.dataset.lastTrigger) {
|
if (!form.dataset.lastTrigger) {
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue