feat: theme catalog optimization with tag search and faster enrichment

matt 2025-10-15 17:17:46 -07:00
parent 952b151162
commit 9e6c68f559
26 changed files with 5906 additions and 5688 deletions

@@ -153,40 +153,44 @@ def _display_tags_from_entry(entry: Dict[str, Any]) -> List[str]:
 def _run_theme_metadata_enrichment(out_func=None) -> None:
     """Run full metadata enrichment sequence after theme catalog/YAML generation.
-    Idempotent: each script is safe to re-run; errors are swallowed (logged) to avoid
+    Uses consolidated ThemeEnrichmentPipeline for 5-10x faster processing.
+    Idempotent: safe to re-run; errors are swallowed (logged) to avoid
     impacting primary setup/tagging pipeline. Designed to centralize logic so both
     manual refresh (routes/themes.py) and automatic setup flows invoke identical steps.
     """
     try:
         import os
-        import sys
-        import subprocess
-        root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))
-        scripts_dir = os.path.join(root, 'code', 'scripts')
-        py = sys.executable
-        steps: List[List[str]] = [
-            [py, os.path.join(scripts_dir, 'autofill_min_examples.py')],
-            [py, os.path.join(scripts_dir, 'pad_min_examples.py'), '--min', os.environ.get('EDITORIAL_MIN_EXAMPLES', '5')],
-            [py, os.path.join(scripts_dir, 'cleanup_placeholder_examples.py'), '--apply'],
-            [py, os.path.join(scripts_dir, 'purge_anchor_placeholders.py'), '--apply'],
-            # Augment YAML with description / popularity buckets from the freshly built catalog
-            [py, os.path.join(scripts_dir, 'augment_theme_yaml_from_catalog.py')],
-            [py, os.path.join(scripts_dir, 'generate_theme_editorial_suggestions.py'), '--apply', '--limit-yaml', '0'],
-            [py, os.path.join(scripts_dir, 'lint_theme_editorial.py')],  # non-strict lint pass
-        ]
+        from pathlib import Path
+        from code.tagging.theme_enrichment import run_enrichment_pipeline
+        root = Path(__file__).resolve().parents[3]
+        min_examples = int(os.environ.get('EDITORIAL_MIN_EXAMPLES', '5'))
         def _emit(msg: str):
             try:
                 if out_func:
                     out_func(msg)
             except Exception:
                 pass
-        for cmd in steps:
-            try:
-                subprocess.run(cmd, check=True)
-            except Exception as e:
-                _emit(f"[metadata_enrich] step failed ({os.path.basename(cmd[1]) if len(cmd)>1 else cmd}): {e}")
-                continue
-    except Exception:
-        pass
+        # Run consolidated pipeline instead of 7 separate subprocess scripts
+        stats = run_enrichment_pipeline(
+            root=root,
+            min_examples=min_examples,
+            write=True,
+            enforce_min=False,  # Non-strict lint pass
+            strict=False,
+            progress_callback=_emit,
+        )
+        _emit(f"Theme enrichment complete: {stats.total_themes} themes processed")
+    except Exception as e:
+        if out_func:
+            try:
+                out_func(f"[metadata_enrich] pipeline failed: {e}")
+            except Exception:
+                pass
     return
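
The consolidated module itself is not part of this diff, so the interface below is only inferred from the call site: a minimal sketch of what run_enrichment_pipeline in code/tagging/theme_enrichment.py plausibly looks like, with stage names taken from the scripts it replaces. EnrichmentStats and its total_themes field are assumptions based on the caller reading stats.total_themes.

# Sketch only: inferred interface for the consolidated pipeline; the real
# implementation in code/tagging/theme_enrichment.py is not shown in this diff.
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, List, Optional


@dataclass
class EnrichmentStats:
    total_themes: int = 0                      # read as stats.total_themes by the caller
    stages_run: List[str] = field(default_factory=list)


def run_enrichment_pipeline(
    root: Path,
    min_examples: int = 5,
    write: bool = True,
    enforce_min: bool = False,
    strict: bool = False,
    progress_callback: Optional[Callable[[str], None]] = None,
) -> EnrichmentStats:
    """Run the former per-script steps as in-process stages over a single load
    of the theme YAML under `root`, instead of seven subprocess invocations."""
    stats = EnrichmentStats()

    def emit(msg: str) -> None:
        if progress_callback:
            progress_callback(msg)

    # Stage names mirror the scripts removed above. In the real pipeline each
    # stage would mutate the in-memory catalog (min_examples / enforce_min /
    # strict steering the padding and lint stages), and a single write at the
    # end replaces seven separate read-modify-write passes over the same files.
    for stage in (
        "autofill_min_examples",
        "pad_min_examples",
        "cleanup_placeholder_examples",
        "purge_anchor_placeholders",
        "augment_theme_yaml_from_catalog",
        "generate_theme_editorial_suggestions",
        "lint_theme_editorial",
    ):
        emit(f"[enrich] {stage}")
        stats.stages_run.append(stage)

    if write:
        emit(f"[enrich] writing enriched catalog under {root}")
    return stats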
@@ -1144,6 +1148,13 @@ def _ensure_setup_ready(out, force: bool = False) -> None:
     # Run metadata enrichment (best-effort) after export sequence.
     try:
         _run_theme_metadata_enrichment(out_func)
+        # Rebuild theme_list.json to pick up newly generated example_cards/commanders
+        # from the enrichment pipeline (which populates them from CSV data)
+        if use_merge and os.path.exists(build_script):
+            args = [_sys.executable, build_script]
+            if force:
+                args.append('--force')
+            _run(args, check=True)
     except Exception:
         pass
     try:
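
For context, a hypothetical caller sketch of the out_func contract shared by the setup flow and the manual refresh in routes/themes.py: any single-argument callable that accepts a progress string works. It assumes _run_theme_metadata_enrichment from the first hunk is importable in scope (its module path is not shown in this view).

# Hypothetical caller: out_func just receives progress strings one at a time,
# so forwarding them to a logger (or a streaming response writer) is enough.
import logging

logger = logging.getLogger("theme_enrichment")

def refresh_theme_metadata() -> None:
    # Errors are swallowed and logged inside the helper, so no try/except is needed here.
    _run_theme_metadata_enrichment(out_func=logger.info)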