feat: theme catalog optimization with tag search and faster enrichment

matt 2025-10-15 17:17:46 -07:00
parent 952b151162
commit 9e6c68f559
26 changed files with 5906 additions and 5688 deletions

View file

@@ -1,203 +0,0 @@
"""
Full audit of Protection-tagged cards with kindred metadata support (M2 Phase 2).
Created: October 8, 2025
Purpose: Audit and validate Protection tag precision after implementing grant detection.
Can be re-run periodically to check tagging quality.
This script audits ALL Protection-tagged cards and categorizes them:
- Grant: Gives broad protection to other permanents YOU control
- Kindred: Gives protection to specific creature types (metadata tags)
- Mixed: Both broad and kindred/inherent
- Inherent: Only has protection itself
- ConditionalSelf: Only conditionally grants to itself
- Opponent: Grants to opponent's permanents
- Neither: False positive
Outputs:
- m2_audit_v2.json: Full analysis with summary
- m2_audit_v2_grant.csv: Cards for main Protection tag
- m2_audit_v2_kindred.csv: Cards for kindred metadata tags
- m2_audit_v2_mixed.csv: Cards with both broad and kindred grants
- m2_audit_v2_conditional.csv: Conditional self-grants (exclude)
- m2_audit_v2_inherent.csv: Inherent protection only (exclude)
- m2_audit_v2_opponent.csv: Opponent grants (exclude)
- m2_audit_v2_neither.csv: False positives (exclude)
- m2_audit_v2_all.csv: All cards combined
"""
import sys
from pathlib import Path
import pandas as pd
import json
# Add project root to path
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
from code.tagging.protection_grant_detection import (
categorize_protection_card,
get_kindred_protection_tags,
is_granting_protection,
)
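# Illustrative call of the imported categorizer (card name and text are
# hypothetical; the actual category strings come from
# code.tagging.protection_grant_detection):
#   categorize_protection_card(
#       "Example Guardian",
#       "Other creatures you control have hexproof.",
#       "", "Creature", exclude_kindred=True,
#   )
#   # expected to land in the 'Grant' bucket, since it grants broadly to
#   # permanents you control rather than only to itself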
def load_all_cards():
"""Load all cards from color/identity CSV files."""
csv_dir = project_root / 'csv_files'
# Get all color/identity CSVs (not the raw cards.csv)
csv_files = list(csv_dir.glob('*_cards.csv'))
csv_files = [f for f in csv_files if f.stem not in ['cards', 'testdata']]
all_cards = []
for csv_file in csv_files:
try:
df = pd.read_csv(csv_file)
all_cards.append(df)
except Exception as e:
print(f"Warning: Could not load {csv_file.name}: {e}")
# Combine all DataFrames
combined = pd.concat(all_cards, ignore_index=True)
# Drop duplicates (cards appear in multiple color files)
combined = combined.drop_duplicates(subset=['name'], keep='first')
return combined
def audit_all_protection_cards():
"""Audit all Protection-tagged cards."""
print("Loading all cards...")
df = load_all_cards()
print(f"Total cards loaded: {len(df)}")
# Filter to Protection-tagged cards (column is 'themeTags' in color CSVs)
df_prot = df[df['themeTags'].str.contains('Protection', case=False, na=False)].copy()
print(f"Protection-tagged cards: {len(df_prot)}")
# Categorize each card
categories = []
grants_list = []
kindred_tags_list = []
for idx, row in df_prot.iterrows():
name = row['name']
text = str(row.get('text', '')).replace('\\n', '\n') # Convert escaped newlines to real newlines
keywords = str(row.get('keywords', ''))
card_type = str(row.get('type', ''))
# Categorize with kindred exclusion enabled
category = categorize_protection_card(name, text, keywords, card_type, exclude_kindred=True)
# Check if it grants broadly
grants_broad = is_granting_protection(text, keywords, exclude_kindred=True)
# Get kindred tags
kindred_tags = get_kindred_protection_tags(text)
categories.append(category)
grants_list.append(grants_broad)
kindred_tags_list.append(', '.join(sorted(kindred_tags)) if kindred_tags else '')
df_prot['category'] = categories
df_prot['grants_broad'] = grants_list
df_prot['kindred_tags'] = kindred_tags_list
# Generate summary (convert numpy types to native Python for JSON serialization)
summary = {
'total': int(len(df_prot)),
'categories': {k: int(v) for k, v in df_prot['category'].value_counts().to_dict().items()},
'grants_broad_count': int(df_prot['grants_broad'].sum()),
'kindred_cards_count': int((df_prot['kindred_tags'] != '').sum()),
}
# Calculate keep vs remove
keep_categories = {'Grant', 'Mixed'}
kindred_only = df_prot[df_prot['category'] == 'Kindred']
keep_count = len(df_prot[df_prot['category'].isin(keep_categories)])
remove_count = len(df_prot[~df_prot['category'].isin(keep_categories | {'Kindred'})])
summary['keep_main_tag'] = keep_count
summary['kindred_metadata'] = len(kindred_only)
summary['remove'] = remove_count
summary['precision_estimate'] = round((keep_count / len(df_prot)) * 100, 1) if len(df_prot) > 0 else 0
# Print summary
print(f"\n{'='*60}")
print("AUDIT SUMMARY")
print(f"{'='*60}")
print(f"Total Protection-tagged cards: {summary['total']}")
print(f"\nCategories:")
for cat, count in sorted(summary['categories'].items()):
pct = (count / summary['total']) * 100
print(f" {cat:20s} {count:4d} ({pct:5.1f}%)")
print(f"\n{'='*60}")
print(f"Main Protection tag: {keep_count:4d} ({keep_count/len(df_prot)*100:5.1f}%)")
print(f"Kindred metadata only: {len(kindred_only):4d} ({len(kindred_only)/len(df_prot)*100:5.1f}%)")
print(f"Remove: {remove_count:4d} ({remove_count/len(df_prot)*100:5.1f}%)")
print(f"{'='*60}")
print(f"Precision estimate: {summary['precision_estimate']}%")
print(f"{'='*60}\n")
# Export results
output_dir = project_root / 'logs' / 'roadmaps' / 'source' / 'tagging_refinement'
output_dir.mkdir(parents=True, exist_ok=True)
# Export JSON summary
with open(output_dir / 'm2_audit_v2.json', 'w') as f:
json.dump({
'summary': summary,
'cards': df_prot[['name', 'type', 'category', 'grants_broad', 'kindred_tags', 'keywords', 'text']].to_dict(orient='records')
}, f, indent=2)
# Export CSVs by category
export_cols = ['name', 'type', 'category', 'grants_broad', 'kindred_tags', 'keywords', 'text']
# Grant category
df_grant = df_prot[df_prot['category'] == 'Grant']
df_grant[export_cols].to_csv(output_dir / 'm2_audit_v2_grant.csv', index=False)
print(f"Exported {len(df_grant)} Grant cards to m2_audit_v2_grant.csv")
# Kindred category
df_kindred = df_prot[df_prot['category'] == 'Kindred']
df_kindred[export_cols].to_csv(output_dir / 'm2_audit_v2_kindred.csv', index=False)
print(f"Exported {len(df_kindred)} Kindred cards to m2_audit_v2_kindred.csv")
# Mixed category
df_mixed = df_prot[df_prot['category'] == 'Mixed']
df_mixed[export_cols].to_csv(output_dir / 'm2_audit_v2_mixed.csv', index=False)
print(f"Exported {len(df_mixed)} Mixed cards to m2_audit_v2_mixed.csv")
# ConditionalSelf category
df_conditional = df_prot[df_prot['category'] == 'ConditionalSelf']
df_conditional[export_cols].to_csv(output_dir / 'm2_audit_v2_conditional.csv', index=False)
print(f"Exported {len(df_conditional)} ConditionalSelf cards to m2_audit_v2_conditional.csv")
# Inherent category
df_inherent = df_prot[df_prot['category'] == 'Inherent']
df_inherent[export_cols].to_csv(output_dir / 'm2_audit_v2_inherent.csv', index=False)
print(f"Exported {len(df_inherent)} Inherent cards to m2_audit_v2_inherent.csv")
# Opponent category
df_opponent = df_prot[df_prot['category'] == 'Opponent']
df_opponent[export_cols].to_csv(output_dir / 'm2_audit_v2_opponent.csv', index=False)
print(f"Exported {len(df_opponent)} Opponent cards to m2_audit_v2_opponent.csv")
# Neither category
df_neither = df_prot[df_prot['category'] == 'Neither']
df_neither[export_cols].to_csv(output_dir / 'm2_audit_v2_neither.csv', index=False)
print(f"Exported {len(df_neither)} Neither cards to m2_audit_v2_neither.csv")
# All cards
df_prot[export_cols].to_csv(output_dir / 'm2_audit_v2_all.csv', index=False)
print(f"Exported {len(df_prot)} total cards to m2_audit_v2_all.csv")
print(f"\nAll files saved to: {output_dir}")
return df_prot, summary
if __name__ == '__main__':
df_results, summary = audit_all_protection_cards()

View file

@@ -1,118 +0,0 @@
"""Opt-in guard that compares multi-theme filter performance to a stored baseline.
Run inside the project virtual environment:
python -m code.scripts.check_random_theme_perf --baseline config/random_theme_perf_baseline.json
The script executes the same profiling loop as `profile_multi_theme_filter` and fails
if the observed mean or p95 timings regress by more than the allowed threshold.
"""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
from typing import Any, Dict, Tuple
PROJECT_ROOT = Path(__file__).resolve().parents[2]
DEFAULT_BASELINE = PROJECT_ROOT / "config" / "random_theme_perf_baseline.json"
if str(PROJECT_ROOT) not in sys.path:
sys.path.append(str(PROJECT_ROOT))
from code.scripts.profile_multi_theme_filter import run_profile # type: ignore # noqa: E402
def _load_baseline(path: Path) -> Dict[str, Any]:
if not path.exists():
raise FileNotFoundError(f"Baseline file not found: {path}")
data = json.loads(path.read_text(encoding="utf-8"))
return data
def _extract(metric: Dict[str, Any], key: str) -> float:
try:
value = float(metric.get(key, 0.0))
except Exception:
value = 0.0
return value
def _check_section(name: str, actual: Dict[str, Any], baseline: Dict[str, Any], threshold: float) -> Tuple[bool, str]:
a_mean = _extract(actual, "mean_ms")
b_mean = _extract(baseline, "mean_ms")
a_p95 = _extract(actual, "p95_ms")
b_p95 = _extract(baseline, "p95_ms")
allowed_mean = b_mean * (1.0 + threshold)
allowed_p95 = b_p95 * (1.0 + threshold)
mean_ok = a_mean <= allowed_mean or b_mean == 0.0
p95_ok = a_p95 <= allowed_p95 or b_p95 == 0.0
status = mean_ok and p95_ok
def _format_row(label: str, actual_val: float, baseline_val: float, allowed_val: float, ok: bool) -> str:
trend = ((actual_val - baseline_val) / baseline_val * 100.0) if baseline_val else 0.0
trend_str = f"{trend:+.1f}%" if baseline_val else "n/a"
limit_str = f"{allowed_val:.3f}ms" if baseline_val else "n/a"
return f" {label:<6} actual={actual_val:.3f}ms baseline={baseline_val:.3f}ms ({trend_str}), limit {limit_str} -> {'OK' if ok else 'FAIL'}"
rows = [f"Section: {name}"]
rows.append(_format_row("mean", a_mean, b_mean, allowed_mean, mean_ok))
rows.append(_format_row("p95", a_p95, b_p95, allowed_p95, p95_ok))
return status, "\n".join(rows)
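# Worked example of the threshold check (hypothetical timings, not a real
# baseline): with threshold=0.15 a baseline mean of 2.000ms allows up to 2.300ms,
# so an observed mean of 2.450ms (+22.5%) fails while 2.250ms (+12.5%) passes;
# a zero baseline value short-circuits that comparison to OK.
#   ok, report = _check_section(
#       "cascade",
#       {"mean_ms": 2.45, "p95_ms": 4.8},   # observed
#       {"mean_ms": 2.00, "p95_ms": 5.0},   # baseline
#       0.15,
#   )
#   # ok is False: 2.45 > 2.00 * 1.15, even though 4.8 <= 5.0 * 1.15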
def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser(description="Check multi-theme filtering performance against a baseline")
parser.add_argument("--baseline", type=Path, default=DEFAULT_BASELINE, help="Baseline JSON file (default: config/random_theme_perf_baseline.json)")
parser.add_argument("--iterations", type=int, default=400, help="Number of iterations to sample (default: 400)")
parser.add_argument("--seed", type=int, default=None, help="Optional RNG seed for reproducibility")
parser.add_argument("--threshold", type=float, default=0.15, help="Allowed regression threshold as a fraction (default: 0.15 = 15%)")
parser.add_argument("--update-baseline", action="store_true", help="Overwrite the baseline file with the newly collected metrics")
args = parser.parse_args(argv)
baseline_path = args.baseline if args.baseline else DEFAULT_BASELINE
if args.update_baseline and not baseline_path.parent.exists():
baseline_path.parent.mkdir(parents=True, exist_ok=True)
if not args.update_baseline:
baseline = _load_baseline(baseline_path)
else:
baseline = {}
results = run_profile(args.iterations, args.seed)
cascade_status, cascade_report = _check_section("cascade", results.get("cascade", {}), baseline.get("cascade", {}), args.threshold)
synergy_status, synergy_report = _check_section("synergy", results.get("synergy", {}), baseline.get("synergy", {}), args.threshold)
print("Iterations:", results.get("iterations"))
print("Seed:", results.get("seed"))
print(cascade_report)
print(synergy_report)
overall_ok = cascade_status and synergy_status
if args.update_baseline:
payload = {
"iterations": results.get("iterations"),
"seed": results.get("seed"),
"cascade": results.get("cascade"),
"synergy": results.get("synergy"),
}
baseline_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
print(f"Baseline updated → {baseline_path}")
return 0
if not overall_ok:
print(f"FAIL: performance regressions exceeded {args.threshold * 100:.1f}% threshold", file=sys.stderr)
return 1
print("PASS: performance within allowed threshold")
return 0
if __name__ == "__main__": # pragma: no cover
raise SystemExit(main())

View file

@@ -0,0 +1,135 @@
"""CLI wrapper for theme enrichment pipeline.
Runs the consolidated theme enrichment pipeline with command-line options.
For backward compatibility, individual scripts can still be run separately,
but this provides a faster single-pass alternative.
Usage:
python code/scripts/enrich_themes.py --write
python code/scripts/enrich_themes.py --dry-run --enforce-min
"""
from __future__ import annotations
import argparse
import os
import sys
from pathlib import Path
# Add project root to path
ROOT = Path(__file__).resolve().parents[2]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
# Import after adding to path
from code.tagging.theme_enrichment import run_enrichment_pipeline # noqa: E402
def main() -> int:
"""Run theme enrichment pipeline from CLI."""
parser = argparse.ArgumentParser(
description='Consolidated theme metadata enrichment pipeline',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Dry run (no changes written):
python code/scripts/enrich_themes.py --dry-run
# Write changes:
python code/scripts/enrich_themes.py --write
# Enforce minimum examples (errors if insufficient):
python code/scripts/enrich_themes.py --write --enforce-min
# Strict validation for cornerstone themes:
python code/scripts/enrich_themes.py --write --strict
Note: This replaces running 7 separate scripts (autofill, pad, cleanup, purge,
augment, suggestions, lint) with a single 5-10x faster operation.
"""
)
parser.add_argument(
'--write',
action='store_true',
help='Write changes to disk (default: dry run)'
)
parser.add_argument(
'--dry-run',
action='store_true',
help='Dry run mode: show what would be changed without writing'
)
parser.add_argument(
'--min',
'--min-examples',
type=int,
default=None,
metavar='N',
help='Minimum number of example commanders (default: $EDITORIAL_MIN_EXAMPLES or 5)'
)
parser.add_argument(
'--enforce-min',
action='store_true',
help='Treat minimum examples violations as errors'
)
parser.add_argument(
'--strict',
action='store_true',
help='Enable strict validation (cornerstone themes must have examples)'
)
args = parser.parse_args()
# Determine write mode
if args.dry_run:
write = False
elif args.write:
write = True
else:
# Default to dry run if neither specified
write = False
print("Note: Running in dry-run mode (use --write to save changes)\n")
# Get minimum examples threshold
if args.min is not None:
min_examples = args.min
else:
min_examples = int(os.environ.get('EDITORIAL_MIN_EXAMPLES', '5'))
print("Theme Enrichment Pipeline")
print("========================")
print(f"Mode: {'WRITE' if write else 'DRY RUN'}")
print(f"Min examples: {min_examples}")
print(f"Enforce min: {args.enforce_min}")
print(f"Strict: {args.strict}")
print()
try:
stats = run_enrichment_pipeline(
root=ROOT,
min_examples=min_examples,
write=write,
enforce_min=args.enforce_min,
strict=args.strict,
progress_callback=None, # Use default print
)
# Return non-zero if there are lint errors
if stats.lint_errors > 0:
print(f"\n❌ Enrichment completed with {stats.lint_errors} error(s)")
return 1
print("\n✅ Enrichment completed successfully")
return 0
except KeyboardInterrupt:
print("\n\nInterrupted by user")
return 130
except Exception as e:
print(f"\n❌ Error: {e}", file=sys.stderr)
if '--debug' in sys.argv:
raise
return 1
if __name__ == '__main__':
raise SystemExit(main())

View file

@@ -123,6 +123,9 @@ def main():
enforced_set = set(enforced_synergies)
inferred_synergies = [s for s in synergy_list if s not in curated_set and s not in enforced_set]
example_cards_value = entry.get('example_cards', [])
example_commanders_value = entry.get('example_commanders', [])
doc = {
'id': slug,
'display_name': theme_name,
@@ -132,13 +135,40 @@
'inferred_synergies': inferred_synergies,
'primary_color': entry.get('primary_color'),
'secondary_color': entry.get('secondary_color'),
'example_cards': example_cards_value,
'example_commanders': example_commanders_value,
'synergy_example_cards': entry.get('synergy_example_cards', []),
'synergy_commanders': entry.get('synergy_commanders', []),
'deck_archetype': entry.get('deck_archetype'),
'popularity_hint': entry.get('popularity_hint'),
'popularity_bucket': entry.get('popularity_bucket'),
'editorial_quality': entry.get('editorial_quality'),
'description': entry.get('description'),
'notes': ''
}
# Drop None color keys for cleanliness
# Drop None/empty keys for cleanliness
if doc['primary_color'] is None:
doc.pop('primary_color')
if doc.get('secondary_color') is None:
doc.pop('secondary_color')
if not doc.get('example_cards'):
doc.pop('example_cards')
if not doc.get('example_commanders'):
doc.pop('example_commanders')
if not doc.get('synergy_example_cards'):
doc.pop('synergy_example_cards')
if not doc.get('synergy_commanders'):
doc.pop('synergy_commanders')
if doc.get('deck_archetype') is None:
doc.pop('deck_archetype')
if doc.get('popularity_hint') is None:
doc.pop('popularity_hint')
if doc.get('popularity_bucket') is None:
doc.pop('popularity_bucket')
if doc.get('editorial_quality') is None:
doc.pop('editorial_quality')
if doc.get('description') is None:
doc.pop('description')
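# The pruning above could equally be expressed as two small loops (a sketch,
# not part of this change): list-valued keys drop when empty, scalar keys only
# when None.
#   for key in ('example_cards', 'example_commanders',
#               'synergy_example_cards', 'synergy_commanders'):
#       if not doc.get(key):
#           doc.pop(key, None)
#   for key in ('primary_color', 'secondary_color', 'deck_archetype',
#               'popularity_hint', 'popularity_bucket', 'editorial_quality',
#               'description'):
#       if doc.get(key) is None:
#           doc.pop(key, None)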
with path.open('w', encoding='utf-8') as f:
yaml.safe_dump(doc, f, sort_keys=False, allow_unicode=True)
exported += 1

View file

@@ -19,6 +19,13 @@ from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Sequence
try:
import pandas as pd
HAS_PANDAS = True
except ImportError:
HAS_PANDAS = False
pd = None # type: ignore
ROOT = Path(__file__).resolve().parents[2]
CODE_ROOT = ROOT / "code"
if str(CODE_ROOT) not in sys.path:
@@ -29,6 +36,9 @@ try:
except Exception: # pragma: no cover - fallback for adhoc execution
DEFAULT_CSV_DIRECTORY = "csv_files"
# Parquet support requires pandas (imported at top of file, uses pyarrow under the hood)
HAS_PARQUET_SUPPORT = HAS_PANDAS
DEFAULT_OUTPUT_PATH = ROOT / "config" / "themes" / "theme_catalog.csv"
HEADER_COMMENT_PREFIX = "# theme_catalog"
@@ -87,7 +97,68 @@ def parse_theme_tags(value: object) -> List[str]:
return []
def _load_theme_counts_from_parquet(
parquet_path: Path,
theme_variants: Dict[str, set[str]]
) -> Counter[str]:
"""Load theme counts from a parquet file using pandas (which uses pyarrow).
Args:
parquet_path: Path to the parquet file (commander_cards.parquet or all_cards.parquet)
theme_variants: Dict to accumulate theme name variants
Returns:
Counter of theme occurrences
"""
if pd is None:
return Counter()
counts: Counter[str] = Counter()
if not parquet_path.exists():
return counts
# Read only themeTags column for efficiency
try:
df = pd.read_parquet(parquet_path, columns=["themeTags"])
except Exception:
# If themeTags column doesn't exist, return empty
return counts
# Convert to list for fast iteration (faster than iterrows)
theme_tags_list = df["themeTags"].tolist()
for raw_value in theme_tags_list:
if raw_value is None or (isinstance(raw_value, float) and pd.isna(raw_value)):
continue
tags = parse_theme_tags(raw_value)
if not tags:
continue
seen_in_row: set[str] = set()
for tag in tags:
display = normalize_theme_display(tag)
if not display:
continue
key = canonical_key(display)
if key in seen_in_row:
continue
seen_in_row.add(key)
counts[key] += 1
theme_variants[key].add(display)
return counts
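# Accumulation sketch (tags are hypothetical; assumes canonical_key lower-cases
# the normalized display name): a row tagged ["Lifegain", "lifegain", "Tokens"]
# adds 1 to counts["lifegain"] and 1 to counts["tokens"], because per-row
# duplicates are skipped via seen_in_row, while theme_variants["lifegain"]
# collects both display spellings.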
def _load_theme_counts(csv_path: Path, theme_variants: Dict[str, set[str]]) -> Counter[str]:
"""Load theme counts from CSV file (fallback method).
Args:
csv_path: Path to CSV file
theme_variants: Dict to accumulate theme name variants
Returns:
Counter of theme occurrences
"""
counts: Counter[str] = Counter()
if not csv_path.exists():
return counts
@@ -146,24 +217,67 @@
commander_filename: str = "commander_cards.csv",
cards_filename: str = "cards.csv",
logs_directory: Optional[Path] = None,
use_parquet: bool = True,
) -> CatalogBuildResult:
"""Build theme catalog from card data.
Args:
csv_directory: Directory containing CSV files (fallback)
output_path: Where to write the catalog CSV
generated_at: Optional timestamp for generation
commander_filename: Name of commander CSV file
cards_filename: Name of cards CSV file
logs_directory: Optional directory to copy output to
use_parquet: If True, try to use all_cards.parquet first (default: True)
Returns:
CatalogBuildResult with generated rows and metadata
"""
csv_directory = csv_directory.resolve()
output_path = output_path.resolve()
theme_variants: Dict[str, set[str]] = defaultdict(set)
commander_counts = _load_theme_counts(csv_directory / commander_filename, theme_variants)
# Try to use parquet file first (much faster)
used_parquet = False
if use_parquet and HAS_PARQUET_SUPPORT:
try:
# Use dedicated parquet files (matches CSV structure exactly)
parquet_dir = csv_directory.parent / "card_files"
# Load commander counts directly from commander_cards.parquet
commander_parquet = parquet_dir / "commander_cards.parquet"
commander_counts = _load_theme_counts_from_parquet(
commander_parquet, theme_variants=theme_variants
)
# CSV method doesn't load non-commander cards, so we don't either
card_counts = Counter()
used_parquet = True
print("✓ Loaded theme data from parquet files")
except Exception as e:
print(f"⚠ Failed to load from parquet: {e}")
print(" Falling back to CSV files...")
used_parquet = False
# Fallback to CSV files if parquet not available or failed
if not used_parquet:
commander_counts = _load_theme_counts(csv_directory / commander_filename, theme_variants)
card_counts: Counter[str] = Counter()
cards_path = csv_directory / cards_filename
if cards_path.exists():
card_counts = _load_theme_counts(cards_path, theme_variants)
else:
# Fallback: scan all *_cards.csv except commander
for candidate in csv_directory.glob("*_cards.csv"):
if candidate.name == commander_filename:
continue
card_counts += _load_theme_counts(candidate, theme_variants)
card_counts: Counter[str] = Counter()
cards_path = csv_directory / cards_filename
if cards_path.exists():
card_counts = _load_theme_counts(cards_path, theme_variants)
else:
# Fallback: scan all *_cards.csv except commander
for candidate in csv_directory.glob("*_cards.csv"):
if candidate.name == commander_filename:
continue
card_counts += _load_theme_counts(candidate, theme_variants)
print("✓ Loaded theme data from CSV files")
keys = sorted(set(card_counts.keys()) | set(commander_counts.keys()))
generated_at_iso = _derive_generated_at(generated_at)
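# Usage sketch for the updated builder (paths illustrative; parquet is attempted
# first and the CSV scan above remains the fallback):
#   result = build_theme_catalog(
#       csv_directory=Path("csv_files"),
#       output_path=Path("config/themes/theme_catalog.csv"),
#       use_parquet=True,
#   )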

View file

@@ -1,305 +0,0 @@
"""Catalog diff helper for verifying multi-face merge output.
This utility regenerates the card CSV catalog (optionally writing compatibility
snapshots) and then compares the merged outputs against the baseline snapshots.
It is intended to support the MDFC rollout checklist by providing a concise summary
of how many rows were merged, which cards collapsed into a single record, and
whether any tag unions diverge from expectations.
Example usage (from repo root, inside virtualenv):
python -m code.scripts.preview_dfc_catalog_diff --compat-snapshot --output logs/dfc_catalog_diff.json
The script prints a human readable summary to stdout and optionally writes a JSON
artifact for release/staging review.
"""
from __future__ import annotations
import argparse
import ast
import importlib
import json
import os
import sys
import time
from collections import Counter
from pathlib import Path
from typing import Any, Dict, Iterable, List, Sequence
import pandas as pd
from settings import COLORS, CSV_DIRECTORY
DEFAULT_COMPAT_DIR = Path(os.getenv("DFC_COMPAT_DIR", "csv_files/compat_faces"))
CSV_ROOT = Path(CSV_DIRECTORY)
def _parse_list_cell(value: Any) -> List[str]:
"""Convert serialized list cells ("['A', 'B']") into Python lists."""
if isinstance(value, list):
return [str(item) for item in value]
if value is None:
return []
if isinstance(value, float) and pd.isna(value): # type: ignore[arg-type]
return []
text = str(value).strip()
if not text:
return []
try:
parsed = ast.literal_eval(text)
except (SyntaxError, ValueError):
return [text]
if isinstance(parsed, list):
return [str(item) for item in parsed]
return [str(parsed)]
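# Illustrative coercions (inputs hypothetical):
#   _parse_list_cell("['Lifegain', 'Tokens']")  -> ['Lifegain', 'Tokens']
#   _parse_list_cell("Lifegain")                -> ['Lifegain']   (literal_eval fails, falls back to raw text)
#   _parse_list_cell(float('nan'))              -> []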
def _load_catalog(path: Path) -> pd.DataFrame:
if not path.exists():
raise FileNotFoundError(f"Catalog file missing: {path}")
df = pd.read_csv(path)
for column in ("themeTags", "keywords", "creatureTypes"):
if column in df.columns:
df[column] = df[column].apply(_parse_list_cell)
return df
def _multi_face_names(df: pd.DataFrame) -> List[str]:
counts = Counter(df.get("name", []))
return [name for name, count in counts.items() if isinstance(name, str) and count > 1]
def _collect_tags(series: Iterable[List[str]]) -> List[str]:
tags: List[str] = []
for value in series:
if isinstance(value, list):
tags.extend(str(item) for item in value)
return sorted(set(tags))
def _summarize_color(
color: str,
merged: pd.DataFrame,
baseline: pd.DataFrame,
sample_size: int,
) -> Dict[str, Any]:
merged_names = set(merged.get("name", []))
baseline_names = list(baseline.get("name", []))
baseline_name_set = set(name for name in baseline_names if isinstance(name, str))
multi_face = _multi_face_names(baseline)
collapsed = []
tag_mismatches: List[str] = []
missing_after_merge: List[str] = []
for name in multi_face:
group = baseline[baseline["name"] == name]
merged_row = merged[merged["name"] == name]
if merged_row.empty:
missing_after_merge.append(name)
continue
expected_tags = _collect_tags(group["themeTags"]) if "themeTags" in group else []
merged_tags = _collect_tags(merged_row.iloc[[0]]["themeTags"]) if "themeTags" in merged_row else []
if expected_tags != merged_tags:
tag_mismatches.append(name)
collapsed.append(name)
removed_names = sorted(baseline_name_set - merged_names)
added_names = sorted(merged_names - baseline_name_set)
return {
"rows_merged": len(merged),
"rows_baseline": len(baseline),
"row_delta": len(merged) - len(baseline),
"multi_face_groups": len(multi_face),
"collapsed_sample": collapsed[:sample_size],
"tag_union_mismatches": tag_mismatches[:sample_size],
"missing_after_merge": missing_after_merge[:sample_size],
"removed_names": removed_names[:sample_size],
"added_names": added_names[:sample_size],
}
def _refresh_catalog(colors: Sequence[str], compat_snapshot: bool) -> None:
os.environ.pop("ENABLE_DFC_MERGE", None)
os.environ["DFC_COMPAT_SNAPSHOT"] = "1" if compat_snapshot else "0"
importlib.invalidate_caches()
# Reload tagger to pick up the new env var
tagger = importlib.import_module("code.tagging.tagger")
tagger = importlib.reload(tagger) # type: ignore[assignment]
for color in colors:
tagger.load_dataframe(color)
def generate_diff(
colors: Sequence[str],
compat_dir: Path,
sample_size: int,
) -> Dict[str, Any]:
per_color: Dict[str, Any] = {}
overall = {
"total_rows_merged": 0,
"total_rows_baseline": 0,
"total_multi_face_groups": 0,
"colors": len(colors),
"tag_union_mismatches": 0,
"missing_after_merge": 0,
}
for color in colors:
merged_path = CSV_ROOT / f"{color}_cards.csv"
baseline_path = compat_dir / f"{color}_cards_unmerged.csv"
merged_df = _load_catalog(merged_path)
baseline_df = _load_catalog(baseline_path)
summary = _summarize_color(color, merged_df, baseline_df, sample_size)
per_color[color] = summary
overall["total_rows_merged"] += summary["rows_merged"]
overall["total_rows_baseline"] += summary["rows_baseline"]
overall["total_multi_face_groups"] += summary["multi_face_groups"]
overall["tag_union_mismatches"] += len(summary["tag_union_mismatches"])
overall["missing_after_merge"] += len(summary["missing_after_merge"])
overall["row_delta_total"] = overall["total_rows_merged"] - overall["total_rows_baseline"]
return {"overall": overall, "per_color": per_color}
def main(argv: List[str]) -> int:
parser = argparse.ArgumentParser(description="Preview merged vs baseline DFC catalog diff")
parser.add_argument(
"--skip-refresh",
action="store_true",
help="Skip rebuilding the catalog in compatibility mode (requires existing compat snapshots)",
)
parser.add_argument(
"--mode",
default="",
help="[Deprecated] Legacy ENABLE_DFC_MERGE value (compat|1|0 etc.)",
)
parser.add_argument(
"--compat-snapshot",
dest="compat_snapshot",
action="store_true",
help="Write compatibility snapshots before diffing (default: off unless legacy --mode compat)",
)
parser.add_argument(
"--no-compat-snapshot",
dest="compat_snapshot",
action="store_false",
help="Skip compatibility snapshots even if legacy --mode compat is supplied",
)
parser.set_defaults(compat_snapshot=None)
parser.add_argument(
"--colors",
nargs="*",
help="Optional subset of colors to diff (defaults to full COLORS list)",
)
parser.add_argument(
"--compat-dir",
type=Path,
default=DEFAULT_COMPAT_DIR,
help="Directory containing unmerged compatibility snapshots (default: %(default)s)",
)
parser.add_argument(
"--output",
type=Path,
help="Optional JSON file to write with the diff summary",
)
parser.add_argument(
"--sample-size",
type=int,
default=10,
help="Number of sample entries to include per section (default: %(default)s)",
)
args = parser.parse_args(argv)
colors = tuple(args.colors) if args.colors else tuple(COLORS)
compat_dir = args.compat_dir
mode = str(args.mode or "").strip().lower()
if mode and mode not in {"compat", "dual", "both", "1", "on", "true", "0", "off", "false", "disabled"}:
print(
f" Legacy --mode value '{mode}' detected; merge remains enabled. Use --compat-snapshot as needed.",
flush=True,
)
if args.compat_snapshot is None:
compat_snapshot = mode in {"compat", "dual", "both"}
else:
compat_snapshot = args.compat_snapshot
if mode:
print(
" Ignoring deprecated --mode value because --compat-snapshot/--no-compat-snapshot was supplied.",
flush=True,
)
if mode in {"0", "off", "false", "disabled"}:
print(
"⚠ ENABLE_DFC_MERGE=off is deprecated; the merge remains enabled regardless of the value.",
flush=True,
)
if not args.skip_refresh:
start = time.perf_counter()
_refresh_catalog(colors, compat_snapshot)
duration = time.perf_counter() - start
snapshot_msg = "with compat snapshot" if compat_snapshot else "merged-only"
print(f"✔ Refreshed catalog in {duration:.1f}s ({snapshot_msg})")
else:
print(" Using existing catalog outputs (refresh skipped)")
try:
diff = generate_diff(colors, compat_dir, args.sample_size)
except FileNotFoundError as exc:
print(f"ERROR: {exc}")
print("Run without --skip-refresh (or ensure compat snapshots exist).", file=sys.stderr)
return 2
overall = diff["overall"]
print("\n=== DFC Catalog Diff Summary ===")
print(
f"Merged rows: {overall['total_rows_merged']:,} | Baseline rows: {overall['total_rows_baseline']:,} | "
f"Δ rows: {overall['row_delta_total']:,}"
)
print(
f"Multi-face groups: {overall['total_multi_face_groups']:,} | "
f"Tag union mismatches: {overall['tag_union_mismatches']} | Missing after merge: {overall['missing_after_merge']}"
)
for color, summary in diff["per_color"].items():
print(f"\n[{color}] baseline={summary['rows_baseline']} merged={summary['rows_merged']} Δ={summary['row_delta']}")
if summary["multi_face_groups"]:
print(f" multi-face groups: {summary['multi_face_groups']}")
if summary["collapsed_sample"]:
sample = ", ".join(summary["collapsed_sample"][:3])
print(f" collapsed sample: {sample}")
if summary["tag_union_mismatches"]:
print(f" TAG MISMATCH sample: {', '.join(summary['tag_union_mismatches'])}")
if summary["missing_after_merge"]:
print(f" MISSING sample: {', '.join(summary['missing_after_merge'])}")
if summary["removed_names"]:
print(f" removed sample: {', '.join(summary['removed_names'])}")
if summary["added_names"]:
print(f" added sample: {', '.join(summary['added_names'])}")
if args.output:
payload = {
"captured_at": int(time.time()),
"mode": args.mode,
"colors": colors,
"compat_dir": str(compat_dir),
"summary": diff,
}
try:
args.output.parent.mkdir(parents=True, exist_ok=True)
args.output.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8")
print(f"\n📄 Wrote JSON summary to {args.output}")
except Exception as exc: # pragma: no cover
print(f"Failed to write output file {args.output}: {exc}", file=sys.stderr)
return 3
return 0
if __name__ == "__main__": # pragma: no cover
raise SystemExit(main(sys.argv[1:]))

View file

@@ -1,105 +0,0 @@
"""CLI utility: snapshot preview metrics and emit summary/top slow themes.
Usage (from repo root virtualenv):
python -m code.scripts.preview_metrics_snapshot --limit 10 --output logs/preview_metrics_snapshot.json
Fetches /themes/metrics (requires WEB_THEME_PICKER_DIAGNOSTICS=1), prints a human-readable
summary to stdout, and optionally writes a compact JSON snapshot via --output.
"""
from __future__ import annotations
import argparse
import json
import sys
import time
from pathlib import Path
from typing import Any, Dict
import urllib.request
import urllib.error
DEFAULT_URL = "http://localhost:8000/themes/metrics"
def fetch_metrics(url: str) -> Dict[str, Any]:
req = urllib.request.Request(url, headers={"Accept": "application/json"})
with urllib.request.urlopen(req, timeout=10) as resp: # nosec B310 (local trusted)
data = resp.read().decode("utf-8", "replace")
try:
return json.loads(data) # type: ignore[return-value]
except json.JSONDecodeError as e: # pragma: no cover - unlikely if server OK
raise SystemExit(f"Invalid JSON from metrics endpoint: {e}\nRaw: {data[:400]}")
def summarize(metrics: Dict[str, Any], top_n: int) -> Dict[str, Any]:
preview = (metrics.get("preview") or {}) if isinstance(metrics, dict) else {}
per_theme = preview.get("per_theme") or {}
# Compute top slow themes by avg_ms
items = []
for slug, info in per_theme.items():
if not isinstance(info, dict):
continue
avg = info.get("avg_ms")
if isinstance(avg, (int, float)):
items.append((slug, float(avg), info))
items.sort(key=lambda x: x[1], reverse=True)
top = items[:top_n]
return {
"preview_requests": preview.get("preview_requests"),
"preview_cache_hits": preview.get("preview_cache_hits"),
"preview_avg_build_ms": preview.get("preview_avg_build_ms"),
"preview_p95_build_ms": preview.get("preview_p95_build_ms"),
"preview_ttl_seconds": preview.get("preview_ttl_seconds"),
"editorial_curated_vs_sampled_pct": preview.get("editorial_curated_vs_sampled_pct"),
"top_slowest": [
{
"slug": slug,
"avg_ms": avg,
"p95_ms": info.get("p95_ms"),
"builds": info.get("builds"),
"requests": info.get("requests"),
"avg_curated_pct": info.get("avg_curated_pct"),
}
for slug, avg, info in top
],
}
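# Shape of the returned summary (all values illustrative):
#   {
#       "preview_requests": 120, "preview_cache_hits": 95,
#       "preview_avg_build_ms": 38.2, "preview_p95_build_ms": 91.0,
#       "preview_ttl_seconds": 600, "editorial_curated_vs_sampled_pct": 72.5,
#       "top_slowest": [{"slug": "example-theme", "avg_ms": 140.3, "p95_ms": 210.8,
#                        "builds": 4, "requests": 9, "avg_curated_pct": 66.7}],
#   }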
def main(argv: list[str]) -> int:
ap = argparse.ArgumentParser(description="Snapshot preview metrics")
ap.add_argument("--url", default=DEFAULT_URL, help="Metrics endpoint URL (default: %(default)s)")
ap.add_argument("--limit", type=int, default=10, help="Top N slow themes to include (default: %(default)s)")
ap.add_argument("--output", type=Path, help="Optional output JSON file for snapshot")
ap.add_argument("--quiet", action="store_true", help="Suppress stdout summary (still writes file if --output)")
args = ap.parse_args(argv)
try:
raw = fetch_metrics(args.url)
except urllib.error.URLError as e:
print(f"ERROR: Failed fetching metrics endpoint: {e}", file=sys.stderr)
return 2
summary = summarize(raw, args.limit)
snapshot = {
"captured_at": int(time.time()),
"source": args.url,
"summary": summary,
}
if args.output:
try:
args.output.parent.mkdir(parents=True, exist_ok=True)
args.output.write_text(json.dumps(snapshot, indent=2, sort_keys=True), encoding="utf-8")
except Exception as e: # pragma: no cover
print(f"ERROR: writing snapshot file failed: {e}", file=sys.stderr)
return 3
if not args.quiet:
print("Preview Metrics Snapshot:")
print(json.dumps(summary, indent=2))
return 0
if __name__ == "__main__": # pragma: no cover
raise SystemExit(main(sys.argv[1:]))

View file

@@ -1,349 +0,0 @@
"""Ad-hoc performance benchmark for theme preview build latency (Phase A validation).
Runs warm-up plus measured request loops against several theme slugs and prints
aggregate latency stats (p50/p90/p95, cache hit ratio evolution). Intended to
establish or validate that the refactor did not introduce a >5% p95 regression.
Usage (ensure the server is running locally, commonly :8080 in docker compose):
python -m code.scripts.preview_perf_benchmark --themes 8 --loops 40 \
--url http://localhost:8080 --warm 1 --limit 12
Theme slug discovery hierarchy (when --theme not provided):
1. Try /themes/index.json (legacy / planned static index)
2. Fall back to /themes/api/themes (current API) and take the first N ids
The discovered slugs are sorted deterministically then truncated to N.
NOTE: This is intentionally minimal (no external deps). For stable comparisons
run with identical parameters pre/post-change and commit the JSON output under
logs/perf/.
"""
from __future__ import annotations
import argparse
import json
import statistics
import time
from typing import Any, Dict, List
import urllib.request
import urllib.error
import sys
from pathlib import Path
def _fetch_json(url: str) -> Dict[str, Any]:
req = urllib.request.Request(url, headers={"Accept": "application/json"})
with urllib.request.urlopen(req, timeout=15) as resp: # nosec B310 local dev
data = resp.read().decode("utf-8", "replace")
return json.loads(data) # type: ignore[return-value]
def _fetch_json_with_retry(url: str, attempts: int = 3, delay: float = 0.6) -> Dict[str, Any]:
last_error: Exception | None = None
for attempt in range(1, attempts + 1):
try:
return _fetch_json(url)
except Exception as exc: # pragma: no cover - network variability
last_error = exc
if attempt < attempts:
print(json.dumps({ # noqa: T201
"event": "preview_perf_fetch_retry",
"url": url,
"attempt": attempt,
"max_attempts": attempts,
"error": str(exc),
}))
time.sleep(delay * attempt)
else:
raise
raise last_error # pragma: no cover - defensive; should be unreachable
def select_theme_slugs(base_url: str, count: int) -> List[str]:
"""Discover theme slugs for benchmarking.
Attempts legacy static index first, then falls back to live API listing.
"""
errors: List[str] = []
slugs: List[str] = []
# Attempt 1: legacy /themes/index.json
try:
idx = _fetch_json(f"{base_url.rstrip('/')}/themes/index.json")
entries = idx.get("themes") or []
for it in entries:
if not isinstance(it, dict):
continue
slug = it.get("slug") or it.get("id") or it.get("theme_id")
if isinstance(slug, str):
slugs.append(slug)
except Exception as e: # pragma: no cover - network variability
errors.append(f"index.json failed: {e}")
if not slugs:
# Attempt 2: live API listing
try:
listing = _fetch_json(f"{base_url.rstrip('/')}/themes/api/themes")
items = listing.get("items") or []
for it in items:
if not isinstance(it, dict):
continue
tid = it.get("id") or it.get("slug") or it.get("theme_id")
if isinstance(tid, str):
slugs.append(tid)
except Exception as e: # pragma: no cover - network variability
errors.append(f"api/themes failed: {e}")
slugs = sorted(set(slugs))[:count]
if not slugs:
raise SystemExit("No theme slugs discovered; cannot benchmark (" + "; ".join(errors) + ")")
return slugs
def fetch_all_theme_slugs(base_url: str, page_limit: int = 200) -> List[str]:
"""Fetch all theme slugs via paginated /themes/api/themes endpoint.
Uses maximum page size (200) and iterates using offset until no next page.
Returns deterministic sorted unique list of slugs.
"""
slugs: List[str] = []
offset = 0
seen: set[str] = set()
page_attempts = 5
page_delay = 1.2
while True:
url = f"{base_url.rstrip('/')}/themes/api/themes?limit={page_limit}&offset={offset}"
data: Dict[str, Any] | None = None
last_error: Exception | None = None
for attempt in range(1, page_attempts + 1):
try:
data = _fetch_json_with_retry(url, attempts=4, delay=0.75)
break
except Exception as exc: # pragma: no cover - network variability
last_error = exc
if attempt < page_attempts:
print(json.dumps({ # noqa: T201
"event": "preview_perf_page_retry",
"offset": offset,
"attempt": attempt,
"max_attempts": page_attempts,
"error": str(exc),
}))
time.sleep(page_delay * attempt)
else:
raise SystemExit(f"Failed fetching themes page offset={offset}: {exc}")
if data is None: # pragma: no cover - defensive
raise SystemExit(f"Failed fetching themes page offset={offset}: {last_error}")
items = data.get("items") or []
for it in items:
if not isinstance(it, dict):
continue
tid = it.get("id") or it.get("slug") or it.get("theme_id")
if isinstance(tid, str) and tid not in seen:
seen.add(tid)
slugs.append(tid)
next_offset = data.get("next_offset")
if not next_offset or next_offset == offset:
break
offset = int(next_offset)
return sorted(slugs)
def percentile(values: List[float], pct: float) -> float:
if not values:
return 0.0
sv = sorted(values)
k = (len(sv) - 1) * pct
f = int(k)
c = min(f + 1, len(sv) - 1)
if f == c:
return sv[f]
d0 = sv[f] * (c - k)
d1 = sv[c] * (k - f)
return d0 + d1
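# Worked example of the linear interpolation (latencies in ms, hypothetical):
#   percentile([12.0, 18.0, 25.0, 40.0], 0.50) -> 21.5    (k=1.5, halfway between 18 and 25)
#   percentile([12.0, 18.0, 25.0, 40.0], 0.95) -> 37.75   (k=2.85, weighted between 25 and 40)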
def run_loop(base_url: str, slugs: List[str], loops: int, limit: int, warm: bool, path_template: str) -> Dict[str, Any]:
latencies: List[float] = []
per_slug_counts = {s: 0 for s in slugs}
t_start = time.time()
for i in range(loops):
slug = slugs[i % len(slugs)]
# path_template may contain {slug} and {limit}
try:
rel = path_template.format(slug=slug, limit=limit)
except Exception:
rel = f"/themes/api/theme/{slug}/preview?limit={limit}"
if not rel.startswith('/'):
rel = '/' + rel
url = f"{base_url.rstrip('/')}{rel}"
t0 = time.time()
try:
_fetch_json(url)
except Exception as e:
print(json.dumps({"event": "perf_benchmark_error", "slug": slug, "error": str(e)})) # noqa: T201
continue
ms = (time.time() - t0) * 1000.0
latencies.append(ms)
per_slug_counts[slug] += 1
elapsed = time.time() - t_start
return {
"warm": warm,
"loops": loops,
"slugs": slugs,
"per_slug_requests": per_slug_counts,
"elapsed_s": round(elapsed, 3),
"p50_ms": round(percentile(latencies, 0.50), 2),
"p90_ms": round(percentile(latencies, 0.90), 2),
"p95_ms": round(percentile(latencies, 0.95), 2),
"avg_ms": round(statistics.mean(latencies), 2) if latencies else 0.0,
"count": len(latencies),
"_latencies": latencies, # internal (removed in final result unless explicitly retained)
}
def _stats_from_latencies(latencies: List[float]) -> Dict[str, Any]:
if not latencies:
return {"count": 0, "p50_ms": 0.0, "p90_ms": 0.0, "p95_ms": 0.0, "avg_ms": 0.0}
return {
"count": len(latencies),
"p50_ms": round(percentile(latencies, 0.50), 2),
"p90_ms": round(percentile(latencies, 0.90), 2),
"p95_ms": round(percentile(latencies, 0.95), 2),
"avg_ms": round(statistics.mean(latencies), 2),
}
def main(argv: List[str]) -> int:
ap = argparse.ArgumentParser(description="Theme preview performance benchmark")
ap.add_argument("--url", default="http://localhost:8000", help="Base server URL (default: %(default)s)")
ap.add_argument("--themes", type=int, default=6, help="Number of theme slugs to exercise (default: %(default)s)")
ap.add_argument("--loops", type=int, default=60, help="Total request iterations (default: %(default)s)")
ap.add_argument("--limit", type=int, default=12, help="Preview size (default: %(default)s)")
ap.add_argument("--path-template", default="/themes/api/theme/{slug}/preview?limit={limit}", help="Format string for preview request path (default: %(default)s)")
ap.add_argument("--theme", action="append", dest="explicit_theme", help="Explicit theme slug(s); overrides automatic selection")
ap.add_argument("--warm", type=int, default=1, help="Number of warm-up loops (full cycles over selected slugs) (default: %(default)s)")
ap.add_argument("--output", type=Path, help="Optional JSON output path (committed under logs/perf)")
ap.add_argument("--all", action="store_true", help="Exercise ALL themes (ignores --themes; loops auto-set to passes*total_slugs unless --loops-explicit)")
ap.add_argument("--passes", type=int, default=1, help="When using --all, number of passes over the full theme set (default: %(default)s)")
# Hidden flag to detect if user explicitly set --loops (argparse has no direct support, so use sentinel technique)
# We keep original --loops for backwards compatibility; when --all we recompute unless user passed --loops-explicit
ap.add_argument("--loops-explicit", action="store_true", help=argparse.SUPPRESS)
ap.add_argument("--extract-warm-baseline", type=Path, help="If multi-pass (--all --passes >1), write a warm-only baseline JSON (final pass stats) to this path")
args = ap.parse_args(argv)
try:
if args.explicit_theme:
slugs = args.explicit_theme
elif args.all:
slugs = fetch_all_theme_slugs(args.url)
else:
slugs = select_theme_slugs(args.url, args.themes)
except SystemExit as e: # pragma: no cover - dependency on live server
print(str(e), file=sys.stderr)
return 2
mode = "all" if args.all else "subset"
total_slugs = len(slugs)
if args.all and not args.loops_explicit:
# Derive loops = passes * total_slugs
args.loops = max(1, args.passes) * total_slugs
print(json.dumps({ # noqa: T201
"event": "preview_perf_start",
"mode": mode,
"total_slugs": total_slugs,
"planned_loops": args.loops,
"passes": args.passes if args.all else None,
}))
# Execution paths:
# 1. Standard subset or single-pass all: warm cycles -> single measured run
# 2. Multi-pass all mode (--all --passes >1): iterate passes capturing per-pass stats (no separate warm loops)
if args.all and args.passes > 1:
pass_results: List[Dict[str, Any]] = []
combined_latencies: List[float] = []
t0_all = time.time()
for p in range(1, args.passes + 1):
r = run_loop(args.url, slugs, len(slugs), args.limit, warm=(p == 1), path_template=args.path_template)
lat = r.pop("_latencies", [])
combined_latencies.extend(lat)
pass_result = {
"pass": p,
"warm": r["warm"],
"elapsed_s": r["elapsed_s"],
"p50_ms": r["p50_ms"],
"p90_ms": r["p90_ms"],
"p95_ms": r["p95_ms"],
"avg_ms": r["avg_ms"],
"count": r["count"],
}
pass_results.append(pass_result)
total_elapsed = round(time.time() - t0_all, 3)
aggregate = _stats_from_latencies(combined_latencies)
result = {
"mode": mode,
"total_slugs": total_slugs,
"passes": args.passes,
"slugs": slugs,
"combined": {
**aggregate,
"elapsed_s": total_elapsed,
},
"passes_results": pass_results,
"cold_pass_p95_ms": pass_results[0]["p95_ms"],
"warm_pass_p95_ms": pass_results[-1]["p95_ms"],
"cold_pass_p50_ms": pass_results[0]["p50_ms"],
"warm_pass_p50_ms": pass_results[-1]["p50_ms"],
}
print(json.dumps({"event": "preview_perf_result", **result}, indent=2)) # noqa: T201
# Optional warm baseline extraction (final pass only; represents warmed steady-state)
if args.extract_warm_baseline:
try:
wb = pass_results[-1]
warm_obj = {
"event": "preview_perf_warm_baseline",
"mode": mode,
"total_slugs": total_slugs,
"warm_baseline": True,
"source_pass": wb["pass"],
"p50_ms": wb["p50_ms"],
"p90_ms": wb["p90_ms"],
"p95_ms": wb["p95_ms"],
"avg_ms": wb["avg_ms"],
"count": wb["count"],
"slugs": slugs,
}
args.extract_warm_baseline.parent.mkdir(parents=True, exist_ok=True)
args.extract_warm_baseline.write_text(json.dumps(warm_obj, indent=2, sort_keys=True), encoding="utf-8")
print(json.dumps({ # noqa: T201
"event": "preview_perf_warm_baseline_written",
"path": str(args.extract_warm_baseline),
"p95_ms": wb["p95_ms"],
}))
except Exception as e: # pragma: no cover
print(json.dumps({"event": "preview_perf_warm_baseline_error", "error": str(e)})) # noqa: T201
else:
# Warm-up loops first (if requested)
for w in range(args.warm):
run_loop(args.url, slugs, len(slugs), args.limit, warm=True, path_template=args.path_template)
result = run_loop(args.url, slugs, args.loops, args.limit, warm=False, path_template=args.path_template)
result.pop("_latencies", None)
result["slugs"] = slugs
result["mode"] = mode
result["total_slugs"] = total_slugs
if args.all:
result["passes"] = args.passes
print(json.dumps({"event": "preview_perf_result", **result}, indent=2)) # noqa: T201
if args.output:
try:
args.output.parent.mkdir(parents=True, exist_ok=True)
# Ensure we write the final result object (multi-pass already prepared above)
args.output.write_text(json.dumps(result, indent=2, sort_keys=True), encoding="utf-8")
except Exception as e: # pragma: no cover
print(f"ERROR: failed writing output file: {e}", file=sys.stderr)
return 3
return 0
if __name__ == "__main__": # pragma: no cover
raise SystemExit(main(sys.argv[1:]))

View file

@@ -1,106 +0,0 @@
"""CI helper: run a warm-pass benchmark candidate (single pass over all themes)
then compare against the committed warm baseline with threshold enforcement.
Intended usage (example):
python -m code.scripts.preview_perf_ci_check --url http://localhost:8080 \
--baseline logs/perf/theme_preview_warm_baseline.json --p95-threshold 5
Exit codes:
0 success (within threshold)
2 regression (p95 delta > threshold)
3 setup / usage error
Notes:
- Uses --all --passes 1 to create a fresh candidate snapshot that approximates
a warmed steady-state (server should have background refresh / typical load).
- If you prefer a multi-pass run compared on the warm pass only, use --multi-pass or adjust the logic accordingly.
"""
from __future__ import annotations
import argparse
import json
import subprocess
import sys
import time
import urllib.error
import urllib.request
from pathlib import Path
def _wait_for_service(base_url: str, attempts: int = 12, delay: float = 1.5) -> bool:
health_url = base_url.rstrip("/") + "/healthz"
last_error: Exception | None = None
for attempt in range(1, attempts + 1):
try:
with urllib.request.urlopen(health_url, timeout=5) as resp: # nosec B310 local CI
if 200 <= resp.status < 300:
return True
except urllib.error.HTTPError as exc:
last_error = exc
if 400 <= exc.code < 500 and exc.code != 429:
# Treat permanent client errors (other than rate limit) as fatal
break
except Exception as exc: # pragma: no cover - network variability
last_error = exc
time.sleep(delay * attempt)
print(json.dumps({
"event": "ci_perf_error",
"stage": "startup",
"message": "Service health check failed",
"url": health_url,
"attempts": attempts,
"error": str(last_error) if last_error else None,
}))
return False
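# Backoff illustration: with the defaults (attempts=12, delay=1.5) the sleeps
# grow linearly (1.5s, 3.0s, 4.5s, ...), so a service that never becomes healthy
# is given up on after roughly 1.5 * (1 + 2 + ... + 12) = 117s of waiting plus
# the per-request 5s timeouts.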
def run(cmd: list[str]) -> subprocess.CompletedProcess:
return subprocess.run(cmd, capture_output=True, text=True, check=False)
def main(argv: list[str]) -> int:
ap = argparse.ArgumentParser(description="Preview performance CI regression gate")
ap.add_argument("--url", default="http://localhost:8080", help="Base URL of running web service")
ap.add_argument("--baseline", type=Path, required=True, help="Path to committed warm baseline JSON")
ap.add_argument("--p95-threshold", type=float, default=5.0, help="Max allowed p95 regression percent (default: %(default)s)")
ap.add_argument("--candidate-output", type=Path, default=Path("logs/perf/theme_preview_ci_candidate.json"), help="Where to write candidate benchmark JSON")
ap.add_argument("--multi-pass", action="store_true", help="Run a 2-pass all-themes benchmark and compare warm pass only (optional enhancement)")
args = ap.parse_args(argv)
if not args.baseline.exists():
print(json.dumps({"event":"ci_perf_error","message":"Baseline not found","path":str(args.baseline)}))
return 3
if not _wait_for_service(args.url):
return 3
# Run candidate single-pass all-themes benchmark (no extra warm cycles to keep CI fast)
# If multi-pass requested, run two passes over all themes so second pass represents warmed steady-state.
passes = "2" if args.multi_pass else "1"
bench_cmd = [sys.executable, "-m", "code.scripts.preview_perf_benchmark", "--url", args.url, "--all", "--passes", passes, "--output", str(args.candidate_output)]
bench_proc = run(bench_cmd)
if bench_proc.returncode != 0:
print(json.dumps({"event":"ci_perf_error","stage":"benchmark","code":bench_proc.returncode,"stderr":bench_proc.stderr}))
return 3
print(bench_proc.stdout)
if not args.candidate_output.exists():
print(json.dumps({"event":"ci_perf_error","message":"Candidate output missing"}))
return 3
compare_cmd = [
sys.executable,
"-m","code.scripts.preview_perf_compare",
"--baseline", str(args.baseline),
"--candidate", str(args.candidate_output),
"--warm-only",
"--p95-threshold", str(args.p95_threshold),
]
cmp_proc = run(compare_cmd)
print(cmp_proc.stdout)
if cmp_proc.returncode == 2:
# Already printed JSON with failure status
return 2
if cmp_proc.returncode != 0:
print(json.dumps({"event":"ci_perf_error","stage":"compare","code":cmp_proc.returncode,"stderr":cmp_proc.stderr}))
return 3
return 0
if __name__ == "__main__": # pragma: no cover
raise SystemExit(main(sys.argv[1:]))

View file

@@ -1,115 +0,0 @@
"""Compare two preview benchmark JSON result files and emit delta stats.
Usage:
python -m code.scripts.preview_perf_compare --baseline logs/perf/theme_preview_baseline_all_pass1_20250923.json --candidate logs/perf/new_run.json
Outputs JSON with percentage deltas for p50/p90/p95/avg (positive = regression/slower).
If multi-pass structures are present (combined & passes_results) those are included.
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Any, Dict
def load(path: Path) -> Dict[str, Any]:
data = json.loads(path.read_text(encoding="utf-8"))
# Multi-pass result may store stats under combined
if "combined" in data:
core = data["combined"].copy()
# Inject representative fields for uniform comparison
core["p50_ms"] = core.get("p50_ms") or data.get("p50_ms")
core["p90_ms"] = core.get("p90_ms") or data.get("p90_ms")
core["p95_ms"] = core.get("p95_ms") or data.get("p95_ms")
core["avg_ms"] = core.get("avg_ms") or data.get("avg_ms")
data["_core_stats"] = core
else:
data["_core_stats"] = {
k: data.get(k) for k in ("p50_ms", "p90_ms", "p95_ms", "avg_ms", "count")
}
return data
def pct_delta(new: float, old: float) -> float:
if old == 0:
return 0.0
return round(((new - old) / old) * 100.0, 2)
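# Sign convention examples: pct_delta(2.3, 2.0) -> 15.0 (candidate slower, a
# regression) and pct_delta(1.8, 2.0) -> -10.0 (faster); a zero baseline yields
# 0.0 instead of dividing by zero.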
def compare(baseline: Dict[str, Any], candidate: Dict[str, Any]) -> Dict[str, Any]:
b = baseline["_core_stats"]
c = candidate["_core_stats"]
result = {"baseline_count": b.get("count"), "candidate_count": c.get("count")}
for k in ("p50_ms", "p90_ms", "p95_ms", "avg_ms"):
if b.get(k) is not None and c.get(k) is not None:
result[k] = {
"baseline": b[k],
"candidate": c[k],
"delta_pct": pct_delta(c[k], b[k]),
}
# If both have per-pass details include first and last pass p95/p50
if "passes_results" in baseline and "passes_results" in candidate:
result["passes"] = {
"baseline": {
"cold_p95": baseline.get("cold_pass_p95_ms"),
"warm_p95": baseline.get("warm_pass_p95_ms"),
"cold_p50": baseline.get("cold_pass_p50_ms"),
"warm_p50": baseline.get("warm_pass_p50_ms"),
},
"candidate": {
"cold_p95": candidate.get("cold_pass_p95_ms"),
"warm_p95": candidate.get("warm_pass_p95_ms"),
"cold_p50": candidate.get("cold_pass_p50_ms"),
"warm_p50": candidate.get("warm_pass_p50_ms"),
},
}
return result
def main(argv: list[str]) -> int:
ap = argparse.ArgumentParser(description="Compare two preview benchmark JSON result files")
ap.add_argument("--baseline", required=True, type=Path, help="Baseline JSON path")
ap.add_argument("--candidate", required=True, type=Path, help="Candidate JSON path")
ap.add_argument("--p95-threshold", type=float, default=None, help="Fail (exit 2) if p95 regression exceeds this percent (positive delta)")
ap.add_argument("--warm-only", action="store_true", help="When both results have passes, compare warm pass p95/p50 instead of combined/core")
args = ap.parse_args(argv)
if not args.baseline.exists():
raise SystemExit(f"Baseline not found: {args.baseline}")
if not args.candidate.exists():
raise SystemExit(f"Candidate not found: {args.candidate}")
baseline = load(args.baseline)
candidate = load(args.candidate)
# If warm-only requested and both have warm pass stats, override _core_stats before compare
if args.warm_only and "warm_pass_p95_ms" in baseline and "warm_pass_p95_ms" in candidate:
baseline["_core_stats"] = {
"p50_ms": baseline.get("warm_pass_p50_ms"),
"p90_ms": baseline.get("_core_stats", {}).get("p90_ms"), # p90 not tracked per-pass; retain combined
"p95_ms": baseline.get("warm_pass_p95_ms"),
"avg_ms": baseline.get("_core_stats", {}).get("avg_ms"),
"count": baseline.get("_core_stats", {}).get("count"),
}
candidate["_core_stats"] = {
"p50_ms": candidate.get("warm_pass_p50_ms"),
"p90_ms": candidate.get("_core_stats", {}).get("p90_ms"),
"p95_ms": candidate.get("warm_pass_p95_ms"),
"avg_ms": candidate.get("_core_stats", {}).get("avg_ms"),
"count": candidate.get("_core_stats", {}).get("count"),
}
cmp = compare(baseline, candidate)
payload = {"event": "preview_perf_compare", **cmp}
if args.p95_threshold is not None and "p95_ms" in cmp:
delta = cmp["p95_ms"]["delta_pct"]
payload["threshold"] = {"p95_threshold": args.p95_threshold, "p95_delta_pct": delta}
if delta is not None and delta > args.p95_threshold:
payload["result"] = "fail"
print(json.dumps(payload, indent=2)) # noqa: T201
return 2
payload["result"] = "pass"
print(json.dumps(payload, indent=2)) # noqa: T201
return 0
if __name__ == "__main__": # pragma: no cover
raise SystemExit(main(__import__('sys').argv[1:]))

View file

@@ -1,91 +0,0 @@
"""Generate warm preview traffic to populate theme preview cache & metrics.
Usage:
python -m code.scripts.warm_preview_traffic --count 25 --repeats 2 \
--base-url http://localhost:8000 --delay 0.05
Requirements:
- FastAPI server running locally exposing /themes endpoints
- WEB_THEME_PICKER_DIAGNOSTICS=1 so /themes/metrics is accessible
Strategy:
1. Fetch /themes/fragment/list?limit=COUNT to obtain HTML table.
2. Extract theme slugs via regex on data-theme-id attributes.
3. Issue REPEATS preview fragment requests per slug in order.
4. Print simple timing / status summary.
This script intentionally uses stdlib only (urllib, re, time) to avoid extra deps.
"""
from __future__ import annotations
import argparse
import re
import time
import urllib.request
import urllib.error
from typing import List
LIST_PATH = "/themes/fragment/list"
PREVIEW_PATH = "/themes/fragment/preview/{slug}"
def fetch(url: str) -> str:
req = urllib.request.Request(url, headers={"User-Agent": "warm-preview/1"})
with urllib.request.urlopen(req, timeout=15) as resp: # nosec B310 (local trusted)
return resp.read().decode("utf-8", "replace")
def extract_slugs(html: str, limit: int) -> List[str]:
slugs = []
for m in re.finditer(r'data-theme-id="([^"]+)"', html):
s = m.group(1).strip()
if s and s not in slugs:
slugs.append(s)
if len(slugs) >= limit:
break
return slugs
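# Illustrative extraction (markup is hypothetical but mirrors the data-theme-id
# attribute the regex targets):
#   extract_slugs('<tr data-theme-id="lifegain"></tr><tr data-theme-id="tokens"></tr>', 5)
#   -> ['lifegain', 'tokens']   # order preserved, duplicates skipped, capped at limit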
def warm(base_url: str, count: int, repeats: int, delay: float) -> None:
list_url = f"{base_url}{LIST_PATH}?limit={count}&offset=0"
print(f"[warm] Fetching list: {list_url}")
try:
html = fetch(list_url)
except urllib.error.URLError as e: # pragma: no cover
raise SystemExit(f"Failed fetching list: {e}")
slugs = extract_slugs(html, count)
if not slugs:
raise SystemExit("No theme slugs extracted cannot warm.")
print(f"[warm] Extracted {len(slugs)} slugs: {', '.join(slugs[:8])}{'...' if len(slugs)>8 else ''}")
total_requests = 0
start = time.time()
for r in range(repeats):
print(f"[warm] Pass {r+1}/{repeats}")
for slug in slugs:
url = f"{base_url}{PREVIEW_PATH.format(slug=slug)}"
try:
fetch(url)
except Exception as e: # pragma: no cover
print(f" [warn] Failed {slug}: {e}")
else:
total_requests += 1
if delay:
time.sleep(delay)
dur = time.time() - start
print(f"[warm] Completed {total_requests} preview requests in {dur:.2f}s ({total_requests/dur if dur>0 else 0:.1f} rps)")
print("[warm] Done. Now run metrics snapshot to capture warm p95.")
def main(argv: list[str]) -> int:
ap = argparse.ArgumentParser(description="Generate warm preview traffic")
ap.add_argument("--base-url", default="http://localhost:8000", help="Base URL (default: %(default)s)")
ap.add_argument("--count", type=int, default=25, help="Number of distinct theme slugs to warm (default: %(default)s)")
ap.add_argument("--repeats", type=int, default=2, help="Repeat passes over slugs (default: %(default)s)")
ap.add_argument("--delay", type=float, default=0.05, help="Delay between requests in seconds (default: %(default)s)")
args = ap.parse_args(argv)
warm(args.base_url.rstrip("/"), args.count, args.repeats, args.delay)
return 0
if __name__ == "__main__": # pragma: no cover
import sys
raise SystemExit(main(sys.argv[1:]))