# mtg_python_deckbuilder/code/scripts/check_random_theme_perf.py
"""Opt-in guard that compares multi-theme filter performance to a stored baseline.
Run inside the project virtual environment:
python -m code.scripts.check_random_theme_perf --baseline config/random_theme_perf_baseline.json
The script executes the same profiling loop as `profile_multi_theme_filter` and fails
if the observed mean or p95 timings regress more than the allowed threshold.
"""
from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path
from typing import Any, Dict, Tuple

PROJECT_ROOT = Path(__file__).resolve().parents[2]
DEFAULT_BASELINE = PROJECT_ROOT / "config" / "random_theme_perf_baseline.json"

# Make the project root importable when the script is executed directly.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

from code.scripts.profile_multi_theme_filter import run_profile  # type: ignore  # noqa: E402


def _load_baseline(path: Path) -> Dict[str, Any]:
    if not path.exists():
        raise FileNotFoundError(f"Baseline file not found: {path}")
    data = json.loads(path.read_text(encoding="utf-8"))
    return data


def _extract(metric: Dict[str, Any], key: str) -> float:
    """Return a numeric metric value, treating missing or malformed entries as 0.0."""
    try:
        value = float(metric.get(key, 0.0))
    except Exception:
        value = 0.0
    return value


def _check_section(name: str, actual: Dict[str, Any], baseline: Dict[str, Any], threshold: float) -> Tuple[bool, str]:
    a_mean = _extract(actual, "mean_ms")
    b_mean = _extract(baseline, "mean_ms")
    a_p95 = _extract(actual, "p95_ms")
    b_p95 = _extract(baseline, "p95_ms")
    allowed_mean = b_mean * (1.0 + threshold)
    allowed_p95 = b_p95 * (1.0 + threshold)
    # A zero baseline means there is no reference measurement yet; treat it as a pass.
    mean_ok = a_mean <= allowed_mean or b_mean == 0.0
    p95_ok = a_p95 <= allowed_p95 or b_p95 == 0.0
    status = mean_ok and p95_ok

    def _format_row(label: str, actual_val: float, baseline_val: float, allowed_val: float, ok: bool) -> str:
        trend = ((actual_val - baseline_val) / baseline_val * 100.0) if baseline_val else 0.0
        trend_str = f"{trend:+.1f}%" if baseline_val else "n/a"
        limit_str = f"{allowed_val:.3f}ms" if baseline_val else "n/a"
        return (
            f" {label:<6} actual={actual_val:.3f}ms baseline={baseline_val:.3f}ms "
            f"({trend_str}), limit {limit_str} -> {'OK' if ok else 'FAIL'}"
        )

    rows = [f"Section: {name}"]
    rows.append(_format_row("mean", a_mean, b_mean, allowed_mean, mean_ok))
    rows.append(_format_row("p95", a_p95, b_p95, allowed_p95, p95_ok))
    return status, "\n".join(rows)
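
# For reference, _check_section renders a report like (numbers illustrative):
#   Section: cascade
#    mean   actual=1.312ms baseline=1.250ms (+5.0%), limit 1.438ms -> OK
#    p95    actual=2.400ms baseline=2.100ms (+14.3%), limit 2.415ms -> OK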


def main(argv: list[str] | None = None) -> int:
    parser = argparse.ArgumentParser(description="Check multi-theme filtering performance against a baseline")
    parser.add_argument("--baseline", type=Path, default=DEFAULT_BASELINE,
                        help="Baseline JSON file (default: config/random_theme_perf_baseline.json)")
    parser.add_argument("--iterations", type=int, default=400,
                        help="Number of iterations to sample (default: 400)")
    parser.add_argument("--seed", type=int, default=None,
                        help="Optional RNG seed for reproducibility")
    # Note the escaped %% below: argparse applies %-formatting to help strings,
    # so a bare % would raise ValueError when --help is rendered.
    parser.add_argument("--threshold", type=float, default=0.15,
                        help="Allowed regression threshold as a fraction (default: 0.15 = 15%%)")
    parser.add_argument("--update-baseline", action="store_true",
                        help="Overwrite the baseline file with the newly collected metrics")
    args = parser.parse_args(argv)

    baseline_path = args.baseline if args.baseline else DEFAULT_BASELINE
    if args.update_baseline and not baseline_path.parent.exists():
        baseline_path.parent.mkdir(parents=True, exist_ok=True)
    if args.update_baseline:
        baseline: Dict[str, Any] = {}
    else:
        baseline = _load_baseline(baseline_path)

    results = run_profile(args.iterations, args.seed)
    cascade_status, cascade_report = _check_section("cascade", results.get("cascade", {}), baseline.get("cascade", {}), args.threshold)
    synergy_status, synergy_report = _check_section("synergy", results.get("synergy", {}), baseline.get("synergy", {}), args.threshold)

    print("Iterations:", results.get("iterations"))
    print("Seed:", results.get("seed"))
    print(cascade_report)
    print(synergy_report)

    overall_ok = cascade_status and synergy_status
    if args.update_baseline:
        payload = {
            "iterations": results.get("iterations"),
            "seed": results.get("seed"),
            "cascade": results.get("cascade"),
            "synergy": results.get("synergy"),
        }
        baseline_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
        print(f"Baseline updated → {baseline_path}")
        return 0

    if not overall_ok:
        print(f"FAIL: performance regressions exceeded {args.threshold * 100:.1f}% threshold", file=sys.stderr)
        return 1
    print("PASS: performance within allowed threshold")
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
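
# Example invocations (run from the project root, inside the virtual environment):
#   python -m code.scripts.check_random_theme_perf
#   python -m code.scripts.check_random_theme_perf --iterations 200 --seed 42
#   python -m code.scripts.check_random_theme_perf --update-baseline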