mirror of https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-16 23:50:12 +01:00
feat(web): Core Refactor Phase A — extract sampling and cache modules; add adaptive TTL + eviction heuristics, Redis PoC, and metrics wiring. Tests added for TTL, eviction, exports, splash-adaptive, card index, and service worker. Docs+roadmap updated.
This commit is contained in:
parent c4a7fc48ea
commit a029d430c5
49 changed files with 3889 additions and 701 deletions
code/scripts/preview_perf_benchmark.py (new file, +309 lines)
@@ -0,0 +1,309 @@
"""Ad-hoc performance benchmark for theme preview build latency (Phase A validation).

Runs warm-up plus measured request loops against several theme slugs and prints
aggregate latency stats (p50/p90/p95, cache hit ratio evolution). Intended to
establish or validate that the refactor did not introduce a >5% p95 regression.

Usage (ensure the server is running locally – commonly :8080 in docker compose):
    python -m code.scripts.preview_perf_benchmark --themes 8 --loops 40 \
        --url http://localhost:8080 --warm 1 --limit 12

Theme slug discovery hierarchy (when --theme not provided):
  1. Try /themes/index.json (legacy / planned static index)
  2. Fall back to /themes/api/themes (current API) and take the first N ids
The discovered slugs are sorted deterministically then truncated to N.

NOTE: This is intentionally minimal (no external deps). For stable comparisons
run with identical parameters pre/post-change and commit the JSON output under
logs/perf/.
"""
from __future__ import annotations

import argparse
import json
import statistics
import time
from typing import Any, Dict, List
import urllib.request
import urllib.error
import sys
from pathlib import Path


def _fetch_json(url: str) -> Dict[str, Any]:
    req = urllib.request.Request(url, headers={"Accept": "application/json"})
    with urllib.request.urlopen(req, timeout=15) as resp:  # nosec B310 local dev
        data = resp.read().decode("utf-8", "replace")
    return json.loads(data)  # type: ignore[return-value]


def select_theme_slugs(base_url: str, count: int) -> List[str]:
    """Discover theme slugs for benchmarking.

    Attempts the legacy static index first, then falls back to the live API listing.
    """
    errors: List[str] = []
    slugs: List[str] = []
    # Attempt 1: legacy /themes/index.json
    try:
        idx = _fetch_json(f"{base_url.rstrip('/')}/themes/index.json")
        entries = idx.get("themes") or []
        for it in entries:
            if not isinstance(it, dict):
                continue
            slug = it.get("slug") or it.get("id") or it.get("theme_id")
            if isinstance(slug, str):
                slugs.append(slug)
    except Exception as e:  # pragma: no cover - network variability
        errors.append(f"index.json failed: {e}")

    if not slugs:
        # Attempt 2: live API listing
        try:
            listing = _fetch_json(f"{base_url.rstrip('/')}/themes/api/themes")
            items = listing.get("items") or []
            for it in items:
                if not isinstance(it, dict):
                    continue
                tid = it.get("id") or it.get("slug") or it.get("theme_id")
                if isinstance(tid, str):
                    slugs.append(tid)
        except Exception as e:  # pragma: no cover - network variability
            errors.append(f"api/themes failed: {e}")

    slugs = sorted(set(slugs))[:count]
    if not slugs:
        raise SystemExit("No theme slugs discovered; cannot benchmark (" + "; ".join(errors) + ")")
    return slugs


def fetch_all_theme_slugs(base_url: str, page_limit: int = 200) -> List[str]:
    """Fetch all theme slugs via the paginated /themes/api/themes endpoint.

    Uses the maximum page size (200) and iterates using offset until no next page.
    Returns a deterministic, sorted, unique list of slugs.
    """
    slugs: List[str] = []
    offset = 0
    seen: set[str] = set()
    while True:
        try:
            url = f"{base_url.rstrip('/')}/themes/api/themes?limit={page_limit}&offset={offset}"
            data = _fetch_json(url)
        except Exception as e:  # pragma: no cover - network variability
            raise SystemExit(f"Failed fetching themes page offset={offset}: {e}")
        items = data.get("items") or []
        for it in items:
            if not isinstance(it, dict):
                continue
            tid = it.get("id") or it.get("slug") or it.get("theme_id")
            if isinstance(tid, str) and tid not in seen:
                seen.add(tid)
                slugs.append(tid)
        next_offset = data.get("next_offset")
        if not next_offset or next_offset == offset:
            break
        offset = int(next_offset)
    return sorted(slugs)


def percentile(values: List[float], pct: float) -> float:
    if not values:
        return 0.0
    sv = sorted(values)
    k = (len(sv) - 1) * pct
    f = int(k)
    c = min(f + 1, len(sv) - 1)
    if f == c:
        return sv[f]
    d0 = sv[f] * (c - k)
    d1 = sv[c] * (k - f)
    return d0 + d1


def run_loop(base_url: str, slugs: List[str], loops: int, limit: int, warm: bool, path_template: str) -> Dict[str, Any]:
    latencies: List[float] = []
    per_slug_counts = {s: 0 for s in slugs}
    t_start = time.time()
    for i in range(loops):
        slug = slugs[i % len(slugs)]
        # path_template may contain {slug} and {limit}
        try:
            rel = path_template.format(slug=slug, limit=limit)
        except Exception:
            rel = f"/themes/api/theme/{slug}/preview?limit={limit}"
        if not rel.startswith('/'):
            rel = '/' + rel
        url = f"{base_url.rstrip('/')}{rel}"
        t0 = time.time()
        try:
            _fetch_json(url)
        except Exception as e:
            print(json.dumps({"event": "perf_benchmark_error", "slug": slug, "error": str(e)}))  # noqa: T201
            continue
        ms = (time.time() - t0) * 1000.0
        latencies.append(ms)
        per_slug_counts[slug] += 1
    elapsed = time.time() - t_start
    return {
        "warm": warm,
        "loops": loops,
        "slugs": slugs,
        "per_slug_requests": per_slug_counts,
        "elapsed_s": round(elapsed, 3),
        "p50_ms": round(percentile(latencies, 0.50), 2),
        "p90_ms": round(percentile(latencies, 0.90), 2),
        "p95_ms": round(percentile(latencies, 0.95), 2),
        "avg_ms": round(statistics.mean(latencies), 2) if latencies else 0.0,
        "count": len(latencies),
        "_latencies": latencies,  # internal (removed in final result unless explicitly retained)
    }


def _stats_from_latencies(latencies: List[float]) -> Dict[str, Any]:
    if not latencies:
        return {"count": 0, "p50_ms": 0.0, "p90_ms": 0.0, "p95_ms": 0.0, "avg_ms": 0.0}
    return {
        "count": len(latencies),
        "p50_ms": round(percentile(latencies, 0.50), 2),
        "p90_ms": round(percentile(latencies, 0.90), 2),
        "p95_ms": round(percentile(latencies, 0.95), 2),
        "avg_ms": round(statistics.mean(latencies), 2),
    }


def main(argv: List[str]) -> int:
    ap = argparse.ArgumentParser(description="Theme preview performance benchmark")
    ap.add_argument("--url", default="http://localhost:8000", help="Base server URL (default: %(default)s)")
    ap.add_argument("--themes", type=int, default=6, help="Number of theme slugs to exercise (default: %(default)s)")
    ap.add_argument("--loops", type=int, default=60, help="Total request iterations (default: %(default)s)")
    ap.add_argument("--limit", type=int, default=12, help="Preview size (default: %(default)s)")
    ap.add_argument("--path-template", default="/themes/api/theme/{slug}/preview?limit={limit}", help="Format string for preview request path (default: %(default)s)")
    ap.add_argument("--theme", action="append", dest="explicit_theme", help="Explicit theme slug(s); overrides automatic selection")
    ap.add_argument("--warm", type=int, default=1, help="Number of warm-up loops (full cycles over selected slugs) (default: %(default)s)")
    ap.add_argument("--output", type=Path, help="Optional JSON output path (committed under logs/perf)")
    ap.add_argument("--all", action="store_true", help="Exercise ALL themes (ignores --themes; loops auto-set to passes*total_slugs unless --loops-explicit)")
    ap.add_argument("--passes", type=int, default=1, help="When using --all, number of passes over the full theme set (default: %(default)s)")
    # Hidden flag to detect if user explicitly set --loops (argparse has no direct support, so use a sentinel technique)
    # We keep original --loops for backwards compatibility; when --all we recompute unless user passed --loops-explicit
    ap.add_argument("--loops-explicit", action="store_true", help=argparse.SUPPRESS)
    ap.add_argument("--extract-warm-baseline", type=Path, help="If multi-pass (--all --passes >1), write a warm-only baseline JSON (final pass stats) to this path")
    args = ap.parse_args(argv)

    try:
        if args.explicit_theme:
            slugs = args.explicit_theme
        elif args.all:
            slugs = fetch_all_theme_slugs(args.url)
        else:
            slugs = select_theme_slugs(args.url, args.themes)
    except SystemExit as e:  # pragma: no cover - dependency on live server
        print(str(e), file=sys.stderr)
        return 2

    mode = "all" if args.all else "subset"
    total_slugs = len(slugs)
    if args.all and not args.loops_explicit:
        # Derive loops = passes * total_slugs
        args.loops = max(1, args.passes) * total_slugs

    print(json.dumps({  # noqa: T201
        "event": "preview_perf_start",
        "mode": mode,
        "total_slugs": total_slugs,
        "planned_loops": args.loops,
        "passes": args.passes if args.all else None,
    }))

    # Execution paths:
    # 1. Standard subset or single-pass all: warm cycles -> single measured run
    # 2. Multi-pass all mode (--all --passes >1): iterate passes capturing per-pass stats (no separate warm loops)
    if args.all and args.passes > 1:
        pass_results: List[Dict[str, Any]] = []
        combined_latencies: List[float] = []
        t0_all = time.time()
        for p in range(1, args.passes + 1):
            r = run_loop(args.url, slugs, len(slugs), args.limit, warm=(p == 1), path_template=args.path_template)
            lat = r.pop("_latencies", [])
            combined_latencies.extend(lat)
            pass_result = {
                "pass": p,
                "warm": r["warm"],
                "elapsed_s": r["elapsed_s"],
                "p50_ms": r["p50_ms"],
                "p90_ms": r["p90_ms"],
                "p95_ms": r["p95_ms"],
                "avg_ms": r["avg_ms"],
                "count": r["count"],
            }
            pass_results.append(pass_result)
        total_elapsed = round(time.time() - t0_all, 3)
        aggregate = _stats_from_latencies(combined_latencies)
        result = {
            "mode": mode,
            "total_slugs": total_slugs,
            "passes": args.passes,
            "slugs": slugs,
            "combined": {
                **aggregate,
                "elapsed_s": total_elapsed,
            },
            "passes_results": pass_results,
            "cold_pass_p95_ms": pass_results[0]["p95_ms"],
            "warm_pass_p95_ms": pass_results[-1]["p95_ms"],
            "cold_pass_p50_ms": pass_results[0]["p50_ms"],
            "warm_pass_p50_ms": pass_results[-1]["p50_ms"],
        }
        print(json.dumps({"event": "preview_perf_result", **result}, indent=2))  # noqa: T201
        # Optional warm baseline extraction (final pass only; represents warmed steady-state)
        if args.extract_warm_baseline:
            try:
                wb = pass_results[-1]
                warm_obj = {
                    "event": "preview_perf_warm_baseline",
                    "mode": mode,
                    "total_slugs": total_slugs,
                    "warm_baseline": True,
                    "source_pass": wb["pass"],
                    "p50_ms": wb["p50_ms"],
                    "p90_ms": wb["p90_ms"],
                    "p95_ms": wb["p95_ms"],
                    "avg_ms": wb["avg_ms"],
                    "count": wb["count"],
                    "slugs": slugs,
                }
                args.extract_warm_baseline.parent.mkdir(parents=True, exist_ok=True)
                args.extract_warm_baseline.write_text(json.dumps(warm_obj, indent=2, sort_keys=True), encoding="utf-8")
                print(json.dumps({  # noqa: T201
                    "event": "preview_perf_warm_baseline_written",
                    "path": str(args.extract_warm_baseline),
                    "p95_ms": wb["p95_ms"],
                }))
            except Exception as e:  # pragma: no cover
                print(json.dumps({"event": "preview_perf_warm_baseline_error", "error": str(e)}))  # noqa: T201
    else:
        # Warm-up loops first (if requested)
        for w in range(args.warm):
            run_loop(args.url, slugs, len(slugs), args.limit, warm=True, path_template=args.path_template)
        result = run_loop(args.url, slugs, args.loops, args.limit, warm=False, path_template=args.path_template)
        result.pop("_latencies", None)
        result["slugs"] = slugs
        result["mode"] = mode
        result["total_slugs"] = total_slugs
        if args.all:
            result["passes"] = args.passes
        print(json.dumps({"event": "preview_perf_result", **result}, indent=2))  # noqa: T201

    if args.output:
        try:
            args.output.parent.mkdir(parents=True, exist_ok=True)
            # Ensure we write the final result object (multi-pass already prepared above)
            args.output.write_text(json.dumps(result, indent=2, sort_keys=True), encoding="utf-8")
        except Exception as e:  # pragma: no cover
            print(f"ERROR: failed writing output file: {e}", file=sys.stderr)
            return 3
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main(sys.argv[1:]))
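A minimal sketch (not part of this commit) of how the percentile() helper above interpolates between adjacent order statistics. The latency values are made up for illustration, and the import assumes the repo root is on sys.path:

    # Toy check of percentile(): linear interpolation between neighbouring ranks.
    from code.scripts.preview_perf_benchmark import percentile  # hypothetical usage from the repo root

    toy_latencies = [10.0, 12.0, 14.0, 20.0, 40.0]  # illustrative milliseconds, not measured data
    # p95 rank = (5 - 1) * 0.95 = 3.8 -> blend sv[3]=20.0 and sv[4]=40.0
    # 20.0 * (4 - 3.8) + 40.0 * (3.8 - 3) = 4.0 + 32.0 = 36.0
    print(percentile(toy_latencies, 0.95))  # 36.0
    print(percentile(toy_latencies, 0.50))  # 14.0 (exact middle element)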
code/scripts/preview_perf_ci_check.py (new file, +75 lines)
@@ -0,0 +1,75 @@
"""CI helper: run a warm-pass benchmark candidate (single pass over all themes)
then compare against the committed warm baseline with threshold enforcement.

Intended usage (example):
    python -m code.scripts.preview_perf_ci_check --url http://localhost:8080 \
        --baseline logs/perf/theme_preview_warm_baseline.json --p95-threshold 5

Exit codes:
  0 success (within threshold)
  2 regression (p95 delta > threshold)
  3 setup / usage error

Notes:
  - Uses --all --passes 1 to create a fresh candidate snapshot that approximates
    a warmed steady-state (server should have background refresh / typical load).
  - If you prefer multi-pass then warm-only selection, adjust logic accordingly.
"""
from __future__ import annotations

import argparse
import json
import subprocess
import sys
from pathlib import Path


def run(cmd: list[str]) -> subprocess.CompletedProcess:
    return subprocess.run(cmd, capture_output=True, text=True, check=False)


def main(argv: list[str]) -> int:
    ap = argparse.ArgumentParser(description="Preview performance CI regression gate")
    ap.add_argument("--url", default="http://localhost:8080", help="Base URL of running web service")
    ap.add_argument("--baseline", type=Path, required=True, help="Path to committed warm baseline JSON")
    ap.add_argument("--p95-threshold", type=float, default=5.0, help="Max allowed p95 regression percent (default: %(default)s)")
    ap.add_argument("--candidate-output", type=Path, default=Path("logs/perf/theme_preview_ci_candidate.json"), help="Where to write candidate benchmark JSON")
    ap.add_argument("--multi-pass", action="store_true", help="Run a 2-pass all-themes benchmark and compare warm pass only (optional enhancement)")
    args = ap.parse_args(argv)

    if not args.baseline.exists():
        print(json.dumps({"event":"ci_perf_error","message":"Baseline not found","path":str(args.baseline)}))
        return 3

    # Run candidate single-pass all-themes benchmark (no extra warm cycles to keep CI fast).
    # If multi-pass requested, run two passes over all themes so the second pass represents warmed steady-state.
    passes = "2" if args.multi_pass else "1"
    bench_cmd = [sys.executable, "-m", "code.scripts.preview_perf_benchmark", "--url", args.url, "--all", "--passes", passes, "--output", str(args.candidate_output)]
    bench_proc = run(bench_cmd)
    if bench_proc.returncode != 0:
        print(json.dumps({"event":"ci_perf_error","stage":"benchmark","code":bench_proc.returncode,"stderr":bench_proc.stderr}))
        return 3
    print(bench_proc.stdout)

    if not args.candidate_output.exists():
        print(json.dumps({"event":"ci_perf_error","message":"Candidate output missing"}))
        return 3

    compare_cmd = [
        sys.executable,
        "-m", "code.scripts.preview_perf_compare",
        "--baseline", str(args.baseline),
        "--candidate", str(args.candidate_output),
        "--warm-only",
        "--p95-threshold", str(args.p95_threshold),
    ]
    cmp_proc = run(compare_cmd)
    print(cmp_proc.stdout)
    if cmp_proc.returncode == 2:
        # Already printed JSON with failure status
        return 2
    if cmp_proc.returncode != 0:
        print(json.dumps({"event":"ci_perf_error","stage":"compare","code":cmp_proc.returncode,"stderr":cmp_proc.stderr}))
        return 3
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main(sys.argv[1:]))
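As a rough illustration (not part of this commit), the gate's exit-code contract can also be consumed in-process rather than via the shell. This sketch assumes the web service is already running at the example URL and that the baseline path from the docstring exists:

    # Hypothetical in-process invocation of the CI gate; URL and path are placeholders.
    from code.scripts.preview_perf_ci_check import main as ci_gate

    rc = ci_gate([
        "--url", "http://localhost:8080",
        "--baseline", "logs/perf/theme_preview_warm_baseline.json",
        "--p95-threshold", "5",
    ])
    status = {0: "pass", 2: "p95 regression", 3: "setup/usage error"}.get(rc, "unknown")
    print(f"perf gate exited {rc}: {status}")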
code/scripts/preview_perf_compare.py (new file, +115 lines)
@@ -0,0 +1,115 @@
"""Compare two preview benchmark JSON result files and emit delta stats.

Usage:
    python -m code.scripts.preview_perf_compare --baseline logs/perf/theme_preview_baseline_all_pass1_20250923.json --candidate logs/perf/new_run.json

Outputs JSON with percentage deltas for p50/p90/p95/avg (positive = regression/slower).
If multi-pass structures are present (combined & passes_results), those are included.
"""
from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path
from typing import Any, Dict


def load(path: Path) -> Dict[str, Any]:
    data = json.loads(path.read_text(encoding="utf-8"))
    # Multi-pass result may store stats under combined
    if "combined" in data:
        core = data["combined"].copy()
        # Inject representative fields for uniform comparison
        core["p50_ms"] = core.get("p50_ms") or data.get("p50_ms")
        core["p90_ms"] = core.get("p90_ms") or data.get("p90_ms")
        core["p95_ms"] = core.get("p95_ms") or data.get("p95_ms")
        core["avg_ms"] = core.get("avg_ms") or data.get("avg_ms")
        data["_core_stats"] = core
    else:
        data["_core_stats"] = {
            k: data.get(k) for k in ("p50_ms", "p90_ms", "p95_ms", "avg_ms", "count")
        }
    return data


def pct_delta(new: float, old: float) -> float:
    if old == 0:
        return 0.0
    return round(((new - old) / old) * 100.0, 2)


def compare(baseline: Dict[str, Any], candidate: Dict[str, Any]) -> Dict[str, Any]:
    b = baseline["_core_stats"]
    c = candidate["_core_stats"]
    result = {"baseline_count": b.get("count"), "candidate_count": c.get("count")}
    for k in ("p50_ms", "p90_ms", "p95_ms", "avg_ms"):
        if b.get(k) is not None and c.get(k) is not None:
            result[k] = {
                "baseline": b[k],
                "candidate": c[k],
                "delta_pct": pct_delta(c[k], b[k]),
            }
    # If both have per-pass details, include first and last pass p95/p50
    if "passes_results" in baseline and "passes_results" in candidate:
        result["passes"] = {
            "baseline": {
                "cold_p95": baseline.get("cold_pass_p95_ms"),
                "warm_p95": baseline.get("warm_pass_p95_ms"),
                "cold_p50": baseline.get("cold_pass_p50_ms"),
                "warm_p50": baseline.get("warm_pass_p50_ms"),
            },
            "candidate": {
                "cold_p95": candidate.get("cold_pass_p95_ms"),
                "warm_p95": candidate.get("warm_pass_p95_ms"),
                "cold_p50": candidate.get("cold_pass_p50_ms"),
                "warm_p50": candidate.get("warm_pass_p50_ms"),
            },
        }
    return result


def main(argv: list[str]) -> int:
    ap = argparse.ArgumentParser(description="Compare two preview benchmark JSON result files")
    ap.add_argument("--baseline", required=True, type=Path, help="Baseline JSON path")
    ap.add_argument("--candidate", required=True, type=Path, help="Candidate JSON path")
    ap.add_argument("--p95-threshold", type=float, default=None, help="Fail (exit 2) if p95 regression exceeds this percent (positive delta)")
    ap.add_argument("--warm-only", action="store_true", help="When both results have passes, compare warm pass p95/p50 instead of combined/core")
    args = ap.parse_args(argv)
    if not args.baseline.exists():
        raise SystemExit(f"Baseline not found: {args.baseline}")
    if not args.candidate.exists():
        raise SystemExit(f"Candidate not found: {args.candidate}")
    baseline = load(args.baseline)
    candidate = load(args.candidate)
    # If warm-only requested and both have warm pass stats, override _core_stats before compare
    if args.warm_only and "warm_pass_p95_ms" in baseline and "warm_pass_p95_ms" in candidate:
        baseline["_core_stats"] = {
            "p50_ms": baseline.get("warm_pass_p50_ms"),
            "p90_ms": baseline.get("_core_stats", {}).get("p90_ms"),  # p90 not tracked per-pass; retain combined
            "p95_ms": baseline.get("warm_pass_p95_ms"),
            "avg_ms": baseline.get("_core_stats", {}).get("avg_ms"),
            "count": baseline.get("_core_stats", {}).get("count"),
        }
        candidate["_core_stats"] = {
            "p50_ms": candidate.get("warm_pass_p50_ms"),
            "p90_ms": candidate.get("_core_stats", {}).get("p90_ms"),
            "p95_ms": candidate.get("warm_pass_p95_ms"),
            "avg_ms": candidate.get("_core_stats", {}).get("avg_ms"),
            "count": candidate.get("_core_stats", {}).get("count"),
        }
    cmp = compare(baseline, candidate)
    payload = {"event": "preview_perf_compare", **cmp}
    if args.p95_threshold is not None and "p95_ms" in cmp:
        delta = cmp["p95_ms"]["delta_pct"]
        payload["threshold"] = {"p95_threshold": args.p95_threshold, "p95_delta_pct": delta}
        if delta is not None and delta > args.p95_threshold:
            payload["result"] = "fail"
            print(json.dumps(payload, indent=2))  # noqa: T201
            return 2
    payload["result"] = "pass"
    print(json.dumps(payload, indent=2))  # noqa: T201
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main(sys.argv[1:]))
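To make the delta semantics concrete, a small sketch (not part of this commit) using pct_delta() with invented numbers; a positive delta means the candidate is slower than the baseline:

    # Illustrative only: a hypothetical 129 ms candidate p95 vs a 120 ms baseline p95.
    from code.scripts.preview_perf_compare import pct_delta

    baseline_p95 = 120.0   # hypothetical warm-pass p95 (ms)
    candidate_p95 = 129.0  # hypothetical candidate p95 (ms)
    print(pct_delta(candidate_p95, baseline_p95))  # 7.5 -> would fail a --p95-threshold of 5 (exit 2)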
code/scripts/snapshot_taxonomy.py (new file, +94 lines)
@@ -0,0 +1,94 @@
"""Snapshot the current power bracket taxonomy to a dated JSON artifact.

Outputs a JSON file under logs/taxonomy_snapshots/ named
    taxonomy_<YYYYMMDD>_<HHMMSS>.json
containing:
    {
      "generated_at": ISO8601,
      "hash": sha256 hex of canonical payload (excluding this top-level wrapper),
      "brackets": [ {level,name,short_desc,long_desc,limits} ... ]
    }

If a snapshot with an identical hash already exists today, creation is skipped
unless --force is provided.

Usage (from repo root):
    python -m code.scripts.snapshot_taxonomy
    python -m code.scripts.snapshot_taxonomy --force

Intended to provide an auditable evolution trail for taxonomy adjustments
before we implement taxonomy-aware sampling changes.
"""
from __future__ import annotations

import argparse
import json
import hashlib
from datetime import datetime
from pathlib import Path
from typing import Any, Dict

from code.deck_builder.phases.phase0_core import BRACKET_DEFINITIONS


SNAP_DIR = Path("logs/taxonomy_snapshots")
SNAP_DIR.mkdir(parents=True, exist_ok=True)


def _canonical_brackets():
    return [
        {
            "level": b.level,
            "name": b.name,
            "short_desc": b.short_desc,
            "long_desc": b.long_desc,
            "limits": b.limits,
        }
        for b in sorted(BRACKET_DEFINITIONS, key=lambda x: x.level)
    ]


def compute_hash(brackets) -> str:
    # Canonical JSON with sorted keys for a repeatable hash
    payload = json.dumps(brackets, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()


def find_existing_hashes() -> Dict[str, Path]:
    existing = {}
    for p in SNAP_DIR.glob("taxonomy_*.json"):
        try:
            data = json.loads(p.read_text(encoding="utf-8"))
            h = data.get("hash")
            if h:
                existing[h] = p
        except Exception:
            continue
    return existing


def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("--force", action="store_true", help="Write new snapshot even if identical hash exists today")
    args = ap.parse_args()

    brackets = _canonical_brackets()
    h = compute_hash(brackets)
    existing = find_existing_hashes()
    if h in existing and not args.force:
        print(f"Snapshot identical (hash={h[:12]}...) exists: {existing[h].name}; skipping.")
        return 0

    ts = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
    out = SNAP_DIR / f"taxonomy_{ts}.json"
    wrapper: Dict[str, Any] = {
        "generated_at": datetime.utcnow().isoformat() + "Z",
        "hash": h,
        "brackets": brackets,
    }
    out.write_text(json.dumps(wrapper, indent=2, sort_keys=True) + "\n", encoding="utf-8")
    print(f"Wrote taxonomy snapshot {out} (hash={h[:12]}...)")
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
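For context, a short sketch (not part of this commit) of why the canonical JSON hashing in compute_hash() gives a stable identity: sorted keys and compact separators make the digest independent of dict key order, so re-snapshotting unchanged brackets is detected as a duplicate. The bracket rows below are invented placeholders, not the project's real bracket definitions:

    # Illustrative only: same data, different key order, identical hash.
    import hashlib
    import json

    a = [{"level": 1, "name": "Casual", "limits": {}}]  # hypothetical bracket row
    b = [{"name": "Casual", "limits": {}, "level": 1}]  # same data, keys reordered

    def canonical_hash(brackets) -> str:
        payload = json.dumps(brackets, sort_keys=True, separators=(",", ":"))
        return hashlib.sha256(payload.encode("utf-8")).hexdigest()

    assert canonical_hash(a) == canonical_hash(b)
    print(canonical_hash(a)[:12])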