From 2888d97883a16dd126bab03ba72f8ad27ce7b0ec Mon Sep 17 00:00:00 2001 From: matt Date: Tue, 30 Sep 2025 16:01:51 -0700 Subject: [PATCH] build(ci): harden preview perf gate --- CHANGELOG.md | 1 + RELEASE_NOTES_TEMPLATE.md | 1 + code/scripts/preview_perf_benchmark.py | 13 ++++++++- code/scripts/preview_perf_ci_check.py | 31 +++++++++++++++++++++ code/tests/test_preview_perf_fetch_retry.py | 20 +++++++++++++ 5 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 code/tests/test_preview_perf_fetch_retry.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d3fe75..9ce183f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ This format follows Keep a Changelog principles and aims for Semantic Versioning - Commander thumbnails use a fixed-width 160px frame (scaling down on small screens) to eliminate inconsistent image sizing across the catalog. - Commander list pagination controls now appear above and below the results and automatically scroll to the top when switching pages for quicker navigation. - Mobile commander rows now feature larger thumbnails and a centered preview modal with expanded card art for improved readability. +- Preview performance CI check now waits for `/healthz` and retries theme catalog pagination fetches to dodge transient 500s during cold starts. ### Fixed - _No changes yet._ diff --git a/RELEASE_NOTES_TEMPLATE.md b/RELEASE_NOTES_TEMPLATE.md index b2fbfca..2c1b269 100644 --- a/RELEASE_NOTES_TEMPLATE.md +++ b/RELEASE_NOTES_TEMPLATE.md @@ -19,6 +19,7 @@ - Content Security Policy upgrade directive ensures HTMX pagination requests remain HTTPS-safe behind proxies. - Commander thumbnails adopt a fixed-width 160px frame (responsive on small screens) for consistent layout. - Mobile commander rows now feature larger thumbnails and a centered preview modal with expanded card art for improved readability. +- Preview performance CI check now waits for service health and retries catalog pagination fetches to smooth out transient 500s on cold boots. ## Fixed - Documented friendly handling for missing `commander_cards.csv` data during manual QA drills to prevent white-screen failures. \ No newline at end of file diff --git a/code/scripts/preview_perf_benchmark.py b/code/scripts/preview_perf_benchmark.py index 2fc4c43..94404ab 100644 --- a/code/scripts/preview_perf_benchmark.py +++ b/code/scripts/preview_perf_benchmark.py @@ -37,6 +37,17 @@ def _fetch_json(url: str) -> Dict[str, Any]: return json.loads(data) # type: ignore[return-value] +def _fetch_json_with_retry(url: str, attempts: int = 3, delay: float = 0.6) -> Dict[str, Any]: + for attempt in range(1, attempts + 1): + try: + return _fetch_json(url) + except Exception: # pragma: no cover - network variability + if attempt < attempts: + time.sleep(delay * attempt) + else: + raise + + def select_theme_slugs(base_url: str, count: int) -> List[str]: """Discover theme slugs for benchmarking. @@ -89,7 +100,7 @@ def fetch_all_theme_slugs(base_url: str, page_limit: int = 200) -> List[str]: while True: try: url = f"{base_url.rstrip('/')}/themes/api/themes?limit={page_limit}&offset={offset}" - data = _fetch_json(url) + data = _fetch_json_with_retry(url) except Exception as e: # pragma: no cover - network variability raise SystemExit(f"Failed fetching themes page offset={offset}: {e}") items = data.get("items") or [] diff --git a/code/scripts/preview_perf_ci_check.py b/code/scripts/preview_perf_ci_check.py index b57774c..108f219 100644 --- a/code/scripts/preview_perf_ci_check.py +++ b/code/scripts/preview_perf_ci_check.py @@ -21,7 +21,35 @@ import argparse import json import subprocess import sys +import time +import urllib.error +import urllib.request from pathlib import Path +def _wait_for_service(base_url: str, attempts: int = 8, delay: float = 1.5) -> bool: + health_url = base_url.rstrip("/") + "/healthz" + last_error: Exception | None = None + for attempt in range(1, attempts + 1): + try: + with urllib.request.urlopen(health_url, timeout=5) as resp: # nosec B310 local CI + if 200 <= resp.status < 300: + return True + except urllib.error.HTTPError as exc: + last_error = exc + if 400 <= exc.code < 500 and exc.code != 429: + # Treat permanent client errors (other than rate limit) as fatal + break + except Exception as exc: # pragma: no cover - network variability + last_error = exc + time.sleep(delay) + print(json.dumps({ + "event": "ci_perf_error", + "stage": "startup", + "message": "Service health check failed", + "url": health_url, + "attempts": attempts, + "error": str(last_error) if last_error else None, + })) + return False def run(cmd: list[str]) -> subprocess.CompletedProcess: return subprocess.run(cmd, capture_output=True, text=True, check=False) @@ -39,6 +67,9 @@ def main(argv: list[str]) -> int: print(json.dumps({"event":"ci_perf_error","message":"Baseline not found","path":str(args.baseline)})) return 3 + if not _wait_for_service(args.url): + return 3 + # Run candidate single-pass all-themes benchmark (no extra warm cycles to keep CI fast) # If multi-pass requested, run two passes over all themes so second pass represents warmed steady-state. passes = "2" if args.multi_pass else "1" diff --git a/code/tests/test_preview_perf_fetch_retry.py b/code/tests/test_preview_perf_fetch_retry.py new file mode 100644 index 0000000..7d5315b --- /dev/null +++ b/code/tests/test_preview_perf_fetch_retry.py @@ -0,0 +1,20 @@ +from code.scripts import preview_perf_benchmark as perf + + +def test_fetch_all_theme_slugs_retries(monkeypatch): + calls = {"count": 0} + + def fake_fetch(url): # type: ignore[override] + calls["count"] += 1 + if calls["count"] == 1: + raise RuntimeError("transient 500") + assert url.endswith("offset=0") + return {"items": [{"id": "alpha"}], "next_offset": None} + + monkeypatch.setattr(perf, "_fetch_json", fake_fetch) + monkeypatch.setattr(perf.time, "sleep", lambda *_args, **_kwargs: None) + + slugs = perf.fetch_all_theme_slugs("http://example.com", page_limit=1) + + assert slugs == ["alpha"] + assert calls["count"] == 2