feat(preview): sampling, metrics, governance, server mana/color identity data

Preview endpoint + fast caches; curated pins + role quotas + rarity/overlap tuning; catalog+preview metrics; governance enforcement flags; server mana/color identity fields; docs/tests/scripts updated.
matt 2025-09-23 09:19:23 -07:00
parent 8f47dfbb81
commit c4a7fc48ea
40 changed files with 6092 additions and 17312 deletions

View file

@@ -794,13 +794,40 @@ def build_catalog(limit: int, verbose: bool) -> Dict[str, Any]:
        entries.append(entry)
    # Renamed from 'provenance' to 'metadata_info' (migration phase)
    # Compute deterministic hash of YAML catalog + synergy_cap for drift detection
    import hashlib as _hashlib  # local import to avoid top-level cost

    def _catalog_hash() -> str:
        h = _hashlib.sha256()
        # Stable ordering: sort by display_name then key ordering inside dict for a subset of stable fields
        for name in sorted(yaml_catalog.keys()):
            yobj = yaml_catalog[name]
            try:
                # Compose a tuple of fields that should reflect editorial drift
                payload = (
                    getattr(yobj, 'id', ''),
                    getattr(yobj, 'display_name', ''),
                    tuple(getattr(yobj, 'curated_synergies', []) or []),
                    tuple(getattr(yobj, 'enforced_synergies', []) or []),
                    tuple(getattr(yobj, 'example_commanders', []) or []),
                    tuple(getattr(yobj, 'example_cards', []) or []),
                    getattr(yobj, 'deck_archetype', None),
                    getattr(yobj, 'popularity_hint', None),
                    getattr(yobj, 'description', None),
                    getattr(yobj, 'editorial_quality', None),
                )
                h.update(repr(payload).encode('utf-8'))
            except Exception:
                continue
        h.update(str(synergy_cap).encode('utf-8'))
        return h.hexdigest()

    metadata_info = {
        'mode': 'merge',
        'generated_at': time.strftime('%Y-%m-%dT%H:%M:%S'),
        'curated_yaml_files': len(yaml_catalog),
        'synergy_cap': synergy_cap,
        'inference': 'pmi',
        'version': 'phase-b-merge-v1',
        'catalog_hash': _catalog_hash(),
    }
    # Optional popularity analytics export for Phase D metrics collection
    if os.environ.get('EDITORIAL_POP_EXPORT'):
View file

@@ -0,0 +1,105 @@
"""CLI utility: snapshot preview metrics and emit summary/top slow themes.

Usage (from repo root virtualenv):

    python -m code.scripts.preview_metrics_snapshot --limit 10 --output logs/preview_metrics_snapshot.json

Fetches /themes/metrics (requires WEB_THEME_PICKER_DIAGNOSTICS=1) and writes a compact JSON
snapshot plus a human-readable summary to stdout.
"""
from __future__ import annotations

import argparse
import json
import sys
import time
from pathlib import Path
from typing import Any, Dict
import urllib.request
import urllib.error

DEFAULT_URL = "http://localhost:8000/themes/metrics"


def fetch_metrics(url: str) -> Dict[str, Any]:
    req = urllib.request.Request(url, headers={"Accept": "application/json"})
    with urllib.request.urlopen(req, timeout=10) as resp:  # nosec B310 (local trusted)
        data = resp.read().decode("utf-8", "replace")
    try:
        return json.loads(data)  # type: ignore[return-value]
    except json.JSONDecodeError as e:  # pragma: no cover - unlikely if server OK
        raise SystemExit(f"Invalid JSON from metrics endpoint: {e}\nRaw: {data[:400]}")


def summarize(metrics: Dict[str, Any], top_n: int) -> Dict[str, Any]:
    preview = (metrics.get("preview") or {}) if isinstance(metrics, dict) else {}
    per_theme = preview.get("per_theme") or {}
    # Compute top slow themes by avg_ms
    items = []
    for slug, info in per_theme.items():
        if not isinstance(info, dict):
            continue
        avg = info.get("avg_ms")
        if isinstance(avg, (int, float)):
            items.append((slug, float(avg), info))
    items.sort(key=lambda x: x[1], reverse=True)
    top = items[:top_n]
    return {
        "preview_requests": preview.get("preview_requests"),
        "preview_cache_hits": preview.get("preview_cache_hits"),
        "preview_avg_build_ms": preview.get("preview_avg_build_ms"),
        "preview_p95_build_ms": preview.get("preview_p95_build_ms"),
        "preview_ttl_seconds": preview.get("preview_ttl_seconds"),
        "editorial_curated_vs_sampled_pct": preview.get("editorial_curated_vs_sampled_pct"),
        "top_slowest": [
            {
                "slug": slug,
                "avg_ms": avg,
                "p95_ms": info.get("p95_ms"),
                "builds": info.get("builds"),
                "requests": info.get("requests"),
                "avg_curated_pct": info.get("avg_curated_pct"),
            }
            for slug, avg, info in top
        ],
    }


def main(argv: list[str]) -> int:
    ap = argparse.ArgumentParser(description="Snapshot preview metrics")
    ap.add_argument("--url", default=DEFAULT_URL, help="Metrics endpoint URL (default: %(default)s)")
    ap.add_argument("--limit", type=int, default=10, help="Top N slow themes to include (default: %(default)s)")
    ap.add_argument("--output", type=Path, help="Optional output JSON file for snapshot")
    ap.add_argument("--quiet", action="store_true", help="Suppress stdout summary (still writes file if --output)")
    args = ap.parse_args(argv)
    try:
        raw = fetch_metrics(args.url)
    except urllib.error.URLError as e:
        print(f"ERROR: Failed fetching metrics endpoint: {e}", file=sys.stderr)
        return 2
    summary = summarize(raw, args.limit)
    snapshot = {
        "captured_at": int(time.time()),
        "source": args.url,
        "summary": summary,
    }
    if args.output:
        try:
            args.output.parent.mkdir(parents=True, exist_ok=True)
            args.output.write_text(json.dumps(snapshot, indent=2, sort_keys=True), encoding="utf-8")
        except Exception as e:  # pragma: no cover
            print(f"ERROR: writing snapshot file failed: {e}", file=sys.stderr)
            return 3
    if not args.quiet:
        print("Preview Metrics Snapshot:")
        print(json.dumps(summary, indent=2))
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main(sys.argv[1:]))
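
For ad-hoc checks the same module can also be driven programmatically rather than via the CLI; a small sketch, assuming the server is running locally with WEB_THEME_PICKER_DIAGNOSTICS=1:

from code.scripts.preview_metrics_snapshot import fetch_metrics, summarize

# Pull raw metrics and reduce them to the five slowest preview themes.
metrics = fetch_metrics("http://localhost:8000/themes/metrics")
summary = summarize(metrics, top_n=5)
for entry in summary["top_slowest"]:
    print(entry["slug"], entry["avg_ms"], entry["p95_ms"])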

View file

@@ -0,0 +1,100 @@
#!/usr/bin/env python3
"""Fast path theme catalog presence & schema sanity validator.

Checks:
1. theme_list.json exists.
2. Loads JSON and ensures top-level keys are present: themes (list), metadata_info (dict).
3. Basic field contract for each theme: id, theme, synergies (list), description.
4. Enforces presence of catalog_hash inside metadata_info for drift detection.
5. Optionally validates against Pydantic models if available (best effort).

Exit codes:
0 success
1 structural failure / missing file
2 partial validation warnings elevated via --strict-warn
"""
from __future__ import annotations

import sys
import json
import argparse
import pathlib
import typing as t

THEME_LIST_PATH = pathlib.Path('config/themes/theme_list.json')


class Problem:
    def __init__(self, level: str, message: str):
        self.level = level
        self.message = message

    def __repr__(self):
        return f"{self.level.upper()}: {self.message}"


def load_json(path: pathlib.Path) -> t.Any:
    try:
        return json.loads(path.read_text(encoding='utf-8') or '{}')
    except FileNotFoundError:
        raise
    except Exception as e:  # pragma: no cover
        raise RuntimeError(f"parse_error: {e}")


def validate(data: t.Any) -> list[Problem]:
    probs: list[Problem] = []
    if not isinstance(data, dict):
        probs.append(Problem('error', 'top-level not an object'))
        return probs
    themes = data.get('themes')
    if not isinstance(themes, list) or not themes:
        probs.append(Problem('error', 'themes list missing or empty'))
    meta = data.get('metadata_info')
    if not isinstance(meta, dict):
        probs.append(Problem('error', 'metadata_info missing or not object'))
    else:
        if not meta.get('catalog_hash'):
            probs.append(Problem('error', 'metadata_info.catalog_hash missing'))
        if not meta.get('generated_at'):
            probs.append(Problem('warn', 'metadata_info.generated_at missing'))
    # Per-theme spot check (limit to first 50 to keep CI snappy)
    for i, th in enumerate(themes[:50] if isinstance(themes, list) else []):
        if not isinstance(th, dict):
            probs.append(Problem('error', f'theme[{i}] not object'))
            continue
        if not th.get('id'):
            probs.append(Problem('error', f'theme[{i}] id missing'))
        if not th.get('theme'):
            probs.append(Problem('error', f'theme[{i}] theme missing'))
        syns = th.get('synergies')
        if not isinstance(syns, list) or not syns:
            probs.append(Problem('warn', f'theme[{i}] synergies empty or not list'))
        if 'description' not in th:
            probs.append(Problem('warn', f'theme[{i}] description missing'))
    return probs


def main(argv: list[str]) -> int:
    ap = argparse.ArgumentParser(description='Validate fast path theme catalog build presence & schema.')
    ap.add_argument('--strict-warn', action='store_true', help='Promote warnings to errors (fail CI).')
    args = ap.parse_args(argv)
    if not THEME_LIST_PATH.exists():
        print('ERROR: theme_list.json missing at expected path.', file=sys.stderr)
        return 1
    try:
        data = load_json(THEME_LIST_PATH)
    except FileNotFoundError:
        print('ERROR: theme_list.json missing.', file=sys.stderr)
        return 1
    except Exception as e:
        print(f'ERROR: failed parsing theme_list.json: {e}', file=sys.stderr)
        return 1
    problems = validate(data)
    errors = [p for p in problems if p.level == 'error']
    warns = [p for p in problems if p.level == 'warn']
    for p in problems:
        stream = sys.stderr if p.level != 'info' else sys.stdout
        print(repr(p), file=stream)
    if errors:
        return 1
    if args.strict_warn and warns:
        return 2
    print(f"Fast path validation ok: {len(errors)} errors, {len(warns)} warnings. Checked {min(len(data.get('themes', [])), 50)} themes.")
    return 0


if __name__ == '__main__':
    raise SystemExit(main(sys.argv[1:]))
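
Because validate() operates on already-parsed JSON, it slots directly into unit tests without touching the filesystem; a minimal sketch (the import path and sample payload are assumptions for illustration):

from code.scripts.validate_theme_catalog import validate

sample = {
    "themes": [
        {"id": "aristocrats", "theme": "Aristocrats", "synergies": ["Sacrifice"], "description": "..."},
    ],
    "metadata_info": {"catalog_hash": "deadbeef", "generated_at": "2025-09-23T09:00:00"},
}
problems = validate(sample)
# A well-formed payload should produce no error-level problems.
assert not [p for p in problems if p.level == "error"]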

View file

@@ -0,0 +1,91 @@
"""Generate warm preview traffic to populate the theme preview cache & metrics.

Usage:

    python -m code.scripts.warm_preview_traffic --count 25 --repeats 2 \
        --base-url http://localhost:8000 --delay 0.05

Requirements:
- FastAPI server running locally exposing /themes endpoints
- WEB_THEME_PICKER_DIAGNOSTICS=1 so /themes/metrics is accessible

Strategy:
1. Fetch /themes/fragment/list?limit=COUNT to obtain the HTML table.
2. Extract theme slugs via regex on data-theme-id attributes.
3. Issue REPEATS preview fragment requests per slug in order.
4. Print a simple timing / status summary.

This script intentionally uses stdlib only (urllib, re, time) to avoid extra deps.
"""
from __future__ import annotations

import argparse
import re
import time
import urllib.request
import urllib.error
from typing import List

LIST_PATH = "/themes/fragment/list"
PREVIEW_PATH = "/themes/fragment/preview/{slug}"


def fetch(url: str) -> str:
    req = urllib.request.Request(url, headers={"User-Agent": "warm-preview/1"})
    with urllib.request.urlopen(req, timeout=15) as resp:  # nosec B310 (local trusted)
        return resp.read().decode("utf-8", "replace")


def extract_slugs(html: str, limit: int) -> List[str]:
    slugs = []
    for m in re.finditer(r'data-theme-id="([^"]+)"', html):
        s = m.group(1).strip()
        if s and s not in slugs:
            slugs.append(s)
        if len(slugs) >= limit:
            break
    return slugs


def warm(base_url: str, count: int, repeats: int, delay: float) -> None:
    list_url = f"{base_url}{LIST_PATH}?limit={count}&offset=0"
    print(f"[warm] Fetching list: {list_url}")
    try:
        html = fetch(list_url)
    except urllib.error.URLError as e:  # pragma: no cover
        raise SystemExit(f"Failed fetching list: {e}")
    slugs = extract_slugs(html, count)
    if not slugs:
        raise SystemExit("No theme slugs extracted; cannot warm.")
    print(f"[warm] Extracted {len(slugs)} slugs: {', '.join(slugs[:8])}{'...' if len(slugs) > 8 else ''}")
    total_requests = 0
    start = time.time()
    for r in range(repeats):
        print(f"[warm] Pass {r + 1}/{repeats}")
        for slug in slugs:
            url = f"{base_url}{PREVIEW_PATH.format(slug=slug)}"
            try:
                fetch(url)
            except Exception as e:  # pragma: no cover
                print(f"  [warn] Failed {slug}: {e}")
            else:
                total_requests += 1
            if delay:
                time.sleep(delay)
    dur = time.time() - start
    print(f"[warm] Completed {total_requests} preview requests in {dur:.2f}s ({total_requests / dur if dur > 0 else 0:.1f} rps)")
    print("[warm] Done. Now run the metrics snapshot to capture warm p95.")


def main(argv: list[str]) -> int:
    ap = argparse.ArgumentParser(description="Generate warm preview traffic")
    ap.add_argument("--base-url", default="http://localhost:8000", help="Base URL (default: %(default)s)")
    ap.add_argument("--count", type=int, default=25, help="Number of distinct theme slugs to warm (default: %(default)s)")
    ap.add_argument("--repeats", type=int, default=2, help="Repeat passes over slugs (default: %(default)s)")
    ap.add_argument("--delay", type=float, default=0.05, help="Delay between requests in seconds (default: %(default)s)")
    args = ap.parse_args(argv)
    warm(args.base_url.rstrip("/"), args.count, args.repeats, args.delay)
    return 0


if __name__ == "__main__":  # pragma: no cover
    import sys

    raise SystemExit(main(sys.argv[1:]))
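
Taken together, the two scripts form the intended measurement loop: warm the preview cache, then snapshot the now-warm p95. A sketch of that sequence, assuming both modules are invoked from the repo root as in their usage strings:

import subprocess

# Two passes over 25 themes populate the preview cache.
subprocess.run(
    ["python", "-m", "code.scripts.warm_preview_traffic", "--count", "25", "--repeats", "2"],
    check=True,
)
# Capture the warm metrics for later comparison.
subprocess.run(
    ["python", "-m", "code.scripts.preview_metrics_snapshot",
     "--limit", "10", "--output", "logs/preview_metrics_snapshot.json"],
    check=True,
)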