mtg_python_deckbuilder/code/web/services/theme_preview.py

863 lines
36 KiB
Python
Raw Normal View History

"""Theme preview sampling (Phase F enhanced sampling & diversity heuristics).
Summary of implemented capabilities and pending roadmap items documented inline.
"""
from __future__ import annotations
from pathlib import Path
import csv
import time
import random
from collections import OrderedDict, deque
from typing import List, Dict, Any, Optional, Tuple, Iterable
import os
import json
import threading
try:
import yaml # type: ignore
except Exception: # pragma: no cover - PyYAML already in requirements; defensive
yaml = None # type: ignore
from .theme_catalog_loader import load_index, slugify, project_detail
# NOTE: Remainder of module keeps large logic blocks; imports consolidated above per PEP8.
# ---------------------------------------------------------------------------
# Commander bias configuration constants
# ---------------------------------------------------------------------------
COMMANDER_COLOR_FILTER_STRICT = True # If commander found, restrict sample to its color identity (except colorless)
COMMANDER_OVERLAP_BONUS = 1.8 # additive score bonus for sharing at least one tag with commander
COMMANDER_THEME_MATCH_BONUS = 0.9 # extra if also matches theme directly
# ---------------------------------------------------------------------------
# Adaptive TTL configuration (toggled via THEME_PREVIEW_ADAPTIVE=1).
# Starts at a baseline and is adjusted up/down by _maybe_adapt_ttl based on
# cache hit ratio bands; always clamped to [_TTL_MIN, _TTL_MAX].
# ---------------------------------------------------------------------------
TTL_SECONDS = 600 # current effective TTL (mutable; rewritten by _maybe_adapt_ttl)
_TTL_BASE = 600  # baseline the mid hit-ratio band drifts back toward
_TTL_MIN = 300  # hard lower clamp for the adaptive TTL
_TTL_MAX = 900  # hard upper clamp for the adaptive TTL
_ADAPT_SAMPLE_WINDOW = 120 # number of recent requests to evaluate
_ADAPTATION_ENABLED = (os.getenv("THEME_PREVIEW_ADAPTIVE") or "").lower() in {"1","true","yes","on"}
_RECENT_HITS: deque[bool] = deque(maxlen=_ADAPT_SAMPLE_WINDOW)  # rolling hit/miss flags, newest last
_LAST_ADAPT_AT: float | None = None  # epoch seconds of the last TTL adjustment (None until first)
_ADAPT_INTERVAL_S = 30 # do not adapt more often than every 30s
# ---------------------------------------------------------------------------
# Background refresh of the hottest previews (enabled via THEME_PREVIEW_BG_REFRESH=1).
# ---------------------------------------------------------------------------
_BG_REFRESH_THREAD_STARTED = False  # guard so the daemon thread is started at most once
_BG_REFRESH_INTERVAL_S = int(os.getenv("THEME_PREVIEW_BG_REFRESH_INTERVAL") or 120)
_BG_REFRESH_ENABLED = (os.getenv("THEME_PREVIEW_BG_REFRESH") or "").lower() in {"1","true","yes","on"}
# Adaptive background refresh heuristics (P2): per-loop sleep is adjusted from
# recent error rate & p95 build latency. Bounds: [30s, 5 * base interval].
_BG_REFRESH_MIN = 30
_BG_REFRESH_MAX = max(300, _BG_REFRESH_INTERVAL_S * 5)
# Per-theme error histogram (P2 observability): slug -> build error count
_PREVIEW_PER_THEME_ERRORS: Dict[str, int] = {}
_CURATED_SYNERGY_MATRIX_PATH = Path("config/themes/curated_synergy_matrix.yml")
_CURATED_SYNERGY_MATRIX: Dict[str, Dict[str, Any]] | None = None
def _load_curated_synergy_matrix() -> None:
global _CURATED_SYNERGY_MATRIX
if _CURATED_SYNERGY_MATRIX is not None:
return
if not _CURATED_SYNERGY_MATRIX_PATH.exists() or yaml is None:
_CURATED_SYNERGY_MATRIX = None
return
try:
with _CURATED_SYNERGY_MATRIX_PATH.open('r', encoding='utf-8') as fh:
data = yaml.safe_load(fh) or {}
if isinstance(data, dict):
# Expect top-level key 'pairs' but allow raw mapping
pairs = data.get('pairs', data)
if isinstance(pairs, dict):
_CURATED_SYNERGY_MATRIX = pairs # type: ignore
else:
_CURATED_SYNERGY_MATRIX = None
else:
_CURATED_SYNERGY_MATRIX = None
except Exception:
_CURATED_SYNERGY_MATRIX = None
_load_curated_synergy_matrix()
def _maybe_adapt_ttl(now: float) -> None:
    """Re-tune the module-wide ``TTL_SECONDS`` from the recent hit ratio.

    Bands over the rolling ``_RECENT_HITS`` window:
      * ratio < 0.25        -> shorten by 60s (favor freshness)
      * 0.25 <= ratio < 0.55 -> drift 30s back toward ``_TTL_BASE``
      * 0.55 <= ratio < 0.75 -> lengthen by 60s (stability payoff)
      * ratio >= 0.75        -> lengthen by 90s (exploit locality)
    Results are clamped to [``_TTL_MIN``, ``_TTL_MAX``]. Runs only when
    adaptation is enabled, at least half the sample window is populated,
    and the previous adaptation is older than ``_ADAPT_INTERVAL_S``.
    """
    global TTL_SECONDS, _LAST_ADAPT_AT
    if not _ADAPTATION_ENABLED:
        return
    samples = len(_RECENT_HITS)
    if samples < max(30, int(_ADAPT_SAMPLE_WINDOW * 0.5)):
        return  # insufficient data to judge
    if _LAST_ADAPT_AT and (now - _LAST_ADAPT_AT) < _ADAPT_INTERVAL_S:
        return  # rate-limited
    ratio = sum(1 for hit in _RECENT_HITS if hit) / samples
    candidate = TTL_SECONDS
    if ratio < 0.25:
        candidate = max(_TTL_MIN, TTL_SECONDS - 60)
    elif ratio < 0.55:
        # Nudge 30s back toward the baseline, whichever side we are on.
        if TTL_SECONDS > _TTL_BASE:
            candidate = max(_TTL_BASE, TTL_SECONDS - 30)
        elif TTL_SECONDS < _TTL_BASE:
            candidate = min(_TTL_BASE, TTL_SECONDS + 30)
    elif ratio < 0.75:
        candidate = min(_TTL_MAX, TTL_SECONDS + 60)
    else:
        candidate = min(_TTL_MAX, TTL_SECONDS + 90)
    if candidate != TTL_SECONDS:
        TTL_SECONDS = candidate
        try:
            print(json.dumps({"event":"theme_preview_ttl_adapt","hit_ratio":round(ratio,3),"ttl":TTL_SECONDS})) # noqa: T201
        except Exception:
            pass
    _LAST_ADAPT_AT = now
def _compute_bg_interval() -> int:
    """Pick the next background-refresh sleep based on current health (P2 PERF).

    Slow p95 build latency stretches the interval (avoid a refresh
    stampede); a high error rate shrinks it (refresh sooner). The result
    is clamped to [``_BG_REFRESH_MIN``, ``_BG_REFRESH_MAX``]; any failure
    reading metrics falls back to the configured base interval.
    """
    try:
        metrics = preview_metrics()
        p95_ms = float(metrics.get('preview_p95_build_ms') or 0.0)
        errors_pct = float(metrics.get('preview_error_rate_pct') or 0.0)
        base = _BG_REFRESH_INTERVAL_S
        # Latency influence: slower builds -> longer interval.
        if p95_ms > 350:
            candidate = int(base * 1.75)
        elif p95_ms > 250:
            candidate = int(base * 1.4)
        elif p95_ms < 120:
            candidate = int(base * 0.85)
        else:
            candidate = base
        # Error-rate influence: unhealthy -> shorten; very healthy -> stretch.
        if errors_pct > 5.0:
            candidate = max(_BG_REFRESH_MIN, int(candidate * 0.6))
        elif errors_pct < 1.0 and p95_ms < 180:
            candidate = min(_BG_REFRESH_MAX, int(candidate * 1.15))
        return max(_BG_REFRESH_MIN, min(_BG_REFRESH_MAX, candidate))
    except Exception:
        return max(_BG_REFRESH_MIN, _BG_REFRESH_INTERVAL_S)
def _bg_refresh_loop(): # pragma: no cover (background behavior)
    """Daemon loop: periodically rebuild previews for the ten most-requested themes.

    Exits as soon as the feature flag is off; individual rebuild failures
    are swallowed so one bad theme cannot stall the loop.
    """
    import time as _t
    while True:
        if not _BG_REFRESH_ENABLED:
            return
        try:
            by_requests = sorted(_PREVIEW_PER_THEME_REQUESTS.items(), key=lambda kv: kv[1], reverse=True)
            hottest = [s for s, _ in by_requests[:10]]
            for s in hottest:
                try:
                    get_theme_preview(s, limit=12, colors=None, commander=None, uncapped=True)
                except Exception:
                    continue
        except Exception:
            pass
        _t.sleep(_compute_bg_interval())
def _ensure_bg_refresh_thread(): # pragma: no cover
    """Start the background refresh daemon at most once, if enabled; never raises."""
    global _BG_REFRESH_THREAD_STARTED
    if _BG_REFRESH_THREAD_STARTED or not _BG_REFRESH_ENABLED:
        return
    try:
        worker = threading.Thread(
            target=_bg_refresh_loop,
            name="theme_preview_bg_refresh",
            daemon=True,
        )
        worker.start()
        _BG_REFRESH_THREAD_STARTED = True
    except Exception:
        # Thread creation failure is non-fatal; refresh simply stays off.
        pass
# Preview cache keyed by (slug, limit, colors, commander, etag); OrderedDict
# insertion order doubles as the FIFO eviction order for _enforce_cache_limit.
_PREVIEW_CACHE: "OrderedDict[Tuple[str, int, str | None, str | None, str], Dict[str, Any]]" = OrderedDict()
# Theme tag -> list of card records, rebuilt lazily from the CSV shards.
_CARD_INDEX: Dict[str, List[Dict[str, Any]]] = {}
_CARD_INDEX_MTIME: float | None = None  # newest CSV mtime at last index build
_PREVIEW_REQUESTS = 0  # total preview requests (cache hits + builds)
_PREVIEW_CACHE_HITS = 0
_PREVIEW_ERROR_COUNT = 0 # rolling count of preview build failures (non-cache operational)
_PREVIEW_REQUEST_ERROR_COUNT = 0 # client side reported fetch errors
_PREVIEW_BUILD_MS_TOTAL = 0.0  # cumulative build time for the average metric
_PREVIEW_BUILD_COUNT = 0
_PREVIEW_LAST_BUST_AT: float | None = None  # epoch seconds of last cache bust
# Per-theme stats and global distribution tracking
_PREVIEW_PER_THEME: Dict[str, Dict[str, Any]] = {}
_PREVIEW_PER_THEME_REQUESTS: Dict[str, int] = {}
_BUILD_DURATIONS = deque(maxlen=500) # rolling window for percentile calc
_ROLE_GLOBAL_COUNTS: Dict[str, int] = {"payoff": 0, "enabler": 0, "support": 0, "wildcard": 0}
_CURATED_GLOBAL = 0 # example + curated_synergy (non-synthetic curated content)
_SAMPLED_GLOBAL = 0  # cards drawn from the sampled core roles
_RARITY_NORM = {
"mythic rare": "mythic",
"mythic": "mythic",
"m": "mythic",
"rare": "rare",
"r": "rare",
"uncommon": "uncommon",
"u": "uncommon",
"common": "common",
"c": "common",
}
def _normalize_rarity(raw: str) -> str:
r = (raw or "").strip().lower()
return _RARITY_NORM.get(r, r)
def _preview_cache_max() -> int:
try:
val_raw = (__import__('os').getenv('THEME_PREVIEW_CACHE_MAX') or '400')
val = int(val_raw)
if val <= 0:
raise ValueError("cache max must be >0")
return val
except Exception:
# Emit single-line warning (stdout) diagnostics style (won't break)
try:
print(json.dumps({"event":"theme_preview_cache_config_warning","message":"Invalid THEME_PREVIEW_CACHE_MAX; using default 400"})) # noqa: T201
except Exception:
pass
return 400
def _enforce_cache_limit():
try:
limit = max(50, _preview_cache_max())
while len(_PREVIEW_CACHE) > limit:
_PREVIEW_CACHE.popitem(last=False) # FIFO eviction
except Exception:
pass
# CSV shards probed when building the card index. The per-color shards come
# first; the combined cards.csv sits last as a fallback for anything missed.
CARD_FILES_GLOB = [
    Path("csv_files/blue_cards.csv"),
    Path("csv_files/white_cards.csv"),
    Path("csv_files/black_cards.csv"),
    Path("csv_files/red_cards.csv"),
    Path("csv_files/green_cards.csv"),
    Path("csv_files/colorless_cards.csv"),
    Path("csv_files/cards.csv"), # fallback large file last
]
# Column names expected in the CSV headers.
THEME_TAGS_COL = "themeTags"
NAME_COL = "name"
COLOR_IDENTITY_COL = "colorIdentity"
MANA_COST_COL = "manaCost"
RARITY_COL = "rarity" # Some CSVs may not include; optional
def _maybe_build_card_index():
    """(Re)build the tag -> card-record index from the CSV shards when stale.

    Staleness is judged by the newest mtime across ``CARD_FILES_GLOB``: if
    an index exists and is at least that new, it is reused. Files lacking a
    themeTags column are skipped entirely; rows with no parsed tags are
    ignored. Each card record is appended under every tag it carries.

    Fix: dropped the `mtimes` accumulator list, which was populated but
    never read.
    """
    global _CARD_INDEX, _CARD_INDEX_MTIME
    latest = 0.0
    for p in CARD_FILES_GLOB:
        if p.exists():
            mt = p.stat().st_mtime
            if mt > latest:
                latest = mt
    if _CARD_INDEX and _CARD_INDEX_MTIME and latest <= _CARD_INDEX_MTIME:
        return  # index is current
    # Rebuild index from scratch.
    _CARD_INDEX = {}
    for p in CARD_FILES_GLOB:
        if not p.exists():
            continue
        try:
            with p.open("r", encoding="utf-8", newline="") as fh:
                reader = csv.DictReader(fh)
                if not reader.fieldnames or THEME_TAGS_COL not in reader.fieldnames:
                    continue
                for row in reader:
                    name = row.get(NAME_COL) or row.get("faceName") or ""
                    tags_raw = row.get(THEME_TAGS_COL) or ""
                    # tags stored like "['Blink', 'Enter the Battlefield']"; naive parse
                    tags = [t.strip(" '[]") for t in tags_raw.split(',') if t.strip()] if tags_raw else []
                    if not tags:
                        continue
                    color_id = (row.get(COLOR_IDENTITY_COL) or "").strip()
                    mana_cost = (row.get(MANA_COST_COL) or "").strip()
                    rarity = _normalize_rarity(row.get(RARITY_COL) or "")
                    for tg in tags:
                        if not tg:
                            continue
                        _CARD_INDEX.setdefault(tg, []).append({
                            "name": name,
                            "color_identity": color_id,
                            "tags": tags,
                            "mana_cost": mana_cost,
                            "rarity": rarity,
                            # Pre-parsed helpers (color identity list & pip colors from mana cost)
                            "color_identity_list": list(color_id) if color_id else [],
                            "pip_colors": [c for c in mana_cost if c in {"W","U","B","R","G"}],
                        })
        except Exception:
            # One unreadable shard must not abort the whole rebuild.
            continue
    _CARD_INDEX_MTIME = latest
def _classify_role(theme: str, synergies: List[str], tags: List[str]) -> str:
tag_set = set(tags)
synergy_overlap = tag_set.intersection(synergies)
if theme in tag_set:
return "payoff"
if len(synergy_overlap) >= 2:
return "enabler"
if len(synergy_overlap) == 1:
return "support"
return "wildcard"
def _seed_from(theme: str, commander: Optional[str]) -> int:
base = f"{theme.lower()}|{(commander or '').lower()}".encode("utf-8")
# simple deterministic hash (stable across runs within Python version keep primitive)
h = 0
for b in base:
h = (h * 131 + b) & 0xFFFFFFFF
return h or 1
def _deterministic_shuffle(items: List[Any], seed: int) -> None:
rnd = random.Random(seed)
rnd.shuffle(items)
def _score_card(theme: str, synergies: List[str], role: str, tags: List[str]) -> float:
tag_set = set(tags)
synergy_overlap = len(tag_set.intersection(synergies))
score = 0.0
if theme in tag_set:
score += 3.0
score += synergy_overlap * 1.2
# Role weight baseline
role_weights = {
"payoff": 2.5,
"enabler": 2.0,
"support": 1.5,
"wildcard": 0.9,
}
score += role_weights.get(role, 0.5)
# Base rarity weighting (future: dynamic diminishing duplicate penalty)
# Access rarity via closure later by augmenting item after score (handled outside)
return score
def _commander_overlap_scale(commander_tags: set[str], card_tags: List[str], synergy_set: set[str]) -> float:
"""Refined overlap scaling: only synergy tag intersections count toward diminishing curve.
Uses geometric diminishing returns: bonus = B * (1 - 0.5 ** n) where n is synergy overlap count.
Guarantees first overlap grants 50% of base, second 75%, third 87.5%, asymptotically approaching B.
"""
if not commander_tags or not synergy_set:
return 0.0
overlap_synergy = len(commander_tags.intersection(synergy_set).intersection(card_tags))
if overlap_synergy <= 0:
return 0.0
return COMMANDER_OVERLAP_BONUS * (1 - (0.5 ** overlap_synergy))
def _lookup_commander(commander: Optional[str]) -> Optional[Dict[str, Any]]:
if not commander:
return None
_maybe_build_card_index()
# Commander can appear under many tags; brute scan limited to first match
needle = commander.lower().strip()
for tag_cards in _CARD_INDEX.values():
for c in tag_cards:
if c.get("name", "").lower() == needle:
return c
return None
def _sample_real_cards_for_theme(theme: str, limit: int, colors_filter: Optional[str], *, synergies: List[str], commander: Optional[str]) -> List[Dict[str, Any]]:
    """Select up to *limit* real cards for *theme* from the CSV-backed index.

    Pipeline: filter the theme's pool by the requested colors and (when
    strict) the commander's color identity -> score each card (role,
    synergy overlap, commander bias, rarity with diminishing duplicates,
    splash penalty) -> bucket by role -> deterministic shuffle plus score
    sort per bucket -> fill role quotas (~40% payoff, ~40% enabler/support,
    remainder wildcard) -> top up from leftover buckets if short -> apply
    soft-cap saturation penalties -> truncate to *limit*.

    Returns a list of item dicts (name/colors/roles/tags/score/reasons/
    mana_cost/rarity/...). NOTE(review): the `_splash_off_color` flag is
    written onto the shared index record, so it persists between calls —
    confirm this is intended.
    """
    _maybe_build_card_index()
    pool = _CARD_INDEX.get(theme) or []
    if not pool:
        return []
    # Commander context: colors gate the pool; tags feed the overlap bonus.
    commander_card = _lookup_commander(commander)
    commander_colors: set[str] = set(commander_card.get("color_identity", "")) if commander_card else set()
    commander_tags: set[str] = set(commander_card.get("tags", [])) if commander_card else set()
    if colors_filter:
        # Explicit color filter: keep cards whose identity fits, plus colorless.
        allowed = {c.strip().upper() for c in colors_filter.split(',') if c.strip()}
        if allowed:
            pool = [c for c in pool if set(c.get("color_identity", "")).issubset(allowed) or not c.get("color_identity")]
    # Apply commander color identity restriction if configured
    if commander_card and COMMANDER_COLOR_FILTER_STRICT and commander_colors:
        # Allow single off-color splash for 4-5 color commanders (leniency policy) with later mild penalty
        allow_splash = len(commander_colors) >= 4
        new_pool = []
        for c in pool:
            ci = set(c.get("color_identity", ""))
            if not ci or ci.issubset(commander_colors):
                new_pool.append(c)
                continue
            if allow_splash:
                off = ci - commander_colors
                if len(off) == 1: # single off-color splash
                    # mark for later penalty (avoid mutating shared index structure deeply; tag ephemeral flag)
                    c["_splash_off_color"] = True # type: ignore
                    new_pool.append(c)
                    continue
        pool = new_pool
    # Build role buckets
    seen_names: set[str] = set()
    payoff: List[Dict[str, Any]] = []
    enabler: List[Dict[str, Any]] = []
    support: List[Dict[str, Any]] = []
    wildcard: List[Dict[str, Any]] = []
    rarity_counts: Dict[str, int] = {}
    synergy_set = set(synergies)
    # Rarity calibration (P2 SAMPLING): allow tuning via env; default adjusted after observation.
    rarity_weight_base = {
        "mythic": float(os.getenv("RARITY_W_MYTHIC", "1.2")),
        "rare": float(os.getenv("RARITY_W_RARE", "0.9")),
        "uncommon": float(os.getenv("RARITY_W_UNCOMMON", "0.65")),
        "common": float(os.getenv("RARITY_W_COMMON", "0.4")),
    }
    for raw in pool:
        nm = raw.get("name")
        # Dedupe by card name: the same card can sit under several tags.
        if not nm or nm in seen_names:
            continue
        seen_names.add(nm)
        tags = raw.get("tags", [])
        role = _classify_role(theme, synergies, tags)
        score = _score_card(theme, synergies, role, tags)
        reasons = [f"role:{role}", f"synergy_overlap:{len(set(tags).intersection(synergies))}"]
        if commander_card:
            if theme in tags:
                score += COMMANDER_THEME_MATCH_BONUS
                reasons.append("commander_theme_match")
            scaled = _commander_overlap_scale(commander_tags, tags, synergy_set)
            if scaled:
                score += scaled
                reasons.append(f"commander_synergy_overlap:{len(commander_tags.intersection(synergy_set).intersection(tags))}:{round(scaled,2)}")
            reasons.append("commander_bias")
        rarity = raw.get("rarity") or ""
        if rarity:
            base_rarity_weight = rarity_weight_base.get(rarity, 0.25)
            count_so_far = rarity_counts.get(rarity, 0)
            # Diminishing influence: divide by (1 + 0.4 * duplicates_already)
            score += base_rarity_weight / (1 + 0.4 * count_so_far)
            rarity_counts[rarity] = count_so_far + 1
            reasons.append(f"rarity_weight_calibrated:{rarity}:{round(base_rarity_weight/(1+0.4*count_so_far),2)}")
        # Splash leniency penalty (applied after other scoring)
        if raw.get("_splash_off_color"):
            score -= 0.3
            reasons.append("splash_off_color_penalty:-0.3")
        item = {
            "name": nm,
            "colors": list(raw.get("color_identity", "")),
            "roles": [role],
            "tags": tags,
            "score": score,
            "reasons": reasons,
            "mana_cost": raw.get("mana_cost"),
            "rarity": rarity,
            # Newly exposed server authoritative parsed helpers
            "color_identity_list": raw.get("color_identity_list", []),
            "pip_colors": raw.get("pip_colors", []),
        }
        if role == "payoff":
            payoff.append(item)
        elif role == "enabler":
            enabler.append(item)
        elif role == "support":
            support.append(item)
        else:
            wildcard.append(item)
    # Deterministic shuffle inside each bucket to avoid bias from CSV ordering
    seed = _seed_from(theme, commander)
    for bucket in (payoff, enabler, support, wildcard):
        _deterministic_shuffle(bucket, seed)
        # stable secondary ordering: higher score first, then name
        bucket.sort(key=lambda x: (-x["score"], x["name"]))
    # Diversity targets (after curated examples are pinned externally)
    target_payoff = max(1, int(round(limit * 0.4)))
    target_enabler_support = max(1, int(round(limit * 0.4)))
    # support grouped with enabler for quota distribution
    target_wild = max(0, limit - target_payoff - target_enabler_support)
    def take(n: int, source: List[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
        # Yield the first n entries (or fewer when the bucket is small).
        for i in range(min(n, len(source))):
            yield source[i]
    chosen: List[Dict[str, Any]] = []
    # Collect payoff
    chosen.extend(take(target_payoff, payoff))
    # Collect enabler + support mix
    remaining_for_enab = target_enabler_support
    es_combined = enabler + support
    chosen.extend(take(remaining_for_enab, es_combined))
    # Collect wildcards
    chosen.extend(take(target_wild, wildcard))
    # If still short fill from remaining (payoff first, then enab, support, wildcard)
    if len(chosen) < limit:
        def fill_from(src: List[Dict[str, Any]]):
            # Append not-yet-chosen items from src until the limit is reached.
            nonlocal chosen
            for it in src:
                if len(chosen) >= limit:
                    break
                if it not in chosen:
                    chosen.append(it)
        for bucket in (payoff, enabler, support, wildcard):
            fill_from(bucket)
    # Role saturation penalty (post-selection adjustment): discourage dominance overflow beyond soft thresholds
    role_soft_caps = {
        "payoff": int(round(limit * 0.5)),
        "enabler": int(round(limit * 0.35)),
        "support": int(round(limit * 0.35)),
        "wildcard": int(round(limit * 0.25)),
    }
    role_seen: Dict[str, int] = {k: 0 for k in role_soft_caps}
    for it in chosen:
        r = (it.get("roles") or [None])[0]
        if not r or r not in role_soft_caps:
            continue
        role_seen[r] += 1
        if role_seen[r] > max(1, role_soft_caps[r]):
            it["score"] = it.get("score", 0) - 0.4
            (it.setdefault("reasons", [])).append("role_saturation_penalty:-0.4")
    # Truncate and re-rank final sequence deterministically by score then name (already ordered by selection except fill)
    if len(chosen) > limit:
        chosen = chosen[:limit]
    # Normalize score scale (optional future; keep raw for now)
    return chosen
# key: (slug, limit, colors, commander, etag)
def _now() -> float: # small indirection for future test monkeypatch
return time.time()
def _build_stub_items(detail: Dict[str, Any], limit: int, colors_filter: Optional[str], *, commander: Optional[str]) -> List[Dict[str, Any]]:
    """Assemble up to *limit* preview items for a theme detail payload.

    Ordering: curated example cards first (descending synthetic scores),
    then curated synergy examples (deduped, scored just below the top
    example), then real sampled cards from the index, and finally
    synthetic "[synergy]" placeholders to pad out any remaining slots.
    """
    items: List[Dict[str, Any]] = []
    # Start with curated example cards if present, else generic example_cards
    curated_cards = detail.get("example_cards") or []
    for idx, name in enumerate(curated_cards):
        if len(items) >= limit:
            break
        items.append({
            "name": name,
            "colors": [], # unknown without deeper card DB link
            "roles": ["example"],
            "tags": [],
            "score": float(limit - idx), # simple descending score
            "reasons": ["curated_example"],
        })
    # Curated synergy example cards (if any) follow standard examples but before sampled
    synergy_curated = detail.get("synergy_example_cards") or []
    for name in synergy_curated:
        if len(items) >= limit:
            break
        # Skip duplicates with example_cards
        if any(it["name"] == name for it in items):
            continue
        items.append({
            "name": name,
            "colors": [],
            "roles": ["curated_synergy"],
            "tags": [],
            "score": max((it["score"] for it in items), default=1.0) - 0.1, # just below top examples
            "reasons": ["curated_synergy_example"],
        })
    # Remaining slots after curated examples
    remaining = max(0, limit - len(items))
    if remaining:
        theme_name = detail.get("theme")
        if isinstance(theme_name, str):
            all_synergies = []
            # Use uncapped synergies if available else merged list
            if detail.get("uncapped_synergies"):
                all_synergies = detail.get("uncapped_synergies") or []
            else:
                # Combine curated/enforced/inferred (order-preserving dedupe)
                seen = set()
                for blk in (detail.get("curated_synergies") or [], detail.get("enforced_synergies") or [], detail.get("inferred_synergies") or []):
                    for s in blk:
                        if s not in seen:
                            all_synergies.append(s)
                            seen.add(s)
            real_cards = _sample_real_cards_for_theme(theme_name, remaining, colors_filter, synergies=all_synergies, commander=commander)
            for rc in real_cards:
                if len(items) >= limit:
                    break
                items.append(rc)
    if len(items) < limit:
        # Pad using synergies as synthetic placeholders to reach requested size
        synergies = detail.get("uncapped_synergies") or detail.get("synergies") or []
        for s in synergies:
            if len(items) >= limit:
                break
            synthetic_name = f"[{s}]"
            items.append({
                "name": synthetic_name,
                "colors": [],
                "roles": ["synthetic"],
                "tags": [s],
                "score": 0.5, # lower score to keep curated first
                "reasons": ["synthetic_synergy_placeholder"],
            })
    return items
def get_theme_preview(theme_id: str, *, limit: int = 12, colors: Optional[str] = None, commander: Optional[str] = None, uncapped: bool = True) -> Dict[str, Any]:
    """Return (and cache) the preview payload for *theme_id*.

    Resolves the theme via the catalog index (raises ``KeyError
    "theme_not_found"`` when absent), serves a shallow copy from the TTL
    cache when fresh, otherwise builds the sample via ``_build_stub_items``
    (with a small retry on an empty result), records metrics, caches the
    payload and returns it. Payloads carry a ``cache_hit`` flag.

    Fix: ``_PREVIEW_ERROR_COUNT`` is now in the ``global`` declaration —
    previously the ``_PREVIEW_ERROR_COUNT += 1`` statements raised
    UnboundLocalError instead of recording the failure.
    """
    global _PREVIEW_REQUESTS, _PREVIEW_CACHE_HITS, _PREVIEW_BUILD_MS_TOTAL, _PREVIEW_BUILD_COUNT, _PREVIEW_ERROR_COUNT
    idx = load_index()
    slug = slugify(theme_id)
    entry = idx.slug_to_entry.get(slug)
    if not entry:
        raise KeyError("theme_not_found")
    # Use uncapped synergies for better placeholder coverage (diagnostics flag gating not applied here; placeholder only)
    detail = project_detail(slug, entry, idx.slug_to_yaml, uncapped=uncapped)
    colors_key = colors or None
    commander_key = commander or None
    cache_key = (slug, limit, colors_key, commander_key, idx.etag)
    _PREVIEW_REQUESTS += 1
    cached = _PREVIEW_CACHE.get(cache_key)
    if cached and (_now() - cached["_cached_at"]) < TTL_SECONDS:
        _PREVIEW_CACHE_HITS += 1
        _RECENT_HITS.append(True)
        # Count request (even if cache hit) for per-theme metrics
        _PREVIEW_PER_THEME_REQUESTS[slug] = _PREVIEW_PER_THEME_REQUESTS.get(slug, 0) + 1
        # Structured cache hit log (diagnostics gated)
        try:
            if (os.getenv("WEB_THEME_PREVIEW_LOG") or "").lower() in {"1","true","yes","on"}:
                print(json.dumps({
                    "event": "theme_preview_cache_hit",
                    "theme": slug,
                    "limit": limit,
                    "colors": colors_key,
                    "commander": commander_key,
                    "ttl_remaining_s": round(TTL_SECONDS - (_now() - cached["_cached_at"]), 2)
                }, separators=(",",":"))) # noqa: T201
        except Exception:
            pass
        # Annotate cache hit flag (shallow copy to avoid mutating stored payload timings)
        payload_cached = dict(cached["payload"])
        payload_cached["cache_hit"] = True
        return payload_cached
    _RECENT_HITS.append(False)
    # Build items
    t0 = _now()
    try:
        items = _build_stub_items(detail, limit, colors_key, commander=commander_key)
    except Exception:
        # Record error histogram & propagate (bare raise keeps the traceback)
        _PREVIEW_PER_THEME_ERRORS[slug] = _PREVIEW_PER_THEME_ERRORS.get(slug, 0) + 1
        _PREVIEW_ERROR_COUNT += 1
        raise
    # Race condition guard (P2 RESILIENCE): if we somehow produced an empty sample
    # (e.g., catalog rebuild mid-flight) retry a limited number of times with small backoff.
    if not items:
        for _retry in range(2): # up to 2 retries
            time.sleep(0.05)
            try:
                items = _build_stub_items(detail, limit, colors_key, commander=commander_key)
            except Exception:
                _PREVIEW_PER_THEME_ERRORS[slug] = _PREVIEW_PER_THEME_ERRORS.get(slug, 0) + 1
                _PREVIEW_ERROR_COUNT += 1
                break
            if items:
                try:
                    print(json.dumps({"event":"theme_preview_retry_after_empty","theme":slug})) # noqa: T201
                except Exception:
                    pass
                break
    build_ms = (_now() - t0) * 1000.0
    _PREVIEW_BUILD_MS_TOTAL += build_ms
    _PREVIEW_BUILD_COUNT += 1
    # Duplicate suppression safety across roles (should already be unique, defensive)
    seen_names: set[str] = set()
    dedup: List[Dict[str, Any]] = []
    for it in items:
        nm = it.get("name")
        if not nm:
            continue
        if nm in seen_names:
            continue
        seen_names.add(nm)
        dedup.append(it)
    items = dedup
    # Aggregate statistics
    curated_count = sum(1 for i in items if any(r in {"example", "curated_synergy"} for r in (i.get("roles") or [])))
    sampled_core_roles = {"payoff", "enabler", "support", "wildcard"}
    role_counts_local: Dict[str, int] = {r: 0 for r in sampled_core_roles}
    for i in items:
        roles = i.get("roles") or []
        for r in roles:
            if r in role_counts_local:
                role_counts_local[r] += 1
    # Update global counters
    global _ROLE_GLOBAL_COUNTS, _CURATED_GLOBAL, _SAMPLED_GLOBAL
    for r, c in role_counts_local.items():
        _ROLE_GLOBAL_COUNTS[r] = _ROLE_GLOBAL_COUNTS.get(r, 0) + c
    _CURATED_GLOBAL += curated_count
    _SAMPLED_GLOBAL += sum(role_counts_local.values())
    _BUILD_DURATIONS.append(build_ms)
    per = _PREVIEW_PER_THEME.setdefault(slug, {"builds": 0, "total_ms": 0.0, "durations": deque(maxlen=50), "role_counts": {r: 0 for r in sampled_core_roles}, "curated": 0, "sampled": 0})
    per["builds"] += 1
    per["total_ms"] += build_ms
    per["durations"].append(build_ms)
    per["curated"] += curated_count
    per["sampled"] += sum(role_counts_local.values())
    for r, c in role_counts_local.items():
        per["role_counts"][r] = per["role_counts"].get(r, 0) + c
    synergies_used = detail.get("uncapped_synergies") or detail.get("synergies") or []
    payload = {
        "theme_id": slug,
        "theme": detail.get("theme"),
        "count_total": len(items), # population size TBD when full sampling added
        "sample": items,
        "synergies_used": synergies_used,
        "generated_at": idx.catalog.metadata_info.generated_at if idx.catalog.metadata_info else None,
        "colors_filter": colors_key,
        "commander": commander_key,
        "stub": False if any(it.get("roles") and it["roles"][0] in {"payoff", "support", "enabler", "wildcard"} for it in items) else True,
        "role_counts": role_counts_local,
        "curated_pct": round((curated_count / max(1, len(items))) * 100, 2),
        "build_ms": round(build_ms, 2),
        "curated_total": curated_count,
        "sampled_total": sum(role_counts_local.values()),
        "cache_hit": False,
    }
    _PREVIEW_CACHE[cache_key] = {"payload": payload, "_cached_at": _now()}
    _PREVIEW_CACHE.move_to_end(cache_key)
    _enforce_cache_limit()
    # Track request count post-build
    _PREVIEW_PER_THEME_REQUESTS[slug] = _PREVIEW_PER_THEME_REQUESTS.get(slug, 0) + 1
    # Structured logging (opt-in)
    try:
        if (os.getenv("WEB_THEME_PREVIEW_LOG") or "").lower() in {"1","true","yes","on"}:
            log_obj = {
                "event": "theme_preview_build",
                "theme": slug,
                "limit": limit,
                "colors": colors_key,
                "commander": commander_key,
                "build_ms": round(build_ms, 2),
                "curated_pct": payload["curated_pct"],
                "curated_total": payload["curated_total"],
                "sampled_total": payload["sampled_total"],
                "role_counts": role_counts_local,
                "cache_hit": False,
            }
            print(json.dumps(log_obj, separators=(",",":"))) # noqa: T201
    except Exception:
        pass
    # Post-build adaptive TTL evaluation & background refresher initialization
    _maybe_adapt_ttl(_now())
    _ensure_bg_refresh_thread()
    return payload
def _percentile(sorted_vals: List[float], pct: float) -> float:
if not sorted_vals:
return 0.0
k = (len(sorted_vals) - 1) * pct
f = int(k)
c = min(f + 1, len(sorted_vals) - 1)
if f == c:
return sorted_vals[f]
d0 = sorted_vals[f] * (c - k)
d1 = sorted_vals[c] * (k - f)
return d0 + d1
def preview_metrics() -> Dict[str, Any]:
    """Snapshot of preview subsystem metrics for diagnostics endpoints.

    Aggregates request/hit counters, build latency (average plus p95 over
    the rolling duration window), role distribution vs. targets,
    curated-vs-sampled editorial coverage, per-theme stats (first 50
    themes), error rates, TTL/adaptation state, and curated synergy matrix
    status. Read-only: mutates no counters.
    """
    avg_ms = (_PREVIEW_BUILD_MS_TOTAL / _PREVIEW_BUILD_COUNT) if _PREVIEW_BUILD_COUNT else 0.0
    durations_list = sorted(list(_BUILD_DURATIONS))
    p95 = _percentile(durations_list, 0.95)
    # Role distribution actual vs target (aggregate)
    total_roles = sum(_ROLE_GLOBAL_COUNTS.values()) or 1
    target = {"payoff": 0.4, "enabler+support": 0.4, "wildcard": 0.2}
    actual_enabler_support = (_ROLE_GLOBAL_COUNTS.get("enabler", 0) + _ROLE_GLOBAL_COUNTS.get("support", 0)) / total_roles
    role_distribution = {
        "payoff": {
            "count": _ROLE_GLOBAL_COUNTS.get("payoff", 0),
            "actual_pct": round((_ROLE_GLOBAL_COUNTS.get("payoff", 0) / total_roles) * 100, 2),
            "target_pct": target["payoff"] * 100,
        },
        "enabler_support": {
            "count": _ROLE_GLOBAL_COUNTS.get("enabler", 0) + _ROLE_GLOBAL_COUNTS.get("support", 0),
            "actual_pct": round(actual_enabler_support * 100, 2),
            "target_pct": target["enabler+support"] * 100,
        },
        "wildcard": {
            "count": _ROLE_GLOBAL_COUNTS.get("wildcard", 0),
            "actual_pct": round((_ROLE_GLOBAL_COUNTS.get("wildcard", 0) / total_roles) * 100, 2),
            "target_pct": target["wildcard"] * 100,
        },
    }
    editorial_coverage_pct = round((_CURATED_GLOBAL / max(1, (_CURATED_GLOBAL + _SAMPLED_GLOBAL))) * 100, 2)
    per_theme_stats = {}
    # Cap the per-theme breakdown at 50 entries to bound payload size.
    for slug, data in list(_PREVIEW_PER_THEME.items())[:50]:
        durs = list(data.get("durations", []))
        sd = sorted(durs)
        p50 = _percentile(sd, 0.50)
        p95_local = _percentile(sd, 0.95)
        per_theme_stats[slug] = {
            "avg_ms": round(data["total_ms"] / max(1, data["builds"]), 2),
            "p50_ms": round(p50, 2),
            "p95_ms": round(p95_local, 2),
            "builds": data["builds"],
            "avg_curated_pct": round((data["curated"] / max(1, (data["curated"] + data["sampled"])) ) * 100, 2),
            "requests": _PREVIEW_PER_THEME_REQUESTS.get(slug, 0),
            "curated_total": data.get("curated", 0),
            "sampled_total": data.get("sampled", 0),
        }
    error_rate = 0.0
    total_req = _PREVIEW_REQUESTS or 0
    if total_req:
        error_rate = round((_PREVIEW_ERROR_COUNT / total_req) * 100, 2)
    # Example coverage enforcement flag: when curated coverage exceeds threshold (default 90%)
    try:
        enforce_threshold = float(os.getenv("EXAMPLE_ENFORCE_THRESHOLD", "90"))
    except Exception:
        enforce_threshold = 90.0
    example_enforcement_active = editorial_coverage_pct >= enforce_threshold
    return {
        "preview_requests": _PREVIEW_REQUESTS,
        "preview_cache_hits": _PREVIEW_CACHE_HITS,
        "preview_cache_entries": len(_PREVIEW_CACHE),
        "preview_avg_build_ms": round(avg_ms, 2),
        "preview_p95_build_ms": round(p95, 2),
        "preview_error_rate_pct": error_rate,
        "preview_client_fetch_errors": _PREVIEW_REQUEST_ERROR_COUNT,
        "preview_ttl_seconds": TTL_SECONDS,
        "preview_ttl_adaptive": _ADAPTATION_ENABLED,
        "preview_ttl_window": len(_RECENT_HITS),
        "preview_last_bust_at": _PREVIEW_LAST_BUST_AT,
        "role_distribution": role_distribution,
        "editorial_curated_vs_sampled_pct": editorial_coverage_pct,
        "example_enforcement_active": example_enforcement_active,
        "example_enforce_threshold_pct": enforce_threshold,
        "editorial_curated_total": _CURATED_GLOBAL,
        "editorial_sampled_total": _SAMPLED_GLOBAL,
        "per_theme": per_theme_stats,
        "per_theme_errors": dict(list(_PREVIEW_PER_THEME_ERRORS.items())[:50]),
        "curated_synergy_matrix_loaded": _CURATED_SYNERGY_MATRIX is not None,
        "curated_synergy_matrix_size": sum(len(v) for v in _CURATED_SYNERGY_MATRIX.values()) if _CURATED_SYNERGY_MATRIX else 0,
    }
def bust_preview_cache(reason: str | None = None) -> None:
"""Clear in-memory preview cache (e.g., after catalog rebuild or tagging).
Exposed for orchestrator hooks. Keeps metrics counters (requests/hits) for
observability; records last bust timestamp.
"""
global _PREVIEW_CACHE, _PREVIEW_LAST_BUST_AT
try: # defensive; never raise
_PREVIEW_CACHE.clear()
import time as _t
_PREVIEW_LAST_BUST_AT = _t.time()
except Exception:
pass