Mirror of https://github.com/mwisnowski/mtg_python_deckbuilder.git (synced 2025-12-16 23:50:12 +01:00)
Preview endpoint + fast caches; curated pins + role quotas + rarity/overlap tuning; catalog+preview metrics; governance enforcement flags; server mana/color identity fields; docs/tests/scripts updated.
"""Theme preview sampling (Phase F – enhanced sampling & diversity heuristics).
|
||
|
||
Summary of implemented capabilities and pending roadmap items documented inline.
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
from pathlib import Path
|
||
import csv
|
||
import time
|
||
import random
|
||
from collections import OrderedDict, deque
|
||
from typing import List, Dict, Any, Optional, Tuple, Iterable
|
||
import os
|
||
import json
|
||
import threading
|
||
|
||
try:
|
||
import yaml # type: ignore
|
||
except Exception: # pragma: no cover - PyYAML already in requirements; defensive
|
||
yaml = None # type: ignore
|
||
|
||
from .theme_catalog_loader import load_index, slugify, project_detail
|
||
|
||

# Commander bias configuration constants
COMMANDER_COLOR_FILTER_STRICT = True  # If a commander is found, restrict the sample to its color identity (except colorless)
COMMANDER_OVERLAP_BONUS = 1.8  # additive score bonus for sharing at least one tag with the commander
COMMANDER_THEME_MATCH_BONUS = 0.9  # extra bonus if the card also matches the theme directly

# Adaptive TTL configuration (can be toggled via THEME_PREVIEW_ADAPTIVE=1).
# Starts at a baseline and is adjusted up/down based on cache hit-ratio bands.
TTL_SECONDS = 600  # current effective TTL (mutable)
_TTL_BASE = 600
_TTL_MIN = 300
_TTL_MAX = 900
_ADAPT_SAMPLE_WINDOW = 120  # number of recent requests to evaluate
_ADAPTATION_ENABLED = (os.getenv("THEME_PREVIEW_ADAPTIVE") or "").lower() in {"1", "true", "yes", "on"}
_RECENT_HITS: deque[bool] = deque(maxlen=_ADAPT_SAMPLE_WINDOW)
_LAST_ADAPT_AT: float | None = None
_ADAPT_INTERVAL_S = 30  # do not adapt more often than every 30s

_BG_REFRESH_THREAD_STARTED = False
_BG_REFRESH_INTERVAL_S = int(os.getenv("THEME_PREVIEW_BG_REFRESH_INTERVAL") or 120)
_BG_REFRESH_ENABLED = (os.getenv("THEME_PREVIEW_BG_REFRESH") or "").lower() in {"1", "true", "yes", "on"}

# Adaptive background refresh heuristics (P2): the per-loop sleep is adjusted based on
# the recent error rate & p95 build latency. Bounds: [30s, 5 * base interval].
_BG_REFRESH_MIN = 30
_BG_REFRESH_MAX = max(300, _BG_REFRESH_INTERVAL_S * 5)

# Per-theme error histogram (P2 observability)
_PREVIEW_PER_THEME_ERRORS: Dict[str, int] = {}

# Optional curated synergy pair matrix externalization (P2 DATA).
_CURATED_SYNERGY_MATRIX_PATH = Path("config/themes/curated_synergy_matrix.yml")
_CURATED_SYNERGY_MATRIX: Dict[str, Dict[str, Any]] | None = None


def _load_curated_synergy_matrix() -> None:
    global _CURATED_SYNERGY_MATRIX
    if _CURATED_SYNERGY_MATRIX is not None:
        return
    if not _CURATED_SYNERGY_MATRIX_PATH.exists() or yaml is None:
        _CURATED_SYNERGY_MATRIX = None
        return
    try:
        with _CURATED_SYNERGY_MATRIX_PATH.open('r', encoding='utf-8') as fh:
            data = yaml.safe_load(fh) or {}
        if isinstance(data, dict):
            # Expect the top-level key 'pairs' but allow a raw mapping
            pairs = data.get('pairs', data)
            if isinstance(pairs, dict):
                _CURATED_SYNERGY_MATRIX = pairs  # type: ignore
            else:
                _CURATED_SYNERGY_MATRIX = None
        else:
            _CURATED_SYNERGY_MATRIX = None
    except Exception:
        _CURATED_SYNERGY_MATRIX = None


_load_curated_synergy_matrix()


def _maybe_adapt_ttl(now: float) -> None:
    """Adjust the global TTL_SECONDS based on recent hit-ratio bands.

    Strategy:
    - hit ratio < 0.25: decrease the TTL slightly to favor freshness (-60s)
    - 0.25 <= hit ratio < 0.55: gently nudge the TTL toward the base (+/-30s toward _TTL_BASE)
    - 0.55 <= hit ratio < 0.75: slight increase (+60s) as a stability payoff
    - hit ratio >= 0.75: stronger increase (+90s) to leverage locality
    Never leaves [_TTL_MIN, _TTL_MAX]. Only runs once enough samples have accumulated.
    """
    global TTL_SECONDS, _LAST_ADAPT_AT
    if not _ADAPTATION_ENABLED:
        return
    if len(_RECENT_HITS) < max(30, int(_ADAPT_SAMPLE_WINDOW * 0.5)):
        return  # insufficient data
    if _LAST_ADAPT_AT and (now - _LAST_ADAPT_AT) < _ADAPT_INTERVAL_S:
        return
    hit_ratio = sum(1 for h in _RECENT_HITS if h) / len(_RECENT_HITS)
    new_ttl = TTL_SECONDS
    if hit_ratio < 0.25:
        new_ttl = max(_TTL_MIN, TTL_SECONDS - 60)
    elif hit_ratio < 0.55:
        # move 30s toward base
        if TTL_SECONDS > _TTL_BASE:
            new_ttl = max(_TTL_BASE, TTL_SECONDS - 30)
        elif TTL_SECONDS < _TTL_BASE:
            new_ttl = min(_TTL_BASE, TTL_SECONDS + 30)
    elif hit_ratio < 0.75:
        new_ttl = min(_TTL_MAX, TTL_SECONDS + 60)
    else:
        new_ttl = min(_TTL_MAX, TTL_SECONDS + 90)
    if new_ttl != TTL_SECONDS:
        TTL_SECONDS = new_ttl
        try:
            print(json.dumps({"event": "theme_preview_ttl_adapt", "hit_ratio": round(hit_ratio, 3), "ttl": TTL_SECONDS}))  # noqa: T201
        except Exception:
            pass
    _LAST_ADAPT_AT = now
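
# A worked example of the banding above, with the defaults (_TTL_BASE=600,
# _TTL_MIN=300, _TTL_MAX=900): starting from 600, a sustained hit ratio of 0.80
# steps the TTL 600 -> 690 -> 780 -> 870 -> 900 across successive 30s adaptation
# windows, while a cold cache at 0.10 walks it back down 600 -> 540 -> ... -> 300.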


def _compute_bg_interval() -> int:
    """Derive an adaptive sleep interval from recent metrics (P2 PERF)."""
    try:
        m = preview_metrics()
        p95 = float(m.get('preview_p95_build_ms') or 0.0)
        err_rate = float(m.get('preview_error_rate_pct') or 0.0)
        base = _BG_REFRESH_INTERVAL_S
        # Heuristic: high latency -> lengthen the interval slightly (avoid a stampede);
        # a high error rate -> shorten it (refresh sooner)
        interval = base
        if p95 > 350:  # slow builds
            interval = int(base * 1.75)
        elif p95 > 250:
            interval = int(base * 1.4)
        elif p95 < 120:
            interval = int(base * 0.85)
        # Error rate influence
        if err_rate > 5.0:
            interval = max(_BG_REFRESH_MIN, int(interval * 0.6))
        elif err_rate < 1.0 and p95 < 180:
            # Very healthy -> stretch slightly (less churn)
            interval = min(_BG_REFRESH_MAX, int(interval * 1.15))
        return max(_BG_REFRESH_MIN, min(_BG_REFRESH_MAX, interval))
    except Exception:
        return max(_BG_REFRESH_MIN, _BG_REFRESH_INTERVAL_S)
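
# For instance, with the default base of 120s: p95 = 300ms falls in the 1.4x band
# (168s), and an error rate above 5% then scales that by 0.6 (100s, still above
# the 30s floor); a healthy loop (p95 = 100ms, errors < 1%) yields
# int(int(120 * 0.85) * 1.15) = 117s.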


def _bg_refresh_loop():  # pragma: no cover (background behavior)
    import time as _t
    while True:
        if not _BG_REFRESH_ENABLED:
            return
        try:
            ranked = sorted(_PREVIEW_PER_THEME_REQUESTS.items(), key=lambda kv: kv[1], reverse=True)
            top = [slug for slug, _cnt in ranked[:10]]
            for slug in top:
                try:
                    get_theme_preview(slug, limit=12, colors=None, commander=None, uncapped=True)
                except Exception:
                    continue
        except Exception:
            pass
        _t.sleep(_compute_bg_interval())


def _ensure_bg_refresh_thread():  # pragma: no cover
    global _BG_REFRESH_THREAD_STARTED
    if _BG_REFRESH_THREAD_STARTED or not _BG_REFRESH_ENABLED:
        return
    try:
        th = threading.Thread(target=_bg_refresh_loop, name="theme_preview_bg_refresh", daemon=True)
        th.start()
        _BG_REFRESH_THREAD_STARTED = True
    except Exception:
        pass
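
# Opt-in via the environment (names as read above):
#   THEME_PREVIEW_BG_REFRESH=1             # enable the daemon refresh loop
#   THEME_PREVIEW_BG_REFRESH_INTERVAL=120  # base sleep between refresh passes, in seconds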


# Preview cache, keyed by (slug, limit, colors, commander, etag)
_PREVIEW_CACHE: "OrderedDict[Tuple[str, int, str | None, str | None, str], Dict[str, Any]]" = OrderedDict()
_CARD_INDEX: Dict[str, List[Dict[str, Any]]] = {}
_CARD_INDEX_MTIME: float | None = None
_PREVIEW_REQUESTS = 0
_PREVIEW_CACHE_HITS = 0
_PREVIEW_ERROR_COUNT = 0  # rolling count of preview build failures (non-cache operational)
_PREVIEW_REQUEST_ERROR_COUNT = 0  # client-side reported fetch errors
_PREVIEW_BUILD_MS_TOTAL = 0.0
_PREVIEW_BUILD_COUNT = 0
_PREVIEW_LAST_BUST_AT: float | None = None
# Per-theme stats and global distribution tracking
_PREVIEW_PER_THEME: Dict[str, Dict[str, Any]] = {}
_PREVIEW_PER_THEME_REQUESTS: Dict[str, int] = {}
_BUILD_DURATIONS = deque(maxlen=500)  # rolling window for percentile calc
_ROLE_GLOBAL_COUNTS: Dict[str, int] = {"payoff": 0, "enabler": 0, "support": 0, "wildcard": 0}
_CURATED_GLOBAL = 0  # example + curated_synergy (non-synthetic curated content)
_SAMPLED_GLOBAL = 0

# Rarity normalization mapping (baseline – extend as new variants appear)
_RARITY_NORM = {
    "mythic rare": "mythic",
    "mythic": "mythic",
    "m": "mythic",
    "rare": "rare",
    "r": "rare",
    "uncommon": "uncommon",
    "u": "uncommon",
    "common": "common",
    "c": "common",
}


def _normalize_rarity(raw: str) -> str:
    r = (raw or "").strip().lower()
    return _RARITY_NORM.get(r, r)
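
# e.g., _normalize_rarity("Mythic Rare") -> "mythic" and _normalize_rarity("R") -> "rare";
# unmapped values pass through lower-cased ("special" -> "special").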


def _preview_cache_max() -> int:
    try:
        val_raw = os.getenv('THEME_PREVIEW_CACHE_MAX') or '400'
        val = int(val_raw)
        if val <= 0:
            raise ValueError("cache max must be >0")
        return val
    except Exception:
        # Emit a single-line warning (stdout) – diagnostics style (won't break callers)
        try:
            print(json.dumps({"event": "theme_preview_cache_config_warning", "message": "Invalid THEME_PREVIEW_CACHE_MAX; using default 400"}))  # noqa: T201
        except Exception:
            pass
        return 400


def _enforce_cache_limit():
    try:
        limit = max(50, _preview_cache_max())
        while len(_PREVIEW_CACHE) > limit:
            _PREVIEW_CACHE.popitem(last=False)  # FIFO eviction
    except Exception:
        pass


CARD_FILES_GLOB = [
    Path("csv_files/blue_cards.csv"),
    Path("csv_files/white_cards.csv"),
    Path("csv_files/black_cards.csv"),
    Path("csv_files/red_cards.csv"),
    Path("csv_files/green_cards.csv"),
    Path("csv_files/colorless_cards.csv"),
    Path("csv_files/cards.csv"),  # fallback large file last
]

THEME_TAGS_COL = "themeTags"
NAME_COL = "name"
COLOR_IDENTITY_COL = "colorIdentity"
MANA_COST_COL = "manaCost"
RARITY_COL = "rarity"  # Some CSVs may not include this column; optional


def _maybe_build_card_index():
    global _CARD_INDEX, _CARD_INDEX_MTIME
    latest = 0.0
    mtimes: List[float] = []
    for p in CARD_FILES_GLOB:
        if p.exists():
            mt = p.stat().st_mtime
            mtimes.append(mt)
            if mt > latest:
                latest = mt
    if _CARD_INDEX and _CARD_INDEX_MTIME and latest <= _CARD_INDEX_MTIME:
        return
    # Rebuild index
    _CARD_INDEX = {}
    for p in CARD_FILES_GLOB:
        if not p.exists():
            continue
        try:
            with p.open("r", encoding="utf-8", newline="") as fh:
                reader = csv.DictReader(fh)
                if not reader.fieldnames or THEME_TAGS_COL not in reader.fieldnames:
                    continue
                for row in reader:
                    name = row.get(NAME_COL) or row.get("faceName") or ""
                    tags_raw = row.get(THEME_TAGS_COL) or ""
                    # tags are stored like "['Blink', 'Enter the Battlefield']"; naive parse
                    tags = [t.strip(" '[]") for t in tags_raw.split(',') if t.strip()] if tags_raw else []
                    if not tags:
                        continue
                    color_id = (row.get(COLOR_IDENTITY_COL) or "").strip()
                    mana_cost = (row.get(MANA_COST_COL) or "").strip()
                    rarity = _normalize_rarity(row.get(RARITY_COL) or "")
                    for tg in tags:
                        if not tg:
                            continue
                        _CARD_INDEX.setdefault(tg, []).append({
                            "name": name,
                            "color_identity": color_id,
                            "tags": tags,
                            "mana_cost": mana_cost,
                            "rarity": rarity,
                            # Pre-parsed helpers (color identity list & pip colors from mana cost)
                            "color_identity_list": list(color_id) if color_id else [],
                            "pip_colors": [c for c in mana_cost if c in {"W", "U", "B", "R", "G"}],
                        })
        except Exception:
            continue
    _CARD_INDEX_MTIME = latest
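
# The resulting index maps each theme tag to its card rows; shape only (the
# values depend on the CSVs present):
#   _CARD_INDEX["Blink"] -> [{"name": ..., "color_identity": "WU", "tags": [...],
#                             "mana_cost": ..., "rarity": "rare",
#                             "color_identity_list": ["W", "U"], "pip_colors": [...]}, ...]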


def _classify_role(theme: str, synergies: List[str], tags: List[str]) -> str:
    tag_set = set(tags)
    synergy_overlap = tag_set.intersection(synergies)
    if theme in tag_set:
        return "payoff"
    if len(synergy_overlap) >= 2:
        return "enabler"
    if len(synergy_overlap) == 1:
        return "support"
    return "wildcard"


def _seed_from(theme: str, commander: Optional[str]) -> int:
    base = f"{theme.lower()}|{(commander or '').lower()}".encode("utf-8")
    # simple deterministic hash (stable across runs and processes, unlike the built-in hash(); keep primitive)
    h = 0
    for b in base:
        h = (h * 131 + b) & 0xFFFFFFFF
    return h or 1
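
# The seed is a base-131 polynomial over the UTF-8 bytes, reduced mod 2**32;
# inputs are lower-cased first, so e.g. _seed_from("Blink", None) equals
# _seed_from("blink", None), and the same (theme, commander) pair always
# shuffles buckets identically.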


def _deterministic_shuffle(items: List[Any], seed: int) -> None:
    rnd = random.Random(seed)
    rnd.shuffle(items)


def _score_card(theme: str, synergies: List[str], role: str, tags: List[str]) -> float:
    tag_set = set(tags)
    synergy_overlap = len(tag_set.intersection(synergies))
    score = 0.0
    if theme in tag_set:
        score += 3.0
    score += synergy_overlap * 1.2
    # Role weight baseline
    role_weights = {
        "payoff": 2.5,
        "enabler": 2.0,
        "support": 1.5,
        "wildcard": 0.9,
    }
    score += role_weights.get(role, 0.5)
    # Rarity weighting (with its diminishing duplicate penalty) is applied by the
    # caller after classification; see _sample_real_cards_for_theme.
    return score
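
# e.g., a card tagged with the theme plus two listed synergies classifies as
# "payoff" and scores 3.0 (theme) + 2 * 1.2 (overlap) + 2.5 (role) = 7.9,
# before commander and rarity adjustments.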


def _commander_overlap_scale(commander_tags: set[str], card_tags: List[str], synergy_set: set[str]) -> float:
    """Refined overlap scaling: only synergy tag intersections count toward the diminishing curve.

    Uses geometric diminishing returns: bonus = B * (1 - 0.5 ** n), where n is the synergy overlap count.
    Guarantees the first overlap grants 50% of base, the second 75%, the third 87.5%, asymptotically approaching B.
    """
    if not commander_tags or not synergy_set:
        return 0.0
    overlap_synergy = len(commander_tags.intersection(synergy_set).intersection(card_tags))
    if overlap_synergy <= 0:
        return 0.0
    return COMMANDER_OVERLAP_BONUS * (1 - (0.5 ** overlap_synergy))
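
# With the default COMMANDER_OVERLAP_BONUS of 1.8 this yields +0.9 for one shared
# synergy tag, +1.35 for two, and +1.575 for three, approaching but never
# reaching the full 1.8.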


def _lookup_commander(commander: Optional[str]) -> Optional[Dict[str, Any]]:
    if not commander:
        return None
    _maybe_build_card_index()
    # A commander can appear under many tags; brute scan, limited to the first match
    needle = commander.lower().strip()
    for tag_cards in _CARD_INDEX.values():
        for c in tag_cards:
            if c.get("name", "").lower() == needle:
                return c
    return None


def _sample_real_cards_for_theme(theme: str, limit: int, colors_filter: Optional[str], *, synergies: List[str], commander: Optional[str]) -> List[Dict[str, Any]]:
    _maybe_build_card_index()
    pool = _CARD_INDEX.get(theme) or []
    if not pool:
        return []
    commander_card = _lookup_commander(commander)
    commander_colors: set[str] = set(commander_card.get("color_identity", "")) if commander_card else set()
    commander_tags: set[str] = set(commander_card.get("tags", [])) if commander_card else set()
    if colors_filter:
        allowed = {c.strip().upper() for c in colors_filter.split(',') if c.strip()}
        if allowed:
            pool = [c for c in pool if set(c.get("color_identity", "")).issubset(allowed) or not c.get("color_identity")]
    # Apply the commander color identity restriction if configured
    if commander_card and COMMANDER_COLOR_FILTER_STRICT and commander_colors:
        # Allow a single off-color splash for 4-5 color commanders (leniency policy) with a later mild penalty
        allow_splash = len(commander_colors) >= 4
        new_pool = []
        for c in pool:
            ci = set(c.get("color_identity", ""))
            if not ci or ci.issubset(commander_colors):
                new_pool.append(c)
                continue
            if allow_splash:
                off = ci - commander_colors
                if len(off) == 1:  # single off-color splash
                    # mark for a later penalty (avoid mutating the shared index structure deeply; ephemeral flag)
                    c["_splash_off_color"] = True  # type: ignore
                    new_pool.append(c)
                    continue
        pool = new_pool
    # Build role buckets
    seen_names: set[str] = set()
    payoff: List[Dict[str, Any]] = []
    enabler: List[Dict[str, Any]] = []
    support: List[Dict[str, Any]] = []
    wildcard: List[Dict[str, Any]] = []
    rarity_counts: Dict[str, int] = {}
    synergy_set = set(synergies)
    # Rarity calibration (P2 SAMPLING): allow tuning via env; defaults adjusted after observation.
    rarity_weight_base = {
        "mythic": float(os.getenv("RARITY_W_MYTHIC", "1.2")),
        "rare": float(os.getenv("RARITY_W_RARE", "0.9")),
        "uncommon": float(os.getenv("RARITY_W_UNCOMMON", "0.65")),
        "common": float(os.getenv("RARITY_W_COMMON", "0.4")),
    }
    for raw in pool:
        nm = raw.get("name")
        if not nm or nm in seen_names:
            continue
        seen_names.add(nm)
        tags = raw.get("tags", [])
        role = _classify_role(theme, synergies, tags)
        score = _score_card(theme, synergies, role, tags)
        reasons = [f"role:{role}", f"synergy_overlap:{len(set(tags).intersection(synergies))}"]
        if commander_card:
            if theme in tags:
                score += COMMANDER_THEME_MATCH_BONUS
                reasons.append("commander_theme_match")
            scaled = _commander_overlap_scale(commander_tags, tags, synergy_set)
            if scaled:
                score += scaled
                reasons.append(f"commander_synergy_overlap:{len(commander_tags.intersection(synergy_set).intersection(tags))}:{round(scaled, 2)}")
            reasons.append("commander_bias")
        rarity = raw.get("rarity") or ""
        if rarity:
            base_rarity_weight = rarity_weight_base.get(rarity, 0.25)
            count_so_far = rarity_counts.get(rarity, 0)
            # Diminishing influence: divide by (1 + 0.4 * duplicates_already)
            score += base_rarity_weight / (1 + 0.4 * count_so_far)
            rarity_counts[rarity] = count_so_far + 1
            reasons.append(f"rarity_weight_calibrated:{rarity}:{round(base_rarity_weight / (1 + 0.4 * count_so_far), 2)}")
        # Splash leniency penalty (applied after other scoring)
        if raw.get("_splash_off_color"):
            score -= 0.3
            reasons.append("splash_off_color_penalty:-0.3")
        item = {
            "name": nm,
            "colors": list(raw.get("color_identity", "")),
            "roles": [role],
            "tags": tags,
            "score": score,
            "reasons": reasons,
            "mana_cost": raw.get("mana_cost"),
            "rarity": rarity,
            # Newly exposed server-authoritative parsed helpers
            "color_identity_list": raw.get("color_identity_list", []),
            "pip_colors": raw.get("pip_colors", []),
        }
        if role == "payoff":
            payoff.append(item)
        elif role == "enabler":
            enabler.append(item)
        elif role == "support":
            support.append(item)
        else:
            wildcard.append(item)
    # Deterministic shuffle inside each bucket to avoid bias from CSV ordering
    seed = _seed_from(theme, commander)
    for bucket in (payoff, enabler, support, wildcard):
        _deterministic_shuffle(bucket, seed)
        # stable secondary ordering: higher score first, then name
        bucket.sort(key=lambda x: (-x["score"], x["name"]))

    # Diversity targets (after curated examples are pinned externally)
    target_payoff = max(1, int(round(limit * 0.4)))
    target_enabler_support = max(1, int(round(limit * 0.4)))
    # support is grouped with enabler for quota distribution
    target_wild = max(0, limit - target_payoff - target_enabler_support)

    def take(n: int, source: List[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
        for i in range(min(n, len(source))):
            yield source[i]

    chosen: List[Dict[str, Any]] = []
    # Collect payoff
    chosen.extend(take(target_payoff, payoff))
    # Collect the enabler + support mix
    remaining_for_enab = target_enabler_support
    es_combined = enabler + support
    chosen.extend(take(remaining_for_enab, es_combined))
    # Collect wildcards
    chosen.extend(take(target_wild, wildcard))

    # If still short, fill from the remaining buckets (payoff first, then enabler, support, wildcard)
    if len(chosen) < limit:
        def fill_from(src: List[Dict[str, Any]]):
            nonlocal chosen
            for it in src:
                if len(chosen) >= limit:
                    break
                if it not in chosen:
                    chosen.append(it)
        for bucket in (payoff, enabler, support, wildcard):
            fill_from(bucket)

    # Role saturation penalty (post-selection adjustment): discourage dominance overflow beyond soft thresholds
    role_soft_caps = {
        "payoff": int(round(limit * 0.5)),
        "enabler": int(round(limit * 0.35)),
        "support": int(round(limit * 0.35)),
        "wildcard": int(round(limit * 0.25)),
    }
    role_seen: Dict[str, int] = {k: 0 for k in role_soft_caps}
    for it in chosen:
        r = (it.get("roles") or [None])[0]
        if not r or r not in role_soft_caps:
            continue
        role_seen[r] += 1
        if role_seen[r] > max(1, role_soft_caps[r]):
            it["score"] = it.get("score", 0) - 0.4
            it.setdefault("reasons", []).append("role_saturation_penalty:-0.4")
    # Truncate to the requested limit (ordering already established by bucket selection and fill)
    if len(chosen) > limit:
        chosen = chosen[:limit]
    # Normalize score scale (optional future work; keep raw for now)
    return chosen
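
# For the default limit of 12, the quotas above come to round(12 * 0.4) = 5 payoff
# slots, 5 enabler/support slots, and 12 - 5 - 5 = 2 wildcard slots; any shortfall
# is backfilled bucket by bucket in the order payoff, enabler, support, wildcard.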


def _now() -> float:  # small indirection for future test monkeypatching
    return time.time()


def _build_stub_items(detail: Dict[str, Any], limit: int, colors_filter: Optional[str], *, commander: Optional[str]) -> List[Dict[str, Any]]:
    items: List[Dict[str, Any]] = []
    # Start with curated example cards when present
    curated_cards = detail.get("example_cards") or []
    for idx, name in enumerate(curated_cards):
        if len(items) >= limit:
            break
        items.append({
            "name": name,
            "colors": [],  # unknown without a deeper card DB link
            "roles": ["example"],
            "tags": [],
            "score": float(limit - idx),  # simple descending score
            "reasons": ["curated_example"],
        })
    # Curated synergy example cards (if any) follow the standard examples but precede sampled cards
    synergy_curated = detail.get("synergy_example_cards") or []
    for name in synergy_curated:
        if len(items) >= limit:
            break
        # Skip duplicates already present from example_cards
        if any(it["name"] == name for it in items):
            continue
        items.append({
            "name": name,
            "colors": [],
            "roles": ["curated_synergy"],
            "tags": [],
            "score": max((it["score"] for it in items), default=1.0) - 0.1,  # just below the top examples
            "reasons": ["curated_synergy_example"],
        })
    # Remaining slots after curated examples
    remaining = max(0, limit - len(items))
    if remaining:
        theme_name = detail.get("theme")
        if isinstance(theme_name, str):
            all_synergies = []
            # Use uncapped synergies if available, else the merged list
            if detail.get("uncapped_synergies"):
                all_synergies = detail.get("uncapped_synergies") or []
            else:
                # Combine curated/enforced/inferred
                seen = set()
                for blk in (detail.get("curated_synergies") or [], detail.get("enforced_synergies") or [], detail.get("inferred_synergies") or []):
                    for s in blk:
                        if s not in seen:
                            all_synergies.append(s)
                            seen.add(s)
            real_cards = _sample_real_cards_for_theme(theme_name, remaining, colors_filter, synergies=all_synergies, commander=commander)
            for rc in real_cards:
                if len(items) >= limit:
                    break
                items.append(rc)
    if len(items) < limit:
        # Pad using synergies as synthetic placeholders to reach the requested size
        synergies = detail.get("uncapped_synergies") or detail.get("synergies") or []
        for s in synergies:
            if len(items) >= limit:
                break
            synthetic_name = f"[{s}]"
            items.append({
                "name": synthetic_name,
                "colors": [],
                "roles": ["synthetic"],
                "tags": [s],
                "score": 0.5,  # lower score keeps curated entries first
                "reasons": ["synthetic_synergy_placeholder"],
            })
    return items


def get_theme_preview(theme_id: str, *, limit: int = 12, colors: Optional[str] = None, commander: Optional[str] = None, uncapped: bool = True) -> Dict[str, Any]:
    global _PREVIEW_REQUESTS, _PREVIEW_CACHE_HITS, _PREVIEW_ERROR_COUNT, _PREVIEW_BUILD_MS_TOTAL, _PREVIEW_BUILD_COUNT
    idx = load_index()
    slug = slugify(theme_id)
    entry = idx.slug_to_entry.get(slug)
    if not entry:
        raise KeyError("theme_not_found")
    # Use uncapped synergies for better placeholder coverage (diagnostics flag gating is not applied here; placeholder only)
    detail = project_detail(slug, entry, idx.slug_to_yaml, uncapped=uncapped)
    colors_key = colors or None
    commander_key = commander or None
    cache_key = (slug, limit, colors_key, commander_key, idx.etag)
    _PREVIEW_REQUESTS += 1
    cached = _PREVIEW_CACHE.get(cache_key)
    if cached and (_now() - cached["_cached_at"]) < TTL_SECONDS:
        _PREVIEW_CACHE_HITS += 1
        _RECENT_HITS.append(True)
        # Count the request (even on a cache hit) for per-theme metrics
        _PREVIEW_PER_THEME_REQUESTS[slug] = _PREVIEW_PER_THEME_REQUESTS.get(slug, 0) + 1
        # Structured cache-hit log (diagnostics gated)
        try:
            if (os.getenv("WEB_THEME_PREVIEW_LOG") or "").lower() in {"1", "true", "yes", "on"}:
                print(json.dumps({
                    "event": "theme_preview_cache_hit",
                    "theme": slug,
                    "limit": limit,
                    "colors": colors_key,
                    "commander": commander_key,
                    "ttl_remaining_s": round(TTL_SECONDS - (_now() - cached["_cached_at"]), 2)
                }, separators=(",", ":")))  # noqa: T201
        except Exception:
            pass
        # Annotate the cache-hit flag (shallow copy to avoid mutating stored payload timings)
        payload_cached = dict(cached["payload"])
        payload_cached["cache_hit"] = True
        return payload_cached
    _RECENT_HITS.append(False)
    # Build items
    t0 = _now()
    try:
        items = _build_stub_items(detail, limit, colors_key, commander=commander_key)
    except Exception:
        # Record in the error histogram & propagate
        _PREVIEW_PER_THEME_ERRORS[slug] = _PREVIEW_PER_THEME_ERRORS.get(slug, 0) + 1
        _PREVIEW_ERROR_COUNT += 1
        raise

    # Race condition guard (P2 RESILIENCE): if we somehow produced an empty sample
    # (e.g., a catalog rebuild mid-flight), retry a limited number of times with a small backoff.
    if not items:
        for _retry in range(2):  # up to 2 retries
            time.sleep(0.05)
            try:
                items = _build_stub_items(detail, limit, colors_key, commander=commander_key)
            except Exception:
                _PREVIEW_PER_THEME_ERRORS[slug] = _PREVIEW_PER_THEME_ERRORS.get(slug, 0) + 1
                _PREVIEW_ERROR_COUNT += 1
                break
            if items:
                try:
                    print(json.dumps({"event": "theme_preview_retry_after_empty", "theme": slug}))  # noqa: T201
                except Exception:
                    pass
                break
    build_ms = (_now() - t0) * 1000.0
    _PREVIEW_BUILD_MS_TOTAL += build_ms
    _PREVIEW_BUILD_COUNT += 1
    # Duplicate suppression safety across roles (should already be unique; defensive)
    seen_names: set[str] = set()
    dedup: List[Dict[str, Any]] = []
    for it in items:
        nm = it.get("name")
        if not nm:
            continue
        if nm in seen_names:
            continue
        seen_names.add(nm)
        dedup.append(it)
    items = dedup

    # Aggregate statistics
    curated_count = sum(1 for i in items if any(r in {"example", "curated_synergy"} for r in (i.get("roles") or [])))
    sampled_core_roles = {"payoff", "enabler", "support", "wildcard"}
    role_counts_local: Dict[str, int] = {r: 0 for r in sampled_core_roles}
    for i in items:
        roles = i.get("roles") or []
        for r in roles:
            if r in role_counts_local:
                role_counts_local[r] += 1
    # Update the global counters
    global _ROLE_GLOBAL_COUNTS, _CURATED_GLOBAL, _SAMPLED_GLOBAL
    for r, c in role_counts_local.items():
        _ROLE_GLOBAL_COUNTS[r] = _ROLE_GLOBAL_COUNTS.get(r, 0) + c
    _CURATED_GLOBAL += curated_count
    _SAMPLED_GLOBAL += sum(role_counts_local.values())
    _BUILD_DURATIONS.append(build_ms)
    per = _PREVIEW_PER_THEME.setdefault(slug, {"builds": 0, "total_ms": 0.0, "durations": deque(maxlen=50), "role_counts": {r: 0 for r in sampled_core_roles}, "curated": 0, "sampled": 0})
    per["builds"] += 1
    per["total_ms"] += build_ms
    per["durations"].append(build_ms)
    per["curated"] += curated_count
    per["sampled"] += sum(role_counts_local.values())
    for r, c in role_counts_local.items():
        per["role_counts"][r] = per["role_counts"].get(r, 0) + c

    synergies_used = detail.get("uncapped_synergies") or detail.get("synergies") or []
    payload = {
        "theme_id": slug,
        "theme": detail.get("theme"),
        "count_total": len(items),  # population size TBD when full sampling is added
        "sample": items,
        "synergies_used": synergies_used,
        "generated_at": idx.catalog.metadata_info.generated_at if idx.catalog.metadata_info else None,
        "colors_filter": colors_key,
        "commander": commander_key,
        "stub": not any(it.get("roles") and it["roles"][0] in {"payoff", "support", "enabler", "wildcard"} for it in items),
        "role_counts": role_counts_local,
        "curated_pct": round((curated_count / max(1, len(items))) * 100, 2),
        "build_ms": round(build_ms, 2),
        "curated_total": curated_count,
        "sampled_total": sum(role_counts_local.values()),
        "cache_hit": False,
    }
    _PREVIEW_CACHE[cache_key] = {"payload": payload, "_cached_at": _now()}
    _PREVIEW_CACHE.move_to_end(cache_key)
    _enforce_cache_limit()
    # Track the request count post-build
    _PREVIEW_PER_THEME_REQUESTS[slug] = _PREVIEW_PER_THEME_REQUESTS.get(slug, 0) + 1
    # Structured logging (opt-in)
    try:
        if (os.getenv("WEB_THEME_PREVIEW_LOG") or "").lower() in {"1", "true", "yes", "on"}:
            log_obj = {
                "event": "theme_preview_build",
                "theme": slug,
                "limit": limit,
                "colors": colors_key,
                "commander": commander_key,
                "build_ms": round(build_ms, 2),
                "curated_pct": payload["curated_pct"],
                "curated_total": payload["curated_total"],
                "sampled_total": payload["sampled_total"],
                "role_counts": role_counts_local,
                "cache_hit": False,
            }
            print(json.dumps(log_obj, separators=(",", ":")))  # noqa: T201
    except Exception:
        pass
    # Post-build adaptive TTL evaluation & background refresher initialization
    _maybe_adapt_ttl(_now())
    _ensure_bg_refresh_thread()
    return payload
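
# Minimal usage sketch (assumes a theme catalog has been built on disk; "blink"
# is a hypothetical slug):
#
#   preview = get_theme_preview("blink", limit=12, colors="W,U")
#   for card in preview["sample"]:
#       print(card["name"], card["roles"], round(card["score"], 2))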


def _percentile(sorted_vals: List[float], pct: float) -> float:
    if not sorted_vals:
        return 0.0
    k = (len(sorted_vals) - 1) * pct
    f = int(k)
    c = min(f + 1, len(sorted_vals) - 1)
    if f == c:
        return sorted_vals[f]
    d0 = sorted_vals[f] * (c - k)
    d1 = sorted_vals[c] * (k - f)
    return d0 + d1
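
# Linear interpolation between the two nearest ranks, e.g.
# _percentile([10.0, 20.0, 30.0, 40.0], 0.95) gives k = 2.85, so
# 30 * 0.15 + 40 * 0.85 = 38.5.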


def preview_metrics() -> Dict[str, Any]:
    avg_ms = (_PREVIEW_BUILD_MS_TOTAL / _PREVIEW_BUILD_COUNT) if _PREVIEW_BUILD_COUNT else 0.0
    durations_list = sorted(list(_BUILD_DURATIONS))
    p95 = _percentile(durations_list, 0.95)
    # Role distribution, actual vs target (aggregate)
    total_roles = sum(_ROLE_GLOBAL_COUNTS.values()) or 1
    target = {"payoff": 0.4, "enabler+support": 0.4, "wildcard": 0.2}
    actual_enabler_support = (_ROLE_GLOBAL_COUNTS.get("enabler", 0) + _ROLE_GLOBAL_COUNTS.get("support", 0)) / total_roles
    role_distribution = {
        "payoff": {
            "count": _ROLE_GLOBAL_COUNTS.get("payoff", 0),
            "actual_pct": round((_ROLE_GLOBAL_COUNTS.get("payoff", 0) / total_roles) * 100, 2),
            "target_pct": target["payoff"] * 100,
        },
        "enabler_support": {
            "count": _ROLE_GLOBAL_COUNTS.get("enabler", 0) + _ROLE_GLOBAL_COUNTS.get("support", 0),
            "actual_pct": round(actual_enabler_support * 100, 2),
            "target_pct": target["enabler+support"] * 100,
        },
        "wildcard": {
            "count": _ROLE_GLOBAL_COUNTS.get("wildcard", 0),
            "actual_pct": round((_ROLE_GLOBAL_COUNTS.get("wildcard", 0) / total_roles) * 100, 2),
            "target_pct": target["wildcard"] * 100,
        },
    }
    editorial_coverage_pct = round((_CURATED_GLOBAL / max(1, (_CURATED_GLOBAL + _SAMPLED_GLOBAL))) * 100, 2)
    per_theme_stats = {}
    for slug, data in list(_PREVIEW_PER_THEME.items())[:50]:
        durs = list(data.get("durations", []))
        sd = sorted(durs)
        p50 = _percentile(sd, 0.50)
        p95_local = _percentile(sd, 0.95)
        per_theme_stats[slug] = {
            "avg_ms": round(data["total_ms"] / max(1, data["builds"]), 2),
            "p50_ms": round(p50, 2),
            "p95_ms": round(p95_local, 2),
            "builds": data["builds"],
            "avg_curated_pct": round((data["curated"] / max(1, (data["curated"] + data["sampled"]))) * 100, 2),
            "requests": _PREVIEW_PER_THEME_REQUESTS.get(slug, 0),
            "curated_total": data.get("curated", 0),
            "sampled_total": data.get("sampled", 0),
        }
    error_rate = 0.0
    total_req = _PREVIEW_REQUESTS or 0
    if total_req:
        error_rate = round((_PREVIEW_ERROR_COUNT / total_req) * 100, 2)
    # Example coverage enforcement flag: active once curated coverage exceeds the threshold (default 90%)
    try:
        enforce_threshold = float(os.getenv("EXAMPLE_ENFORCE_THRESHOLD", "90"))
    except Exception:
        enforce_threshold = 90.0
    example_enforcement_active = editorial_coverage_pct >= enforce_threshold
    return {
        "preview_requests": _PREVIEW_REQUESTS,
        "preview_cache_hits": _PREVIEW_CACHE_HITS,
        "preview_cache_entries": len(_PREVIEW_CACHE),
        "preview_avg_build_ms": round(avg_ms, 2),
        "preview_p95_build_ms": round(p95, 2),
        "preview_error_rate_pct": error_rate,
        "preview_client_fetch_errors": _PREVIEW_REQUEST_ERROR_COUNT,
        "preview_ttl_seconds": TTL_SECONDS,
        "preview_ttl_adaptive": _ADAPTATION_ENABLED,
        "preview_ttl_window": len(_RECENT_HITS),
        "preview_last_bust_at": _PREVIEW_LAST_BUST_AT,
        "role_distribution": role_distribution,
        "editorial_curated_vs_sampled_pct": editorial_coverage_pct,
        "example_enforcement_active": example_enforcement_active,
        "example_enforce_threshold_pct": enforce_threshold,
        "editorial_curated_total": _CURATED_GLOBAL,
        "editorial_sampled_total": _SAMPLED_GLOBAL,
        "per_theme": per_theme_stats,
        "per_theme_errors": dict(list(_PREVIEW_PER_THEME_ERRORS.items())[:50]),
        "curated_synergy_matrix_loaded": _CURATED_SYNERGY_MATRIX is not None,
        "curated_synergy_matrix_size": sum(len(v) for v in _CURATED_SYNERGY_MATRIX.values()) if _CURATED_SYNERGY_MATRIX else 0,
    }


def bust_preview_cache(reason: str | None = None) -> None:
    """Clear the in-memory preview cache (e.g., after a catalog rebuild or re-tagging).

    Exposed for orchestrator hooks. Keeps the metrics counters (requests/hits) for
    observability and records the last bust timestamp. The ``reason`` argument is
    accepted for call-site symmetry but is currently unused.
    """
    global _PREVIEW_CACHE, _PREVIEW_LAST_BUST_AT
    try:  # defensive; never raise
        _PREVIEW_CACHE.clear()
        import time as _t
        _PREVIEW_LAST_BUST_AT = _t.time()
    except Exception:
        pass