mtg_python_deckbuilder/code/web/services/theme_preview.py

863 lines
36 KiB
Python
Raw Normal View History

"""Theme preview sampling (Phase F enhanced sampling & diversity heuristics).
Summary of implemented capabilities and pending roadmap items documented inline.
"""
from __future__ import annotations
from pathlib import Path
import csv
import time
import random
from collections import OrderedDict, deque
from typing import List, Dict, Any, Optional, Tuple, Iterable
import os
import json
import threading
try:
import yaml # type: ignore
except Exception: # pragma: no cover - PyYAML already in requirements; defensive
yaml = None # type: ignore
from .theme_catalog_loader import load_index, slugify, project_detail
# NOTE: Remainder of module keeps large logic blocks; imports consolidated above per PEP8.
# ---------------------------------------------------------------------------
# Commander bias configuration constants
# ---------------------------------------------------------------------------
COMMANDER_COLOR_FILTER_STRICT = True # If commander found, restrict sample to its color identity (except colorless)
COMMANDER_OVERLAP_BONUS = 1.8 # additive score bonus for sharing at least one tag with commander
COMMANDER_THEME_MATCH_BONUS = 0.9 # extra if also matches theme directly
# ---------------------------------------------------------------------------
# Adaptive TTL configuration (toggled via THEME_PREVIEW_ADAPTIVE=1).
# Starts at a baseline and is adjusted up/down by _maybe_adapt_ttl based on
# cache hit ratio bands; always clamped to [_TTL_MIN, _TTL_MAX].
# ---------------------------------------------------------------------------
TTL_SECONDS = 600 # current effective TTL (mutable; rewritten by _maybe_adapt_ttl)
_TTL_BASE = 600  # baseline the mid hit-ratio band drifts back toward
_TTL_MIN = 300  # hard lower clamp for the adaptive TTL
_TTL_MAX = 900  # hard upper clamp for the adaptive TTL
_ADAPT_SAMPLE_WINDOW = 120 # number of recent requests to evaluate
_ADAPTATION_ENABLED = (os.getenv("THEME_PREVIEW_ADAPTIVE") or "").lower() in {"1","true","yes","on"}
_RECENT_HITS: deque[bool] = deque(maxlen=_ADAPT_SAMPLE_WINDOW)  # rolling hit/miss flags, newest last
_LAST_ADAPT_AT: float | None = None  # epoch seconds of the last TTL adjustment (None until first)
_ADAPT_INTERVAL_S = 30 # do not adapt more often than every 30s
# ---------------------------------------------------------------------------
# Background refresh of the hottest previews (enabled via THEME_PREVIEW_BG_REFRESH=1).
# ---------------------------------------------------------------------------
_BG_REFRESH_THREAD_STARTED = False  # guard so the daemon thread is started at most once
_BG_REFRESH_INTERVAL_S = int(os.getenv("THEME_PREVIEW_BG_REFRESH_INTERVAL") or 120)
_BG_REFRESH_ENABLED = (os.getenv("THEME_PREVIEW_BG_REFRESH") or "").lower() in {"1","true","yes","on"}
# Adaptive background refresh heuristics (P2): per-loop sleep is adjusted from
# recent error rate & p95 build latency. Bounds: [30s, 5 * base interval].
_BG_REFRESH_MIN = 30
_BG_REFRESH_MAX = max(300, _BG_REFRESH_INTERVAL_S * 5)
# Per-theme error histogram (P2 observability): slug -> build error count
_PREVIEW_PER_THEME_ERRORS: Dict[str, int] = {}
_CURATED_SYNERGY_MATRIX_PATH = Path("config/themes/curated_synergy_matrix.yml")
_CURATED_SYNERGY_MATRIX: Dict[str, Dict[str, Any]] | None = None
def _load_curated_synergy_matrix() -> None:
global _CURATED_SYNERGY_MATRIX
if _CURATED_SYNERGY_MATRIX is not None:
return
if not _CURATED_SYNERGY_MATRIX_PATH.exists() or yaml is None:
_CURATED_SYNERGY_MATRIX = None
return
try:
with _CURATED_SYNERGY_MATRIX_PATH.open('r', encoding='utf-8') as fh:
data = yaml.safe_load(fh) or {}
if isinstance(data, dict):
# Expect top-level key 'pairs' but allow raw mapping
pairs = data.get('pairs', data)
if isinstance(pairs, dict):
_CURATED_SYNERGY_MATRIX = pairs # type: ignore
else:
_CURATED_SYNERGY_MATRIX = None
else:
_CURATED_SYNERGY_MATRIX = None
except Exception:
_CURATED_SYNERGY_MATRIX = None
_load_curated_synergy_matrix()
def _maybe_adapt_ttl(now: float) -> None:
    """Re-tune the module-wide ``TTL_SECONDS`` from the recent hit ratio.

    Bands over the rolling ``_RECENT_HITS`` window:
      * ratio < 0.25        -> shorten by 60s (favor freshness)
      * 0.25 <= ratio < 0.55 -> drift 30s back toward ``_TTL_BASE``
      * 0.55 <= ratio < 0.75 -> lengthen by 60s (stability payoff)
      * ratio >= 0.75        -> lengthen by 90s (exploit locality)
    Results are clamped to [``_TTL_MIN``, ``_TTL_MAX``]. Runs only when
    adaptation is enabled, at least half the sample window is populated,
    and the previous adaptation is older than ``_ADAPT_INTERVAL_S``.
    """
    global TTL_SECONDS, _LAST_ADAPT_AT
    if not _ADAPTATION_ENABLED:
        return
    samples = len(_RECENT_HITS)
    if samples < max(30, int(_ADAPT_SAMPLE_WINDOW * 0.5)):
        return  # insufficient data to judge
    if _LAST_ADAPT_AT and (now - _LAST_ADAPT_AT) < _ADAPT_INTERVAL_S:
        return  # rate-limited
    ratio = sum(1 for hit in _RECENT_HITS if hit) / samples
    candidate = TTL_SECONDS
    if ratio < 0.25:
        candidate = max(_TTL_MIN, TTL_SECONDS - 60)
    elif ratio < 0.55:
        # Nudge 30s back toward the baseline, whichever side we are on.
        if TTL_SECONDS > _TTL_BASE:
            candidate = max(_TTL_BASE, TTL_SECONDS - 30)
        elif TTL_SECONDS < _TTL_BASE:
            candidate = min(_TTL_BASE, TTL_SECONDS + 30)
    elif ratio < 0.75:
        candidate = min(_TTL_MAX, TTL_SECONDS + 60)
    else:
        candidate = min(_TTL_MAX, TTL_SECONDS + 90)
    if candidate != TTL_SECONDS:
        TTL_SECONDS = candidate
        try:
            print(json.dumps({"event":"theme_preview_ttl_adapt","hit_ratio":round(ratio,3),"ttl":TTL_SECONDS})) # noqa: T201
        except Exception:
            pass
    _LAST_ADAPT_AT = now
def _compute_bg_interval() -> int:
    """Pick the next background-refresh sleep based on current health (P2 PERF).

    Slow p95 build latency stretches the interval (avoid a refresh
    stampede); a high error rate shrinks it (refresh sooner). The result
    is clamped to [``_BG_REFRESH_MIN``, ``_BG_REFRESH_MAX``]; any failure
    reading metrics falls back to the configured base interval.
    """
    try:
        metrics = preview_metrics()
        p95_ms = float(metrics.get('preview_p95_build_ms') or 0.0)
        errors_pct = float(metrics.get('preview_error_rate_pct') or 0.0)
        base = _BG_REFRESH_INTERVAL_S
        # Latency influence: slower builds -> longer interval.
        if p95_ms > 350:
            candidate = int(base * 1.75)
        elif p95_ms > 250:
            candidate = int(base * 1.4)
        elif p95_ms < 120:
            candidate = int(base * 0.85)
        else:
            candidate = base
        # Error-rate influence: unhealthy -> shorten; very healthy -> stretch.
        if errors_pct > 5.0:
            candidate = max(_BG_REFRESH_MIN, int(candidate * 0.6))
        elif errors_pct < 1.0 and p95_ms < 180:
            candidate = min(_BG_REFRESH_MAX, int(candidate * 1.15))
        return max(_BG_REFRESH_MIN, min(_BG_REFRESH_MAX, candidate))
    except Exception:
        return max(_BG_REFRESH_MIN, _BG_REFRESH_INTERVAL_S)
def _bg_refresh_loop(): # pragma: no cover (background behavior)
    """Daemon loop: periodically rebuild previews for the ten most-requested themes.

    Exits as soon as the feature flag is off; individual rebuild failures
    are swallowed so one bad theme cannot stall the loop.
    """
    import time as _t
    while True:
        if not _BG_REFRESH_ENABLED:
            return
        try:
            by_requests = sorted(_PREVIEW_PER_THEME_REQUESTS.items(), key=lambda kv: kv[1], reverse=True)
            hottest = [s for s, _ in by_requests[:10]]
            for s in hottest:
                try:
                    get_theme_preview(s, limit=12, colors=None, commander=None, uncapped=True)
                except Exception:
                    continue
        except Exception:
            pass
        _t.sleep(_compute_bg_interval())
def _ensure_bg_refresh_thread(): # pragma: no cover
    """Start the background refresh daemon at most once, if enabled; never raises."""
    global _BG_REFRESH_THREAD_STARTED
    if _BG_REFRESH_THREAD_STARTED or not _BG_REFRESH_ENABLED:
        return
    try:
        worker = threading.Thread(
            target=_bg_refresh_loop,
            name="theme_preview_bg_refresh",
            daemon=True,
        )
        worker.start()
        _BG_REFRESH_THREAD_STARTED = True
    except Exception:
        # Thread creation failure is non-fatal; refresh simply stays off.
        pass
# Preview cache keyed by (slug, limit, colors, commander, etag); OrderedDict
# insertion order doubles as the FIFO eviction order for _enforce_cache_limit.
_PREVIEW_CACHE: "OrderedDict[Tuple[str, int, str | None, str | None, str], Dict[str, Any]]" = OrderedDict()
# Theme tag -> list of card records, rebuilt lazily from the CSV shards.
_CARD_INDEX: Dict[str, List[Dict[str, Any]]] = {}
_CARD_INDEX_MTIME: float | None = None  # newest CSV mtime at last index build
_PREVIEW_REQUESTS = 0  # total preview requests (cache hits + builds)
_PREVIEW_CACHE_HITS = 0
_PREVIEW_ERROR_COUNT = 0 # rolling count of preview build failures (non-cache operational)
_PREVIEW_REQUEST_ERROR_COUNT = 0 # client side reported fetch errors
_PREVIEW_BUILD_MS_TOTAL = 0.0  # cumulative build time for the average metric
_PREVIEW_BUILD_COUNT = 0
_PREVIEW_LAST_BUST_AT: float | None = None  # epoch seconds of last cache bust
# Per-theme stats and global distribution tracking
_PREVIEW_PER_THEME: Dict[str, Dict[str, Any]] = {}
_PREVIEW_PER_THEME_REQUESTS: Dict[str, int] = {}
_BUILD_DURATIONS = deque(maxlen=500) # rolling window for percentile calc
_ROLE_GLOBAL_COUNTS: Dict[str, int] = {"payoff": 0, "enabler": 0, "support": 0, "wildcard": 0}
_CURATED_GLOBAL = 0 # example + curated_synergy (non-synthetic curated content)
_SAMPLED_GLOBAL = 0  # cards drawn from the sampled core roles
_RARITY_NORM = {
"mythic rare": "mythic",
"mythic": "mythic",
"m": "mythic",
"rare": "rare",
"r": "rare",
"uncommon": "uncommon",
"u": "uncommon",
"common": "common",
"c": "common",
}
def _normalize_rarity(raw: str) -> str:
r = (raw or "").strip().lower()
return _RARITY_NORM.get(r, r)
def _preview_cache_max() -> int:
try:
val_raw = (__import__('os').getenv('THEME_PREVIEW_CACHE_MAX') or '400')
val = int(val_raw)
if val <= 0:
raise ValueError("cache max must be >0")
return val
except Exception:
# Emit single-line warning (stdout) diagnostics style (won't break)
try:
print(json.dumps({"event":"theme_preview_cache_config_warning","message":"Invalid THEME_PREVIEW_CACHE_MAX; using default 400"})) # noqa: T201
except Exception:
pass
return 400
def _enforce_cache_limit():
try:
limit = max(50, _preview_cache_max())
while len(_PREVIEW_CACHE) > limit:
_PREVIEW_CACHE.popitem(last=False) # FIFO eviction
except Exception:
pass
# CSV shards probed when building the card index. The per-color shards come
# first; the combined cards.csv sits last as a fallback for anything missed.
CARD_FILES_GLOB = [
    Path("csv_files/blue_cards.csv"),
    Path("csv_files/white_cards.csv"),
    Path("csv_files/black_cards.csv"),
    Path("csv_files/red_cards.csv"),
    Path("csv_files/green_cards.csv"),
    Path("csv_files/colorless_cards.csv"),
    Path("csv_files/cards.csv"), # fallback large file last
]
# Column names expected in the CSV headers.
THEME_TAGS_COL = "themeTags"
NAME_COL = "name"
COLOR_IDENTITY_COL = "colorIdentity"
MANA_COST_COL = "manaCost"
RARITY_COL = "rarity" # Some CSVs may not include; optional
def _maybe_build_card_index():
    """(Re)build the tag -> card-record index from the CSV shards when stale.

    Staleness is judged by the newest mtime across ``CARD_FILES_GLOB``: if
    an index exists and is at least that new, it is reused. Files lacking a
    themeTags column are skipped entirely; rows with no parsed tags are
    ignored. Each card record is appended under every tag it carries.

    Fix: dropped the `mtimes` accumulator list, which was populated but
    never read.
    """
    global _CARD_INDEX, _CARD_INDEX_MTIME
    latest = 0.0
    for p in CARD_FILES_GLOB:
        if p.exists():
            mt = p.stat().st_mtime
            if mt > latest:
                latest = mt
    if _CARD_INDEX and _CARD_INDEX_MTIME and latest <= _CARD_INDEX_MTIME:
        return  # index is current
    # Rebuild index from scratch.
    _CARD_INDEX = {}
    for p in CARD_FILES_GLOB:
        if not p.exists():
            continue
        try:
            with p.open("r", encoding="utf-8", newline="") as fh:
                reader = csv.DictReader(fh)
                if not reader.fieldnames or THEME_TAGS_COL not in reader.fieldnames:
                    continue
                for row in reader:
                    name = row.get(NAME_COL) or row.get("faceName") or ""
                    tags_raw = row.get(THEME_TAGS_COL) or ""
                    # tags stored like "['Blink', 'Enter the Battlefield']"; naive parse
                    tags = [t.strip(" '[]") for t in tags_raw.split(',') if t.strip()] if tags_raw else []
                    if not tags:
                        continue
                    color_id = (row.get(COLOR_IDENTITY_COL) or "").strip()
                    mana_cost = (row.get(MANA_COST_COL) or "").strip()
                    rarity = _normalize_rarity(row.get(RARITY_COL) or "")
                    for tg in tags:
                        if not tg:
                            continue
                        _CARD_INDEX.setdefault(tg, []).append({
                            "name": name,
                            "color_identity": color_id,
                            "tags": tags,
                            "mana_cost": mana_cost,
                            "rarity": rarity,
                            # Pre-parsed helpers (color identity list & pip colors from mana cost)
                            "color_identity_list": list(color_id) if color_id else [],
                            "pip_colors": [c for c in mana_cost if c in {"W","U","B","R","G"}],
                        })
        except Exception:
            # One unreadable shard must not abort the whole rebuild.
            continue
    _CARD_INDEX_MTIME = latest
def _classify_role(theme: str, synergies: List[str], tags: List[str]) -> str:
tag_set = set(tags)
synergy_overlap = tag_set.intersection(synergies)
if theme in tag_set:
return "payoff"
if len(synergy_overlap) >= 2:
return "enabler"
if len(synergy_overlap) == 1:
return "support"
return "wildcard"
def _seed_from(theme: str, commander: Optional[str]) -> int:
base = f"{theme.lower()}|{(commander or '').lower()}".encode("utf-8")
# simple deterministic hash (stable across runs within Python version keep primitive)
h = 0
for b in base:
h = (h * 131 + b) & 0xFFFFFFFF
return h or 1
def _deterministic_shuffle(items: List[Any], seed: int) -> None:
rnd = random.Random(seed)
rnd.shuffle(items)
def _score_card(theme: str, synergies: List[str], role: str, tags: List[str]) -> float:
tag_set = set(tags)
synergy_overlap = len(tag_set.intersection(synergies))
score = 0.0
if theme in tag_set:
score += 3.0
score += synergy_overlap * 1.2
# Role weight baseline
role_weights = {
"payoff": 2.5,
"enabler": 2.0,
"support": 1.5,
"wildcard": 0.9,
}
score += role_weights.get(role, 0.5)
# Base rarity weighting (future: dynamic diminishing duplicate penalty)
# Access rarity via closure later by augmenting item after score (handled outside)
return score
def _commander_overlap_scale(commander_tags: set[str], card_tags: List[str], synergy_set: set[str]) -> float:
"""Refined overlap scaling: only synergy tag intersections count toward diminishing curve.
Uses geometric diminishing returns: bonus = B * (1 - 0.5 ** n) where n is synergy overlap count.
Guarantees first overlap grants 50% of base, second 75%, third 87.5%, asymptotically approaching B.
"""
if not commander_tags or not synergy_set:
return 0.0
overlap_synergy = len(commander_tags.intersection(synergy_set).intersection(card_tags))
if overlap_synergy <= 0:
return 0.0
return COMMANDER_OVERLAP_BONUS * (1 - (0.5 ** overlap_synergy))
def _lookup_commander(commander: Optional[str]) -> Optional[Dict[str, Any]]:
if not commander:
return None
_maybe_build_card_index()
# Commander can appear under many tags; brute scan limited to first match
needle = commander.lower().strip()
for tag_cards in _CARD_INDEX.values():
for c in tag_cards:
if c.get("name", "").lower() == needle:
return c
return None
def _sample_real_cards_for_theme(theme: str, limit: int, colors_filter: Optional[str], *, synergies: List[str], commander: Optional[str]) -> List[Dict[str, Any]]:
    """Select up to *limit* real cards for *theme* from the CSV-backed index.

    Pipeline: filter the theme's pool by the requested colors and (when
    strict) the commander's color identity -> score each card (role,
    synergy overlap, commander bias, rarity with diminishing duplicates,
    splash penalty) -> bucket by role -> deterministic shuffle plus score
    sort per bucket -> fill role quotas (~40% payoff, ~40% enabler/support,
    remainder wildcard) -> top up from leftover buckets if short -> apply
    soft-cap saturation penalties -> truncate to *limit*.

    Returns a list of item dicts (name/colors/roles/tags/score/reasons/
    mana_cost/rarity/...). NOTE(review): the `_splash_off_color` flag is
    written onto the shared index record, so it persists between calls —
    confirm this is intended.
    """
    _maybe_build_card_index()
    pool = _CARD_INDEX.get(theme) or []
    if not pool:
        return []
    # Commander context: colors gate the pool; tags feed the overlap bonus.
    commander_card = _lookup_commander(commander)
    commander_colors: set[str] = set(commander_card.get("color_identity", "")) if commander_card else set()
    commander_tags: set[str] = set(commander_card.get("tags", [])) if commander_card else set()
    if colors_filter:
        # Explicit color filter: keep cards whose identity fits, plus colorless.
        allowed = {c.strip().upper() for c in colors_filter.split(',') if c.strip()}
        if allowed:
            pool = [c for c in pool if set(c.get("color_identity", "")).issubset(allowed) or not c.get("color_identity")]
    # Apply commander color identity restriction if configured
    if commander_card and COMMANDER_COLOR_FILTER_STRICT and commander_colors:
        # Allow single off-color splash for 4-5 color commanders (leniency policy) with later mild penalty
        allow_splash = len(commander_colors) >= 4
        new_pool = []
        for c in pool:
            ci = set(c.get("color_identity", ""))
            if not ci or ci.issubset(commander_colors):
                new_pool.append(c)
                continue
            if allow_splash:
                off = ci - commander_colors
                if len(off) == 1: # single off-color splash
                    # mark for later penalty (avoid mutating shared index structure deeply; tag ephemeral flag)
                    c["_splash_off_color"] = True # type: ignore
                    new_pool.append(c)
                    continue
        pool = new_pool
    # Build role buckets
    seen_names: set[str] = set()
    payoff: List[Dict[str, Any]] = []
    enabler: List[Dict[str, Any]] = []
    support: List[Dict[str, Any]] = []
    wildcard: List[Dict[str, Any]] = []
    rarity_counts: Dict[str, int] = {}
    synergy_set = set(synergies)
    # Rarity calibration (P2 SAMPLING): allow tuning via env; default adjusted after observation.
    rarity_weight_base = {
        "mythic": float(os.getenv("RARITY_W_MYTHIC", "1.2")),
        "rare": float(os.getenv("RARITY_W_RARE", "0.9")),
        "uncommon": float(os.getenv("RARITY_W_UNCOMMON", "0.65")),
        "common": float(os.getenv("RARITY_W_COMMON", "0.4")),
    }
    for raw in pool:
        nm = raw.get("name")
        # Dedupe by card name: the same card can sit under several tags.
        if not nm or nm in seen_names:
            continue
        seen_names.add(nm)
        tags = raw.get("tags", [])
        role = _classify_role(theme, synergies, tags)
        score = _score_card(theme, synergies, role, tags)
        reasons = [f"role:{role}", f"synergy_overlap:{len(set(tags).intersection(synergies))}"]
        if commander_card:
            if theme in tags:
                score += COMMANDER_THEME_MATCH_BONUS
                reasons.append("commander_theme_match")
            scaled = _commander_overlap_scale(commander_tags, tags, synergy_set)
            if scaled:
                score += scaled
                reasons.append(f"commander_synergy_overlap:{len(commander_tags.intersection(synergy_set).intersection(tags))}:{round(scaled,2)}")
            reasons.append("commander_bias")
        rarity = raw.get("rarity") or ""
        if rarity:
            base_rarity_weight = rarity_weight_base.get(rarity, 0.25)
            count_so_far = rarity_counts.get(rarity, 0)
            # Diminishing influence: divide by (1 + 0.4 * duplicates_already)
            score += base_rarity_weight / (1 + 0.4 * count_so_far)
            rarity_counts[rarity] = count_so_far + 1
            reasons.append(f"rarity_weight_calibrated:{rarity}:{round(base_rarity_weight/(1+0.4*count_so_far),2)}")
        # Splash leniency penalty (applied after other scoring)
        if raw.get("_splash_off_color"):
            score -= 0.3
            reasons.append("splash_off_color_penalty:-0.3")
        item = {
            "name": nm,
            "colors": list(raw.get("color_identity", "")),
            "roles": [role],
            "tags": tags,
            "score": score,
            "reasons": reasons,
            "mana_cost": raw.get("mana_cost"),
            "rarity": rarity,
            # Newly exposed server authoritative parsed helpers
            "color_identity_list": raw.get("color_identity_list", []),
            "pip_colors": raw.get("pip_colors", []),
        }
        if role == "payoff":
            payoff.append(item)
        elif role == "enabler":
            enabler.append(item)
        elif role == "support":
            support.append(item)
        else:
            wildcard.append(item)
    # Deterministic shuffle inside each bucket to avoid bias from CSV ordering
    seed = _seed_from(theme, commander)
    for bucket in (payoff, enabler, support, wildcard):
        _deterministic_shuffle(bucket, seed)
        # stable secondary ordering: higher score first, then name
        bucket.sort(key=lambda x: (-x["score"], x["name"]))
    # Diversity targets (after curated examples are pinned externally)
    target_payoff = max(1, int(round(limit * 0.4)))
    target_enabler_support = max(1, int(round(limit * 0.4)))
    # support grouped with enabler for quota distribution
    target_wild = max(0, limit - target_payoff - target_enabler_support)
    def take(n: int, source: List[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
        # Yield the first n entries (or fewer when the bucket is small).
        for i in range(min(n, len(source))):
            yield source[i]
    chosen: List[Dict[str, Any]] = []
    # Collect payoff
    chosen.extend(take(target_payoff, payoff))
    # Collect enabler + support mix
    remaining_for_enab = target_enabler_support
    es_combined = enabler + support
    chosen.extend(take(remaining_for_enab, es_combined))
    # Collect wildcards
    chosen.extend(take(target_wild, wildcard))
    # If still short fill from remaining (payoff first, then enab, support, wildcard)
    if len(chosen) < limit:
        def fill_from(src: List[Dict[str, Any]]):
            # Append not-yet-chosen items from src until the limit is reached.
            nonlocal chosen
            for it in src:
                if len(chosen) >= limit:
                    break
                if it not in chosen:
                    chosen.append(it)
        for bucket in (payoff, enabler, support, wildcard):
            fill_from(bucket)
    # Role saturation penalty (post-selection adjustment): discourage dominance overflow beyond soft thresholds
    role_soft_caps = {
        "payoff": int(round(limit * 0.5)),
        "enabler": int(round(limit * 0.35)),
        "support": int(round(limit * 0.35)),
        "wildcard": int(round(limit * 0.25)),
    }
    role_seen: Dict[str, int] = {k: 0 for k in role_soft_caps}
    for it in chosen:
        r = (it.get("roles") or [None])[0]
        if not r or r not in role_soft_caps:
            continue
        role_seen[r] += 1
        if role_seen[r] > max(1, role_soft_caps[r]):
            it["score"] = it.get("score", 0) - 0.4
            (it.setdefault("reasons", [])).append("role_saturation_penalty:-0.4")
    # Truncate and re-rank final sequence deterministically by score then name (already ordered by selection except fill)
    if len(chosen) > limit:
        chosen = chosen[:limit]
    # Normalize score scale (optional future; keep raw for now)
    return chosen
# key: (slug, limit, colors, commander, etag)
def _now() -> float: # small indirection for future test monkeypatch
return time.time()
def _build_stub_items(detail: Dict[str, Any], limit: int, colors_filter: Optional[str], *, commander: Optional[str]) -> List[Dict[str, Any]]:
    """Assemble up to *limit* preview items for a theme detail payload.

    Ordering: curated example cards first (descending synthetic scores),
    then curated synergy examples (deduped, scored just below the top
    example), then real sampled cards from the index, and finally
    synthetic "[synergy]" placeholders to pad out any remaining slots.
    """
    items: List[Dict[str, Any]] = []
    # Start with curated example cards if present, else generic example_cards
    curated_cards = detail.get("example_cards") or []
    for idx, name in enumerate(curated_cards):
        if len(items) >= limit:
            break
        items.append({
            "name": name,
            "colors": [], # unknown without deeper card DB link
            "roles": ["example"],
            "tags": [],
            "score": float(limit - idx), # simple descending score
            "reasons": ["curated_example"],
        })
    # Curated synergy example cards (if any) follow standard examples but before sampled
    synergy_curated = detail.get("synergy_example_cards") or []
    for name in synergy_curated:
        if len(items) >= limit:
            break
        # Skip duplicates with example_cards
        if any(it["name"] == name for it in items):
            continue
        items.append({
            "name": name,
            "colors": [],
            "roles": ["curated_synergy"],
            "tags": [],
            "score": max((it["score"] for it in items), default=1.0) - 0.1, # just below top examples
            "reasons": ["curated_synergy_example"],
        })
    # Remaining slots after curated examples
    remaining = max(0, limit - len(items))
    if remaining:
        theme_name = detail.get("theme")
        if isinstance(theme_name, str):
            all_synergies = []
            # Use uncapped synergies if available else merged list
            if detail.get("uncapped_synergies"):
                all_synergies = detail.get("uncapped_synergies") or []
            else:
                # Combine curated/enforced/inferred (order-preserving dedupe)
                seen = set()
                for blk in (detail.get("curated_synergies") or [], detail.get("enforced_synergies") or [], detail.get("inferred_synergies") or []):
                    for s in blk:
                        if s not in seen:
                            all_synergies.append(s)
                            seen.add(s)
            real_cards = _sample_real_cards_for_theme(theme_name, remaining, colors_filter, synergies=all_synergies, commander=commander)
            for rc in real_cards:
                if len(items) >= limit:
                    break
                items.append(rc)
    if len(items) < limit:
        # Pad using synergies as synthetic placeholders to reach requested size
        synergies = detail.get("uncapped_synergies") or detail.get("synergies") or []
        for s in synergies:
            if len(items) >= limit:
                break
            synthetic_name = f"[{s}]"
            items.append({
                "name": synthetic_name,
                "colors": [],
                "roles": ["synthetic"],
                "tags": [s],
                "score": 0.5, # lower score to keep curated first
                "reasons": ["synthetic_synergy_placeholder"],
            })
    return items
def get_theme_preview(theme_id: str, *, limit: int = 12, colors: Optional[str] = None, commander: Optional[str] = None, uncapped: bool = True) -> Dict[str, Any]:
    """Return (and cache) the preview payload for *theme_id*.

    Resolves the theme via the catalog index (raises ``KeyError
    "theme_not_found"`` when absent), serves a shallow copy from the TTL
    cache when fresh, otherwise builds the sample via ``_build_stub_items``
    (with a small retry on an empty result), records metrics, caches the
    payload and returns it. Payloads carry a ``cache_hit`` flag.

    Fix: ``_PREVIEW_ERROR_COUNT`` is now in the ``global`` declaration —
    previously the ``_PREVIEW_ERROR_COUNT += 1`` statements raised
    UnboundLocalError instead of recording the failure.
    """
    global _PREVIEW_REQUESTS, _PREVIEW_CACHE_HITS, _PREVIEW_BUILD_MS_TOTAL, _PREVIEW_BUILD_COUNT, _PREVIEW_ERROR_COUNT
    idx = load_index()
    slug = slugify(theme_id)
    entry = idx.slug_to_entry.get(slug)
    if not entry:
        raise KeyError("theme_not_found")
    # Use uncapped synergies for better placeholder coverage (diagnostics flag gating not applied here; placeholder only)
    detail = project_detail(slug, entry, idx.slug_to_yaml, uncapped=uncapped)
    colors_key = colors or None
    commander_key = commander or None
    cache_key = (slug, limit, colors_key, commander_key, idx.etag)
    _PREVIEW_REQUESTS += 1
    cached = _PREVIEW_CACHE.get(cache_key)
    if cached and (_now() - cached["_cached_at"]) < TTL_SECONDS:
        _PREVIEW_CACHE_HITS += 1
        _RECENT_HITS.append(True)
        # Count request (even if cache hit) for per-theme metrics
        _PREVIEW_PER_THEME_REQUESTS[slug] = _PREVIEW_PER_THEME_REQUESTS.get(slug, 0) + 1
        # Structured cache hit log (diagnostics gated)
        try:
            if (os.getenv("WEB_THEME_PREVIEW_LOG") or "").lower() in {"1","true","yes","on"}:
                print(json.dumps({
                    "event": "theme_preview_cache_hit",
                    "theme": slug,
                    "limit": limit,
                    "colors": colors_key,
                    "commander": commander_key,
                    "ttl_remaining_s": round(TTL_SECONDS - (_now() - cached["_cached_at"]), 2)
                }, separators=(",",":"))) # noqa: T201
        except Exception:
            pass
        # Annotate cache hit flag (shallow copy to avoid mutating stored payload timings)
        payload_cached = dict(cached["payload"])
        payload_cached["cache_hit"] = True
        return payload_cached
    _RECENT_HITS.append(False)
    # Build items
    t0 = _now()
    try:
        items = _build_stub_items(detail, limit, colors_key, commander=commander_key)
    except Exception:
        # Record error histogram & propagate (bare raise keeps the traceback)
        _PREVIEW_PER_THEME_ERRORS[slug] = _PREVIEW_PER_THEME_ERRORS.get(slug, 0) + 1
        _PREVIEW_ERROR_COUNT += 1
        raise
    # Race condition guard (P2 RESILIENCE): if we somehow produced an empty sample
    # (e.g., catalog rebuild mid-flight) retry a limited number of times with small backoff.
    if not items:
        for _retry in range(2): # up to 2 retries
            time.sleep(0.05)
            try:
                items = _build_stub_items(detail, limit, colors_key, commander=commander_key)
            except Exception:
                _PREVIEW_PER_THEME_ERRORS[slug] = _PREVIEW_PER_THEME_ERRORS.get(slug, 0) + 1
                _PREVIEW_ERROR_COUNT += 1
                break
            if items:
                try:
                    print(json.dumps({"event":"theme_preview_retry_after_empty","theme":slug})) # noqa: T201
                except Exception:
                    pass
                break
    build_ms = (_now() - t0) * 1000.0
    _PREVIEW_BUILD_MS_TOTAL += build_ms
    _PREVIEW_BUILD_COUNT += 1
    # Duplicate suppression safety across roles (should already be unique, defensive)
    seen_names: set[str] = set()
    dedup: List[Dict[str, Any]] = []
    for it in items:
        nm = it.get("name")
        if not nm:
            continue
        if nm in seen_names:
            continue
        seen_names.add(nm)
        dedup.append(it)
    items = dedup
    # Aggregate statistics
    curated_count = sum(1 for i in items if any(r in {"example", "curated_synergy"} for r in (i.get("roles") or [])))
    sampled_core_roles = {"payoff", "enabler", "support", "wildcard"}
    role_counts_local: Dict[str, int] = {r: 0 for r in sampled_core_roles}
    for i in items:
        roles = i.get("roles") or []
        for r in roles:
            if r in role_counts_local:
                role_counts_local[r] += 1
    # Update global counters
    global _ROLE_GLOBAL_COUNTS, _CURATED_GLOBAL, _SAMPLED_GLOBAL
    for r, c in role_counts_local.items():
        _ROLE_GLOBAL_COUNTS[r] = _ROLE_GLOBAL_COUNTS.get(r, 0) + c
    _CURATED_GLOBAL += curated_count
    _SAMPLED_GLOBAL += sum(role_counts_local.values())
    _BUILD_DURATIONS.append(build_ms)
    per = _PREVIEW_PER_THEME.setdefault(slug, {"builds": 0, "total_ms": 0.0, "durations": deque(maxlen=50), "role_counts": {r: 0 for r in sampled_core_roles}, "curated": 0, "sampled": 0})
    per["builds"] += 1
    per["total_ms"] += build_ms
    per["durations"].append(build_ms)
    per["curated"] += curated_count
    per["sampled"] += sum(role_counts_local.values())
    for r, c in role_counts_local.items():
        per["role_counts"][r] = per["role_counts"].get(r, 0) + c
    synergies_used = detail.get("uncapped_synergies") or detail.get("synergies") or []
    payload = {
        "theme_id": slug,
        "theme": detail.get("theme"),
        "count_total": len(items), # population size TBD when full sampling added
        "sample": items,
        "synergies_used": synergies_used,
        "generated_at": idx.catalog.metadata_info.generated_at if idx.catalog.metadata_info else None,
        "colors_filter": colors_key,
        "commander": commander_key,
        "stub": False if any(it.get("roles") and it["roles"][0] in {"payoff", "support", "enabler", "wildcard"} for it in items) else True,
        "role_counts": role_counts_local,
        "curated_pct": round((curated_count / max(1, len(items))) * 100, 2),
        "build_ms": round(build_ms, 2),
        "curated_total": curated_count,
        "sampled_total": sum(role_counts_local.values()),
        "cache_hit": False,
    }
    _PREVIEW_CACHE[cache_key] = {"payload": payload, "_cached_at": _now()}
    _PREVIEW_CACHE.move_to_end(cache_key)
    _enforce_cache_limit()
    # Track request count post-build
    _PREVIEW_PER_THEME_REQUESTS[slug] = _PREVIEW_PER_THEME_REQUESTS.get(slug, 0) + 1
    # Structured logging (opt-in)
    try:
        if (os.getenv("WEB_THEME_PREVIEW_LOG") or "").lower() in {"1","true","yes","on"}:
            log_obj = {
                "event": "theme_preview_build",
                "theme": slug,
                "limit": limit,
                "colors": colors_key,
                "commander": commander_key,
                "build_ms": round(build_ms, 2),
                "curated_pct": payload["curated_pct"],
                "curated_total": payload["curated_total"],
                "sampled_total": payload["sampled_total"],
                "role_counts": role_counts_local,
                "cache_hit": False,
            }
            print(json.dumps(log_obj, separators=(",",":"))) # noqa: T201
    except Exception:
        pass
    # Post-build adaptive TTL evaluation & background refresher initialization
    _maybe_adapt_ttl(_now())
    _ensure_bg_refresh_thread()
    return payload
def _percentile(sorted_vals: List[float], pct: float) -> float:
if not sorted_vals:
return 0.0
k = (len(sorted_vals) - 1) * pct
f = int(k)
c = min(f + 1, len(sorted_vals) - 1)
if f == c:
return sorted_vals[f]
d0 = sorted_vals[f] * (c - k)
d1 = sorted_vals[c] * (k - f)
return d0 + d1
def preview_metrics() -> Dict[str, Any]:
    """Snapshot of preview subsystem metrics for diagnostics endpoints.

    Aggregates request/hit counters, build latency (average plus p95 over
    the rolling duration window), role distribution vs. targets,
    curated-vs-sampled editorial coverage, per-theme stats (first 50
    themes), error rates, TTL/adaptation state, and curated synergy matrix
    status. Read-only: mutates no counters.
    """
    avg_ms = (_PREVIEW_BUILD_MS_TOTAL / _PREVIEW_BUILD_COUNT) if _PREVIEW_BUILD_COUNT else 0.0
    durations_list = sorted(list(_BUILD_DURATIONS))
    p95 = _percentile(durations_list, 0.95)
    # Role distribution actual vs target (aggregate)
    total_roles = sum(_ROLE_GLOBAL_COUNTS.values()) or 1
    target = {"payoff": 0.4, "enabler+support": 0.4, "wildcard": 0.2}
    actual_enabler_support = (_ROLE_GLOBAL_COUNTS.get("enabler", 0) + _ROLE_GLOBAL_COUNTS.get("support", 0)) / total_roles
    role_distribution = {
        "payoff": {
            "count": _ROLE_GLOBAL_COUNTS.get("payoff", 0),
            "actual_pct": round((_ROLE_GLOBAL_COUNTS.get("payoff", 0) / total_roles) * 100, 2),
            "target_pct": target["payoff"] * 100,
        },
        "enabler_support": {
            "count": _ROLE_GLOBAL_COUNTS.get("enabler", 0) + _ROLE_GLOBAL_COUNTS.get("support", 0),
            "actual_pct": round(actual_enabler_support * 100, 2),
            "target_pct": target["enabler+support"] * 100,
        },
        "wildcard": {
            "count": _ROLE_GLOBAL_COUNTS.get("wildcard", 0),
            "actual_pct": round((_ROLE_GLOBAL_COUNTS.get("wildcard", 0) / total_roles) * 100, 2),
            "target_pct": target["wildcard"] * 100,
        },
    }
    editorial_coverage_pct = round((_CURATED_GLOBAL / max(1, (_CURATED_GLOBAL + _SAMPLED_GLOBAL))) * 100, 2)
    per_theme_stats = {}
    # Cap the per-theme breakdown at 50 entries to bound payload size.
    for slug, data in list(_PREVIEW_PER_THEME.items())[:50]:
        durs = list(data.get("durations", []))
        sd = sorted(durs)
        p50 = _percentile(sd, 0.50)
        p95_local = _percentile(sd, 0.95)
        per_theme_stats[slug] = {
            "avg_ms": round(data["total_ms"] / max(1, data["builds"]), 2),
            "p50_ms": round(p50, 2),
            "p95_ms": round(p95_local, 2),
            "builds": data["builds"],
            "avg_curated_pct": round((data["curated"] / max(1, (data["curated"] + data["sampled"])) ) * 100, 2),
            "requests": _PREVIEW_PER_THEME_REQUESTS.get(slug, 0),
            "curated_total": data.get("curated", 0),
            "sampled_total": data.get("sampled", 0),
        }
    error_rate = 0.0
    total_req = _PREVIEW_REQUESTS or 0
    if total_req:
        error_rate = round((_PREVIEW_ERROR_COUNT / total_req) * 100, 2)
    # Example coverage enforcement flag: when curated coverage exceeds threshold (default 90%)
    try:
        enforce_threshold = float(os.getenv("EXAMPLE_ENFORCE_THRESHOLD", "90"))
    except Exception:
        enforce_threshold = 90.0
    example_enforcement_active = editorial_coverage_pct >= enforce_threshold
    return {
        "preview_requests": _PREVIEW_REQUESTS,
        "preview_cache_hits": _PREVIEW_CACHE_HITS,
        "preview_cache_entries": len(_PREVIEW_CACHE),
        "preview_avg_build_ms": round(avg_ms, 2),
        "preview_p95_build_ms": round(p95, 2),
        "preview_error_rate_pct": error_rate,
        "preview_client_fetch_errors": _PREVIEW_REQUEST_ERROR_COUNT,
        "preview_ttl_seconds": TTL_SECONDS,
        "preview_ttl_adaptive": _ADAPTATION_ENABLED,
        "preview_ttl_window": len(_RECENT_HITS),
        "preview_last_bust_at": _PREVIEW_LAST_BUST_AT,
        "role_distribution": role_distribution,
        "editorial_curated_vs_sampled_pct": editorial_coverage_pct,
        "example_enforcement_active": example_enforcement_active,
        "example_enforce_threshold_pct": enforce_threshold,
        "editorial_curated_total": _CURATED_GLOBAL,
        "editorial_sampled_total": _SAMPLED_GLOBAL,
        "per_theme": per_theme_stats,
        "per_theme_errors": dict(list(_PREVIEW_PER_THEME_ERRORS.items())[:50]),
        "curated_synergy_matrix_loaded": _CURATED_SYNERGY_MATRIX is not None,
        "curated_synergy_matrix_size": sum(len(v) for v in _CURATED_SYNERGY_MATRIX.values()) if _CURATED_SYNERGY_MATRIX else 0,
    }
def bust_preview_cache(reason: str | None = None) -> None:
"""Clear in-memory preview cache (e.g., after catalog rebuild or tagging).
Exposed for orchestrator hooks. Keeps metrics counters (requests/hits) for
observability; records last bust timestamp.
"""
global _PREVIEW_CACHE, _PREVIEW_LAST_BUST_AT
try: # defensive; never raise
_PREVIEW_CACHE.clear()
import time as _t
_PREVIEW_LAST_BUST_AT = _t.time()
except Exception:
pass