feat(web): Core Refactor Phase A — extract sampling and cache modules; add adaptive TTL + eviction heuristics, Redis PoC, and metrics wiring. Tests added for TTL, eviction, exports, splash-adaptive, card index, and service worker. Docs+roadmap updated.

matt 2025-09-24 13:57:23 -07:00
parent c4a7fc48ea
commit a029d430c5
49 changed files with 3889 additions and 701 deletions

@@ -0,0 +1,137 @@
"""Card index construction & lookup (extracted from sampling / theme_preview).
Phase A refactor: Provides a thin API for building and querying the in-memory
card index keyed by tag/theme. Future enhancements may introduce a persistent
cache layer or precomputed artifact.
Public API:
maybe_build_index() -> None
get_tag_pool(tag: str) -> list[dict]
lookup_commander(name: str) -> dict | None
The index is rebuilt lazily when the mtime of any CSV shard file changes.
"""
from __future__ import annotations
from pathlib import Path
import csv
import os
from typing import Any, Dict, List, Optional
CARD_FILES_GLOB = [
Path("csv_files/blue_cards.csv"),
Path("csv_files/white_cards.csv"),
Path("csv_files/black_cards.csv"),
Path("csv_files/red_cards.csv"),
Path("csv_files/green_cards.csv"),
Path("csv_files/colorless_cards.csv"),
Path("csv_files/cards.csv"), # fallback large file last
]
THEME_TAGS_COL = "themeTags"
NAME_COL = "name"
COLOR_IDENTITY_COL = "colorIdentity"
MANA_COST_COL = "manaCost"
RARITY_COL = "rarity"
_CARD_INDEX: Dict[str, List[Dict[str, Any]]] = {}
_CARD_INDEX_MTIME: float | None = None
_RARITY_NORM = {
"mythic rare": "mythic",
"mythic": "mythic",
"m": "mythic",
"rare": "rare",
"r": "rare",
"uncommon": "uncommon",
"u": "uncommon",
"common": "common",
"c": "common",
}
def _normalize_rarity(raw: str) -> str:
r = (raw or "").strip().lower()
return _RARITY_NORM.get(r, r)
def _resolve_card_files() -> List[Path]:
"""Return base card file list + any extra test files supplied via env.
Environment variable: CARD_INDEX_EXTRA_CSV can contain a comma or semicolon
separated list of additional CSV paths (used by tests to inject synthetic
edge cases without polluting production shards).
"""
files: List[Path] = list(CARD_FILES_GLOB)
extra = os.getenv("CARD_INDEX_EXTRA_CSV")
if extra:
for part in extra.replace(";", ",").split(","):
p = part.strip()
if not p:
continue
path_obj = Path(p)
# Include even if missing; maybe created later in test before build
files.append(path_obj)
return files
def maybe_build_index() -> None:
"""Rebuild the index if any card CSV mtime changed.
Incorporates any extra CSVs specified via CARD_INDEX_EXTRA_CSV.
"""
global _CARD_INDEX, _CARD_INDEX_MTIME
latest = 0.0
card_files = _resolve_card_files()
for p in card_files:
if p.exists():
mt = p.stat().st_mtime
if mt > latest:
latest = mt
if _CARD_INDEX and _CARD_INDEX_MTIME and latest <= _CARD_INDEX_MTIME:
return
new_index: Dict[str, List[Dict[str, Any]]] = {}
for p in card_files:
if not p.exists():
continue
try:
with p.open("r", encoding="utf-8", newline="") as fh:
reader = csv.DictReader(fh)
if not reader.fieldnames or THEME_TAGS_COL not in reader.fieldnames:
continue
for row in reader:
name = row.get(NAME_COL) or row.get("faceName") or ""
tags_raw = row.get(THEME_TAGS_COL) or ""
tags = [t.strip(" '[]") for t in tags_raw.split(',') if t.strip()] if tags_raw else []
if not tags:
continue
color_id = (row.get(COLOR_IDENTITY_COL) or "").strip()
mana_cost = (row.get(MANA_COST_COL) or "").strip()
rarity = _normalize_rarity(row.get(RARITY_COL) or "")
for tg in tags:
if not tg:
continue
new_index.setdefault(tg, []).append({
"name": name,
"color_identity": color_id,
"tags": tags,
"mana_cost": mana_cost,
"rarity": rarity,
"color_identity_list": list(color_id) if color_id else [],
"pip_colors": [c for c in mana_cost if c in {"W","U","B","R","G"}],
})
except Exception:
continue
_CARD_INDEX = new_index
_CARD_INDEX_MTIME = latest
def get_tag_pool(tag: str) -> List[Dict[str, Any]]:
return _CARD_INDEX.get(tag, [])
def lookup_commander(name: Optional[str]) -> Optional[Dict[str, Any]]:
if not name:
return None
needle = name.lower().strip()
for tag_cards in _CARD_INDEX.values():
for c in tag_cards:
if c.get("name", "").lower() == needle:
return c
return None
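# --- Illustrative usage sketch (documentation only; never called) --- #
# A minimal sketch of how a caller or test might drive the index. The extra CSV
# path, theme tag, and commander name below are hypothetical examples.
def _example_usage() -> None:  # pragma: no cover - documentation sketch
    os.environ["CARD_INDEX_EXTRA_CSV"] = "csv_files/_synthetic_edge_cases.csv"
    maybe_build_index()                      # no-op unless a shard mtime changed
    pool = get_tag_pool("Lifegain")          # [] if the tag is not indexed
    commander = lookup_commander("Atraxa, Praetors' Voice")  # None if not found
    print(len(pool), commander is not None)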

@@ -0,0 +1,323 @@
"""Preview cache utilities & adaptive policy (Core Refactor Phase A continued).
This module now owns:
- In-memory preview cache (OrderedDict)
- Cache bust helper
- Adaptive TTL policy & recent hit tracking
- Background refresh thread orchestration (warming top-K hot themes)
The `theme_preview` orchestrator invokes `record_request_hit()` and
`maybe_adapt_ttl()` after each build/cache check, and calls `ensure_bg_thread()`
post-build. Metrics are still aggregated in `theme_preview`, but TTL state lives
here to prepare for a future backend abstraction.
"""
from __future__ import annotations
from collections import OrderedDict, deque
from typing import Any, Dict, Tuple, Callable
import time as _t
import os
import json
import threading
import math
from .preview_metrics import record_eviction # type: ignore
# Phase 2 extraction: adaptive TTL band policy moved into preview_policy
from .preview_policy import (
compute_ttl_adjustment,
DEFAULT_TTL_BASE as _POLICY_TTL_BASE,
DEFAULT_TTL_MIN as _POLICY_TTL_MIN,
DEFAULT_TTL_MAX as _POLICY_TTL_MAX,
)
from .preview_cache_backend import redis_store # type: ignore
TTL_SECONDS = 600
# Backward-compat variable names retained (tests may reference them); they map to the policy constants
_TTL_BASE = _POLICY_TTL_BASE
_TTL_MIN = _POLICY_TTL_MIN
_TTL_MAX = _POLICY_TTL_MAX
_ADAPT_SAMPLE_WINDOW = 120
_ADAPT_INTERVAL_S = 30
_ADAPTATION_ENABLED = (os.getenv("THEME_PREVIEW_ADAPTIVE") or "").lower() in {"1","true","yes","on"}
_RECENT_HITS: "deque[bool]" = deque(maxlen=_ADAPT_SAMPLE_WINDOW)
_LAST_ADAPT_AT: float | None = None
_BG_REFRESH_THREAD_STARTED = False
_BG_REFRESH_INTERVAL_S = int(os.getenv("THEME_PREVIEW_BG_REFRESH_INTERVAL") or 120)
_BG_REFRESH_ENABLED = (os.getenv("THEME_PREVIEW_BG_REFRESH") or "").lower() in {"1","true","yes","on"}
_BG_REFRESH_MIN = 30
_BG_REFRESH_MAX = max(300, _BG_REFRESH_INTERVAL_S * 5)
def record_request_hit(hit: bool) -> None:
_RECENT_HITS.append(hit)
def recent_hit_window() -> int:
return len(_RECENT_HITS)
def ttl_seconds() -> int:
return TTL_SECONDS
def _maybe_adapt_ttl(now: float) -> None:
"""Apply adaptive TTL adjustment using extracted policy.
Keeps prior guards (sample window, interval) for stability; only the
banded adjustment math has moved to preview_policy.
"""
global TTL_SECONDS, _LAST_ADAPT_AT
if not _ADAPTATION_ENABLED:
return
if len(_RECENT_HITS) < max(30, int(_ADAPT_SAMPLE_WINDOW * 0.5)):
return
if _LAST_ADAPT_AT and (now - _LAST_ADAPT_AT) < _ADAPT_INTERVAL_S:
return
hit_ratio = sum(1 for h in _RECENT_HITS if h) / len(_RECENT_HITS)
new_ttl = compute_ttl_adjustment(hit_ratio, TTL_SECONDS, _TTL_BASE, _TTL_MIN, _TTL_MAX)
if new_ttl != TTL_SECONDS:
TTL_SECONDS = new_ttl
try: # pragma: no cover - defensive logging
print(json.dumps({
"event": "theme_preview_ttl_adapt",
"hit_ratio": round(hit_ratio, 3),
"ttl": TTL_SECONDS,
})) # noqa: T201
except Exception:
pass
_LAST_ADAPT_AT = now
def maybe_adapt_ttl() -> None:
_maybe_adapt_ttl(_t.time())
def _bg_refresh_loop(build_top_slug: Callable[[str], None], get_hot_slugs: Callable[[], list[str]]): # pragma: no cover
while True:
if not _BG_REFRESH_ENABLED:
return
try:
for slug in get_hot_slugs():
try:
build_top_slug(slug)
except Exception:
continue
except Exception:
pass
_t.sleep(_BG_REFRESH_INTERVAL_S)
def ensure_bg_thread(build_top_slug: Callable[[str], None], get_hot_slugs: Callable[[], list[str]]): # pragma: no cover
global _BG_REFRESH_THREAD_STARTED
if _BG_REFRESH_THREAD_STARTED or not _BG_REFRESH_ENABLED:
return
try:
th = threading.Thread(target=_bg_refresh_loop, args=(build_top_slug, get_hot_slugs), name="theme_preview_bg_refresh", daemon=True)
th.start()
_BG_REFRESH_THREAD_STARTED = True
except Exception:
pass
PREVIEW_CACHE: "OrderedDict[Tuple[str, int, str | None, str | None, str], Dict[str, Any]]" = OrderedDict()
# Cache entry shape (dict) — groundwork for adaptive eviction (Phase 2)
# Keys:
# payload: preview payload dict
# _cached_at / cached_at: epoch seconds when stored (TTL reference; _cached_at kept for backward compat)
# inserted_at: epoch seconds first insertion
# last_access: epoch seconds of last successful cache hit
# hit_count: int number of cache hits (excludes initial store)
# build_cost_ms: float build duration captured at store time (used for cost-based protection)
def register_cache_hit(key: Tuple[str, int, str | None, str | None, str]) -> None:
entry = PREVIEW_CACHE.get(key)
if not entry:
return
now = _t.time()
# Initialize metadata if legacy entry present
if "inserted_at" not in entry:
entry["inserted_at"] = entry.get("_cached_at", now)
entry["last_access"] = now
entry["hit_count"] = int(entry.get("hit_count", 0)) + 1
def store_cache_entry(key: Tuple[str, int, str | None, str | None, str], payload: Dict[str, Any], build_cost_ms: float) -> None:
now = _t.time()
PREVIEW_CACHE[key] = {
"payload": payload,
"_cached_at": now, # legacy field name
"cached_at": now,
"inserted_at": now,
"last_access": now,
"hit_count": 0,
"build_cost_ms": float(build_cost_ms),
}
PREVIEW_CACHE.move_to_end(key)
# Optional Redis write-through (best-effort)
try:
if os.getenv("THEME_PREVIEW_REDIS_URL") and not os.getenv("THEME_PREVIEW_REDIS_DISABLE"):
redis_store(key, payload, int(TTL_SECONDS), build_cost_ms)
except Exception:
pass
# --- Adaptive Eviction Weight & Threshold Resolution (Phase 2 Step 4) --- #
_EVICT_WEIGHTS_CACHE: Dict[str, float] | None = None
_EVICT_THRESH_CACHE: Tuple[float, float, float] | None = None
def _resolve_eviction_weights() -> Dict[str, float]:
global _EVICT_WEIGHTS_CACHE
if _EVICT_WEIGHTS_CACHE is not None:
return _EVICT_WEIGHTS_CACHE
def _f(env_key: str, default: float) -> float:
raw = os.getenv(env_key)
if not raw:
return default
try:
return float(raw)
except Exception:
return default
_EVICT_WEIGHTS_CACHE = {
"W_HITS": _f("THEME_PREVIEW_EVICT_W_HITS", 3.0),
"W_RECENCY": _f("THEME_PREVIEW_EVICT_W_RECENCY", 2.0),
"W_COST": _f("THEME_PREVIEW_EVICT_W_COST", 1.0),
"W_AGE": _f("THEME_PREVIEW_EVICT_W_AGE", 1.5),
}
return _EVICT_WEIGHTS_CACHE
def _resolve_cost_thresholds() -> Tuple[float, float, float]:
global _EVICT_THRESH_CACHE
if _EVICT_THRESH_CACHE is not None:
return _EVICT_THRESH_CACHE
raw = os.getenv("THEME_PREVIEW_EVICT_COST_THRESHOLDS", "5,15,40")
parts = [p.strip() for p in raw.split(',') if p.strip()]
nums: list[float] = []
for p in parts:
try:
nums.append(float(p))
except Exception:
pass
while len(nums) < 3:
# pad with defaults if insufficient
defaults = [5.0, 15.0, 40.0]
nums.append(defaults[len(nums)])
nums = sorted(nums[:3])
_EVICT_THRESH_CACHE = (nums[0], nums[1], nums[2])
return _EVICT_THRESH_CACHE
def _cost_bucket(build_cost_ms: float) -> int:
t1, t2, t3 = _resolve_cost_thresholds()
if build_cost_ms < t1:
return 0
if build_cost_ms < t2:
return 1
if build_cost_ms < t3:
return 2
return 3
def compute_protection_score(entry: Dict[str, Any], now: float | None = None) -> float:
"""Compute protection score (higher = more protected from eviction).
Score components:
- hit_count (log scaled) weighted by W_HITS
- recency (inverse minutes since last access) weighted by W_RECENCY
- build cost bucket weighted by W_COST
- age penalty (minutes since insert) weighted by W_AGE (subtracted)
"""
if now is None:
now = _t.time()
weights = _resolve_eviction_weights()
inserted = float(entry.get("inserted_at", now))
last_access = float(entry.get("last_access", inserted))
hits = int(entry.get("hit_count", 0))
build_cost_ms = float(entry.get("build_cost_ms", 0.0))
minutes_since_last = max(0.0, (now - last_access) / 60.0)
minutes_since_insert = max(0.0, (now - inserted) / 60.0)
recency_score = 1.0 / (1.0 + minutes_since_last)
age_score = minutes_since_insert
cost_b = _cost_bucket(build_cost_ms)
score = (
weights["W_HITS"] * math.log(1 + hits)
+ weights["W_RECENCY"] * recency_score
+ weights["W_COST"] * cost_b
- weights["W_AGE"] * age_score
)
return float(score)
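# Worked example (documentation only): with the default weights
# (W_HITS=3.0, W_RECENCY=2.0, W_COST=1.0, W_AGE=1.5) and default cost
# thresholds (5, 15, 40 ms), an entry with 3 hits, last accessed 1 minute ago,
# inserted 10 minutes ago, and a 20 ms build cost scores roughly
#   3.0 * ln(1 + 3) + 2.0 * (1 / (1 + 1)) + 1.0 * 2 - 1.5 * 10
#   ~= 4.16 + 1.0 + 2.0 - 15.0 ~= -7.8
# i.e. a long-idle entry loses protection quickly even if it was hot earlier.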
# --- Eviction Logic (Phase 2 Step 6) --- #
def _cache_max() -> int:
try:
raw = os.getenv("THEME_PREVIEW_CACHE_MAX") or "400"
v = int(raw)
if v <= 0:
raise ValueError
return v
except Exception:
return 400
def evict_if_needed() -> None:
"""Adaptive eviction replacing FIFO.
Strategy:
- If size <= limit: no-op
- If size > 2*limit: emergency overflow path (age-based removal until within limit)
- Else: remove lowest protection score entry (single) if over limit
"""
try:
# Removed previous hard floor (50) to allow test scenarios with small limits.
# Operational deployments can still set higher env value. Tests rely on low limits
# (e.g., 5) to exercise eviction deterministically.
limit = _cache_max()
size = len(PREVIEW_CACHE)
if size <= limit:
return
now = _t.time()
# Emergency overflow path
if size > 2 * limit:
while len(PREVIEW_CACHE) > limit:
# Oldest by inserted_at/_cached_at
oldest_key = min(
PREVIEW_CACHE.items(),
key=lambda kv: kv[1].get("inserted_at", kv[1].get("_cached_at", 0.0)),
)[0]
entry = PREVIEW_CACHE.pop(oldest_key)
meta = {
"hit_count": int(entry.get("hit_count", 0)),
"age_ms": int((now - entry.get("inserted_at", now)) * 1000),
"build_cost_ms": float(entry.get("build_cost_ms", 0.0)),
"protection_score": compute_protection_score(entry, now),
"reason": "emergency_overflow",
"cache_limit": limit,
"size_before": size,
"size_after": len(PREVIEW_CACHE),
}
record_eviction(meta)
return
# Standard single-entry score-based eviction
lowest_key = None
lowest_score = None
for key, entry in PREVIEW_CACHE.items():
score = compute_protection_score(entry, now)
if lowest_score is None or score < lowest_score:
lowest_key = key
lowest_score = score
if lowest_key is not None:
entry = PREVIEW_CACHE.pop(lowest_key)
meta = {
"hit_count": int(entry.get("hit_count", 0)),
"age_ms": int((now - entry.get("inserted_at", now)) * 1000),
"build_cost_ms": float(entry.get("build_cost_ms", 0.0)),
"protection_score": float(lowest_score if lowest_score is not None else 0.0),
"reason": "low_score",
"cache_limit": limit,
"size_before": size,
"size_after": len(PREVIEW_CACHE),
}
record_eviction(meta)
except Exception:
# Fail quiet; eviction is best-effort
pass
_PREVIEW_LAST_BUST_AT: float | None = None
def bust_preview_cache(reason: str | None = None) -> None: # pragma: no cover (trivial)
global PREVIEW_CACHE, _PREVIEW_LAST_BUST_AT
try:
PREVIEW_CACHE.clear()
_PREVIEW_LAST_BUST_AT = _t.time()
except Exception:
pass
def preview_cache_last_bust_at() -> float | None:
return _PREVIEW_LAST_BUST_AT
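# --- Illustrative usage sketch (documentation only; never called) --- #
# A minimal sketch of how an orchestrator might drive this module. The key tuple
# matches the PREVIEW_CACHE annotation, but the concrete values are hypothetical.
def _example_usage(payload: Dict[str, Any]) -> None:  # pragma: no cover - documentation sketch
    key = ("lifegain", 12, None, None, "default")
    cached = PREVIEW_CACHE.get(key)
    if cached:
        register_cache_hit(key)      # refresh last_access / hit_count metadata
        record_request_hit(True)
    else:
        store_cache_entry(key, payload, build_cost_ms=42.0)
        evict_if_needed()            # score-based eviction once over THEME_PREVIEW_CACHE_MAX
        record_request_hit(False)
    maybe_adapt_ttl()                # only adjusts when THEME_PREVIEW_ADAPTIVE is enabled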

@@ -0,0 +1,113 @@
"""Cache backend abstraction (Phase 2 extension) with Redis PoC.
The in-memory cache remains authoritative for adaptive eviction heuristics.
This backend layer provides optional read-through / write-through to Redis
for latency & CPU comparison. It is intentionally minimal.
Environment:
THEME_PREVIEW_REDIS_URL=redis://host:port/db -> enables the PoC if redis-py is importable
THEME_PREVIEW_REDIS_DISABLE=1 -> hard-disables even if the URL is present
Behavior:
- On store: serialize payload + metadata into JSON and SETEX with TTL.
- On get (memory miss only): attempt Redis GET and rehydrate (respect TTL).
- Failures are swallowed; metrics track attempts/hits/errors.
No eviction coordination is attempted; Redis TTL handles expiry. The goal is
purely observational at this stage.
"""
from __future__ import annotations
from typing import Optional, Dict, Any, Tuple
import json
import os
import time
try: # lazy optional dependency
import redis # type: ignore
except Exception: # pragma: no cover - absence path
redis = None # type: ignore
_URL = os.getenv("THEME_PREVIEW_REDIS_URL")
_DISABLED = (os.getenv("THEME_PREVIEW_REDIS_DISABLE") or "").lower() in {"1","true","yes","on"}
_CLIENT = None
_INIT_ERR: str | None = None
def _init() -> None:
global _CLIENT, _INIT_ERR
if _CLIENT is not None or _INIT_ERR is not None:
return
if _DISABLED or not _URL or not redis:
_INIT_ERR = "disabled_or_missing"
return
try:
_CLIENT = redis.Redis.from_url(_URL, socket_timeout=0.25) # type: ignore
# lightweight ping (non-fatal)
try:
_CLIENT.ping()
except Exception:
pass
except Exception as e: # pragma: no cover - network/dep issues
_INIT_ERR = f"init_error:{e}"[:120]
def backend_info() -> Dict[str, Any]:
return {
"enabled": bool(_CLIENT),
"init_error": _INIT_ERR,
"url_present": bool(_URL),
}
def _serialize(key: Tuple[str, int, str | None, str | None, str], payload: Dict[str, Any], build_cost_ms: float) -> str:
return json.dumps({
"k": list(key),
"p": payload,
"bc": build_cost_ms,
"ts": time.time(),
}, separators=(",", ":"))
def redis_store(key: Tuple[str, int, str | None, str | None, str], payload: Dict[str, Any], ttl_seconds: int, build_cost_ms: float) -> bool:
_init()
if not _CLIENT:
return False
try:
data = _serialize(key, payload, build_cost_ms)
# Compose a simple namespaced key; join tuple parts with '|'
skey = "tpv:" + "|".join([str(part) for part in key])
_CLIENT.setex(skey, ttl_seconds, data)
return True
except Exception: # pragma: no cover
return False
def redis_get(key: Tuple[str, int, str | None, str | None, str]) -> Optional[Dict[str, Any]]:
_init()
if not _CLIENT:
return None
try:
skey = "tpv:" + "|".join([str(part) for part in key])
raw: bytes | None = _CLIENT.get(skey) # type: ignore
if not raw:
return None
obj = json.loads(raw.decode("utf-8"))
# Expect shape from _serialize
payload = obj.get("p")
if not isinstance(payload, dict):
return None
return {
"payload": payload,
"_cached_at": float(obj.get("ts") or 0),
"cached_at": float(obj.get("ts") or 0),
"inserted_at": float(obj.get("ts") or 0),
"last_access": float(obj.get("ts") or 0),
"hit_count": 0,
"build_cost_ms": float(obj.get("bc") or 0.0),
}
except Exception: # pragma: no cover
return None
__all__ = [
"backend_info",
"redis_store",
"redis_get",
]
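# --- Illustrative usage sketch (documentation only; never called) --- #
# A minimal sketch of the write-through / read-through calls, assuming
# THEME_PREVIEW_REDIS_URL points at a reachable Redis instance. The key and
# payload values below are hypothetical.
def _example_usage() -> None:  # pragma: no cover - documentation sketch
    key = ("lifegain", 12, None, None, "default")
    stored = redis_store(key, {"items": []}, ttl_seconds=600, build_cost_ms=42.0)
    entry = redis_get(key) if stored else None  # rehydrated entry dict or None
    print(backend_info(), entry is not None)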

@@ -0,0 +1,285 @@
"""Metrics aggregation for theme preview service.
Extracted from `theme_preview.py` (Phase 2 refactor) to isolate
metrics/state reporting from orchestration & caching logic. This allows
future experimentation with alternative cache backends / eviction without
coupling metrics concerns.
Public API:
record_build_duration(ms: float)
record_role_counts(role_counts: dict[str,int])
record_curated_sampled(curated: int, sampled: int)
record_per_theme(slug: str, build_ms: float, curated: int, sampled: int)
record_request(hit: bool, error: bool = False, client_error: bool = False)
record_per_theme_error(slug: str)
preview_metrics() -> dict
The consuming orchestrator remains responsible for calling these hooks.
"""
from __future__ import annotations
from typing import Any, Dict, List
import os
# Global counters (mirrors previous names for backward compatibility where tests may introspect)
_PREVIEW_BUILD_MS_TOTAL = 0.0
_PREVIEW_BUILD_COUNT = 0
_BUILD_DURATIONS: List[float] = []
_ROLE_GLOBAL_COUNTS: dict[str, int] = {}
_CURATED_GLOBAL = 0
_SAMPLED_GLOBAL = 0
_PREVIEW_PER_THEME: dict[str, Dict[str, Any]] = {}
_PREVIEW_PER_THEME_REQUESTS: dict[str, int] = {}
_PREVIEW_PER_THEME_ERRORS: dict[str, int] = {}
_PREVIEW_REQUESTS = 0
_PREVIEW_CACHE_HITS = 0
_PREVIEW_ERROR_COUNT = 0
_PREVIEW_REQUEST_ERROR_COUNT = 0
_EVICTION_TOTAL = 0
_EVICTION_BY_REASON: dict[str, int] = {}
_EVICTION_LAST: dict[str, Any] | None = None
_SPLASH_OFF_COLOR_TOTAL = 0
_SPLASH_PREVIEWS_WITH_PENALTY = 0
_SPLASH_PENALTY_CARD_EVENTS = 0
_REDIS_GET_ATTEMPTS = 0
_REDIS_GET_HITS = 0
_REDIS_GET_ERRORS = 0
_REDIS_STORE_ATTEMPTS = 0
_REDIS_STORE_ERRORS = 0
def record_redis_get(hit: bool, error: bool = False):
global _REDIS_GET_ATTEMPTS, _REDIS_GET_HITS, _REDIS_GET_ERRORS
_REDIS_GET_ATTEMPTS += 1
if hit:
_REDIS_GET_HITS += 1
if error:
_REDIS_GET_ERRORS += 1
def record_redis_store(error: bool = False):
global _REDIS_STORE_ATTEMPTS, _REDIS_STORE_ERRORS
_REDIS_STORE_ATTEMPTS += 1
if error:
_REDIS_STORE_ERRORS += 1
# External state accessors (injected via set functions) to avoid import cycle
_ttl_seconds_fn = None
_recent_hit_window_fn = None
_cache_len_fn = None
_last_bust_at_fn = None
_curated_synergy_loaded_fn = None
_curated_synergy_size_fn = None
def configure_external_access(
ttl_seconds_fn,
recent_hit_window_fn,
cache_len_fn,
last_bust_at_fn,
curated_synergy_loaded_fn,
curated_synergy_size_fn,
):
global _ttl_seconds_fn, _recent_hit_window_fn, _cache_len_fn, _last_bust_at_fn, _curated_synergy_loaded_fn, _curated_synergy_size_fn
_ttl_seconds_fn = ttl_seconds_fn
_recent_hit_window_fn = recent_hit_window_fn
_cache_len_fn = cache_len_fn
_last_bust_at_fn = last_bust_at_fn
_curated_synergy_loaded_fn = curated_synergy_loaded_fn
_curated_synergy_size_fn = curated_synergy_size_fn
def record_build_duration(ms: float) -> None:
global _PREVIEW_BUILD_MS_TOTAL, _PREVIEW_BUILD_COUNT
_PREVIEW_BUILD_MS_TOTAL += ms
_PREVIEW_BUILD_COUNT += 1
_BUILD_DURATIONS.append(ms)
def record_role_counts(role_counts: Dict[str, int]) -> None:
for r, c in role_counts.items():
_ROLE_GLOBAL_COUNTS[r] = _ROLE_GLOBAL_COUNTS.get(r, 0) + c
def record_curated_sampled(curated: int, sampled: int) -> None:
global _CURATED_GLOBAL, _SAMPLED_GLOBAL
_CURATED_GLOBAL += curated
_SAMPLED_GLOBAL += sampled
def record_per_theme(slug: str, build_ms: float, curated: int, sampled: int) -> None:
data = _PREVIEW_PER_THEME.setdefault(slug, {"total_ms": 0.0, "builds": 0, "durations": [], "curated": 0, "sampled": 0})
data["total_ms"] += build_ms
data["builds"] += 1
durs = data["durations"]
durs.append(build_ms)
if len(durs) > 100:
del durs[0: len(durs) - 100]
data["curated"] += curated
data["sampled"] += sampled
def record_request(hit: bool, error: bool = False, client_error: bool = False) -> None:
global _PREVIEW_REQUESTS, _PREVIEW_CACHE_HITS, _PREVIEW_ERROR_COUNT, _PREVIEW_REQUEST_ERROR_COUNT
_PREVIEW_REQUESTS += 1
if hit:
_PREVIEW_CACHE_HITS += 1
if error:
_PREVIEW_ERROR_COUNT += 1
if client_error:
_PREVIEW_REQUEST_ERROR_COUNT += 1
def record_per_theme_error(slug: str) -> None:
_PREVIEW_PER_THEME_ERRORS[slug] = _PREVIEW_PER_THEME_ERRORS.get(slug, 0) + 1
def _percentile(sorted_vals: List[float], pct: float) -> float:
if not sorted_vals:
return 0.0
k = (len(sorted_vals) - 1) * pct
f = int(k)
c = min(f + 1, len(sorted_vals) - 1)
if f == c:
return sorted_vals[f]
d0 = sorted_vals[f] * (c - k)
d1 = sorted_vals[c] * (k - f)
return d0 + d1
def preview_metrics() -> Dict[str, Any]:
ttl_seconds = _ttl_seconds_fn() if _ttl_seconds_fn else 0
recent_window = _recent_hit_window_fn() if _recent_hit_window_fn else 0
cache_len = _cache_len_fn() if _cache_len_fn else 0
last_bust = _last_bust_at_fn() if _last_bust_at_fn else None
avg_ms = (_PREVIEW_BUILD_MS_TOTAL / _PREVIEW_BUILD_COUNT) if _PREVIEW_BUILD_COUNT else 0.0
durations_list = sorted(list(_BUILD_DURATIONS))
p95 = _percentile(durations_list, 0.95)
# Role distribution aggregate
total_roles = sum(_ROLE_GLOBAL_COUNTS.values()) or 1
target = {"payoff": 0.4, "enabler+support": 0.4, "wildcard": 0.2}
actual_enabler_support = (_ROLE_GLOBAL_COUNTS.get("enabler", 0) + _ROLE_GLOBAL_COUNTS.get("support", 0)) / total_roles
role_distribution = {
"payoff": {
"count": _ROLE_GLOBAL_COUNTS.get("payoff", 0),
"actual_pct": round((_ROLE_GLOBAL_COUNTS.get("payoff", 0) / total_roles) * 100, 2),
"target_pct": target["payoff"] * 100,
},
"enabler_support": {
"count": _ROLE_GLOBAL_COUNTS.get("enabler", 0) + _ROLE_GLOBAL_COUNTS.get("support", 0),
"actual_pct": round(actual_enabler_support * 100, 2),
"target_pct": target["enabler+support"] * 100,
},
"wildcard": {
"count": _ROLE_GLOBAL_COUNTS.get("wildcard", 0),
"actual_pct": round((_ROLE_GLOBAL_COUNTS.get("wildcard", 0) / total_roles) * 100, 2),
"target_pct": target["wildcard"] * 100,
},
}
editorial_coverage_pct = round((_CURATED_GLOBAL / max(1, (_CURATED_GLOBAL + _SAMPLED_GLOBAL))) * 100, 2)
per_theme_stats: Dict[str, Any] = {}
for slug, data in list(_PREVIEW_PER_THEME.items())[:50]:
durs = list(data.get("durations", []))
sd = sorted(durs)
p50 = _percentile(sd, 0.50)
p95_local = _percentile(sd, 0.95)
per_theme_stats[slug] = {
"avg_ms": round(data["total_ms"] / max(1, data["builds"]), 2),
"p50_ms": round(p50, 2),
"p95_ms": round(p95_local, 2),
"builds": data["builds"],
"avg_curated_pct": round((data["curated"] / max(1, (data["curated"] + data["sampled"])) ) * 100, 2),
"requests": _PREVIEW_PER_THEME_REQUESTS.get(slug, 0),
"curated_total": data.get("curated", 0),
"sampled_total": data.get("sampled", 0),
}
error_rate = 0.0
total_req = _PREVIEW_REQUESTS or 0
if total_req:
error_rate = round((_PREVIEW_ERROR_COUNT / total_req) * 100, 2)
try:
enforce_threshold = float(os.getenv("EXAMPLE_ENFORCE_THRESHOLD", "90"))
except Exception: # pragma: no cover
enforce_threshold = 90.0
example_enforcement_active = editorial_coverage_pct >= enforce_threshold
curated_synergy_loaded = _curated_synergy_loaded_fn() if _curated_synergy_loaded_fn else False
curated_synergy_size = _curated_synergy_size_fn() if _curated_synergy_size_fn else 0
return {
"preview_requests": _PREVIEW_REQUESTS,
"preview_cache_hits": _PREVIEW_CACHE_HITS,
"preview_cache_entries": cache_len,
"preview_cache_evictions": _EVICTION_TOTAL,
"preview_cache_evictions_by_reason": dict(_EVICTION_BY_REASON),
"preview_cache_eviction_last": _EVICTION_LAST,
"preview_avg_build_ms": round(avg_ms, 2),
"preview_p95_build_ms": round(p95, 2),
"preview_error_rate_pct": error_rate,
"preview_client_fetch_errors": _PREVIEW_REQUEST_ERROR_COUNT,
"preview_ttl_seconds": ttl_seconds,
"preview_ttl_adaptive": True,
"preview_ttl_window": recent_window,
"preview_last_bust_at": last_bust,
"role_distribution": role_distribution,
"editorial_curated_vs_sampled_pct": editorial_coverage_pct,
"example_enforcement_active": example_enforcement_active,
"example_enforce_threshold_pct": enforce_threshold,
"editorial_curated_total": _CURATED_GLOBAL,
"editorial_sampled_total": _SAMPLED_GLOBAL,
"per_theme": per_theme_stats,
"per_theme_errors": dict(list(_PREVIEW_PER_THEME_ERRORS.items())[:50]),
"curated_synergy_matrix_loaded": curated_synergy_loaded,
"curated_synergy_matrix_size": curated_synergy_size,
"splash_off_color_total_cards": _SPLASH_OFF_COLOR_TOTAL,
"splash_previews_with_penalty": _SPLASH_PREVIEWS_WITH_PENALTY,
"splash_penalty_reason_events": _SPLASH_PENALTY_CARD_EVENTS,
"redis_get_attempts": _REDIS_GET_ATTEMPTS,
"redis_get_hits": _REDIS_GET_HITS,
"redis_get_errors": _REDIS_GET_ERRORS,
"redis_store_attempts": _REDIS_STORE_ATTEMPTS,
"redis_store_errors": _REDIS_STORE_ERRORS,
}
__all__ = [
"record_build_duration",
"record_role_counts",
"record_curated_sampled",
"record_per_theme",
"record_request",
"record_per_theme_request",
"record_per_theme_error",
"record_eviction",
"preview_metrics",
"configure_external_access",
"record_splash_analytics",
"record_redis_get",
"record_redis_store",
]
def record_per_theme_request(slug: str) -> None:
"""Increment request counter for a specific theme (cache hit or miss).
This was previously in the monolith; extracted to keep per-theme request
counts consistent with new metrics module ownership.
"""
_PREVIEW_PER_THEME_REQUESTS[slug] = _PREVIEW_PER_THEME_REQUESTS.get(slug, 0) + 1
def record_eviction(meta: Dict[str, Any]) -> None:
"""Record a cache eviction event.
meta expected keys: reason, hit_count, age_ms, build_cost_ms, protection_score, cache_limit,
size_before, size_after.
"""
global _EVICTION_TOTAL, _EVICTION_LAST
_EVICTION_TOTAL += 1
reason = meta.get("reason", "unknown")
_EVICTION_BY_REASON[reason] = _EVICTION_BY_REASON.get(reason, 0) + 1
_EVICTION_LAST = meta
# Optional structured log
try: # pragma: no cover
if (os.getenv("WEB_THEME_PREVIEW_LOG") or "").lower() in {"1","true","yes","on"}:
import json as _json
print(_json.dumps({"event": "theme_preview_cache_evict", **meta}, separators=(",",":"))) # noqa: T201
except Exception:
pass
def record_splash_analytics(off_color_card_count: int, penalty_reason_events: int) -> None:
"""Record splash off-color analytics for a single preview build.
off_color_card_count: number of sampled cards marked with _splash_off_color flag.
penalty_reason_events: count of 'splash_off_color_penalty' reason entries encountered.
"""
global _SPLASH_OFF_COLOR_TOTAL, _SPLASH_PREVIEWS_WITH_PENALTY, _SPLASH_PENALTY_CARD_EVENTS
if off_color_card_count > 0:
_SPLASH_PREVIEWS_WITH_PENALTY += 1
_SPLASH_OFF_COLOR_TOTAL += off_color_card_count
if penalty_reason_events > 0:
_SPLASH_PENALTY_CARD_EVENTS += penalty_reason_events
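# --- Illustrative wiring sketch (documentation only; never called) --- #
# A minimal sketch of how the orchestrator might wire the external accessors and
# record one request/build. The lambdas stand in for the real preview_cache hooks;
# the slug and timings are hypothetical.
def _example_wiring() -> None:  # pragma: no cover - documentation sketch
    configure_external_access(
        ttl_seconds_fn=lambda: 600,
        recent_hit_window_fn=lambda: 0,
        cache_len_fn=lambda: 0,
        last_bust_at_fn=lambda: None,
        curated_synergy_loaded_fn=lambda: False,
        curated_synergy_size_fn=lambda: 0,
    )
    record_request(hit=False)
    record_build_duration(37.5)
    record_per_theme("lifegain", build_ms=37.5, curated=3, sampled=9)
    print(preview_metrics()["preview_avg_build_ms"])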

@@ -0,0 +1,167 @@
"""Preview policy module (Phase 2 extraction).
Extracts adaptive TTL band logic so experimentation can occur without
touching core cache data structures. Future extensions will add:
- Environment-variable overrides for band thresholds & step sizes
- Adaptive eviction strategy (hit-ratio + recency hybrid)
- Backend abstraction tuning knobs (e.g., Redis TTL harmonization)
Current exported API is intentionally small/stable:
compute_ttl_adjustment(hit_ratio: float, current_ttl: int,
base: int = DEFAULT_TTL_BASE,
ttl_min: int = DEFAULT_TTL_MIN,
ttl_max: int = DEFAULT_TTL_MAX) -> int
Given the recent hit ratio (0..1) and current TTL, returns the new TTL
after applying banded adjustment rules. Never mutates globals; caller
decides whether to commit the change.
Constants kept here mirror the prior inline values from preview_cache.
They are NOT yet configurable via env to keep behavior unchanged for
existing tests. A follow-up task will add env override + validation.
"""
from __future__ import annotations
from dataclasses import dataclass
import os
__all__ = [
"DEFAULT_TTL_BASE",
"DEFAULT_TTL_MIN",
"DEFAULT_TTL_MAX",
"BAND_LOW_CRITICAL",
"BAND_LOW_MODERATE",
"BAND_HIGH_GROW",
"compute_ttl_adjustment",
]
DEFAULT_TTL_BASE = 600
DEFAULT_TTL_MIN = 300
DEFAULT_TTL_MAX = 900
# Default hit ratio band thresholds (exclusive upper bounds for each tier)
_DEFAULT_BAND_LOW_CRITICAL = 0.25 # Severe miss rate: shrink TTL aggressively
_DEFAULT_BAND_LOW_MODERATE = 0.55 # Mild miss bias: converge back toward base
_DEFAULT_BAND_HIGH_GROW = 0.75 # Healthy hit rate: modest growth
# Public band variables (may be overridden via env at import time)
BAND_LOW_CRITICAL = _DEFAULT_BAND_LOW_CRITICAL
BAND_LOW_MODERATE = _DEFAULT_BAND_LOW_MODERATE
BAND_HIGH_GROW = _DEFAULT_BAND_HIGH_GROW
@dataclass(frozen=True)
class AdjustmentSteps:
low_critical: int = -60
low_mod_decrease: int = -30
low_mod_increase: int = 30
high_grow: int = 60
high_peak: int = 90 # very high hit ratio
_STEPS = AdjustmentSteps()
# --- Environment Override Support (POLICY Env overrides task) --- #
_ENV_APPLIED = False
def _parse_float_env(name: str, default: float) -> float:
raw = os.getenv(name)
if not raw:
return default
try:
v = float(raw)
if not (0.0 <= v <= 1.0):
return default
return v
except Exception:
return default
def _parse_int_env(name: str, default: int) -> int:
raw = os.getenv(name)
if not raw:
return default
try:
return int(raw)
except Exception:
return default
def _apply_env_overrides() -> None:
"""Idempotently apply environment overrides for bands & step sizes.
Env vars:
THEME_PREVIEW_TTL_BASE / _MIN / _MAX (ints)
THEME_PREVIEW_TTL_BANDS (comma floats: low_critical,low_moderate,high_grow)
THEME_PREVIEW_TTL_STEPS (comma ints: low_critical,low_mod_dec,low_mod_inc,high_grow,high_peak)
Invalid / partial specs fall back to defaults. Bands are validated to be
strictly increasing within (0,1). If validation fails, defaults retained.
"""
global DEFAULT_TTL_BASE, DEFAULT_TTL_MIN, DEFAULT_TTL_MAX
global BAND_LOW_CRITICAL, BAND_LOW_MODERATE, BAND_HIGH_GROW, _STEPS, _ENV_APPLIED
if _ENV_APPLIED:
return
DEFAULT_TTL_BASE = _parse_int_env("THEME_PREVIEW_TTL_BASE", DEFAULT_TTL_BASE)
DEFAULT_TTL_MIN = _parse_int_env("THEME_PREVIEW_TTL_MIN", DEFAULT_TTL_MIN)
DEFAULT_TTL_MAX = _parse_int_env("THEME_PREVIEW_TTL_MAX", DEFAULT_TTL_MAX)
# Ensure ordering min <= base <= max
if DEFAULT_TTL_MIN > DEFAULT_TTL_BASE:
DEFAULT_TTL_MIN = min(DEFAULT_TTL_MIN, DEFAULT_TTL_BASE)
if DEFAULT_TTL_BASE > DEFAULT_TTL_MAX:
DEFAULT_TTL_MAX = max(DEFAULT_TTL_BASE, DEFAULT_TTL_MAX)
bands_raw = os.getenv("THEME_PREVIEW_TTL_BANDS")
if bands_raw:
parts = [p.strip() for p in bands_raw.split(',') if p.strip()]
vals: list[float] = []
for p in parts[:3]:
try:
vals.append(float(p))
except Exception:
pass
if len(vals) == 3:
a, b, c = vals
if 0 < a < b < c < 1:
BAND_LOW_CRITICAL, BAND_LOW_MODERATE, BAND_HIGH_GROW = a, b, c
steps_raw = os.getenv("THEME_PREVIEW_TTL_STEPS")
if steps_raw:
parts = [p.strip() for p in steps_raw.split(',') if p.strip()]
ints: list[int] = []
for p in parts[:5]:
try:
ints.append(int(p))
except Exception:
pass
if len(ints) == 5:
_STEPS = AdjustmentSteps(
low_critical=ints[0],
low_mod_decrease=ints[1],
low_mod_increase=ints[2],
high_grow=ints[3],
high_peak=ints[4],
)
_ENV_APPLIED = True
# Apply overrides at import time (safe & idempotent)
_apply_env_overrides()
def compute_ttl_adjustment(
hit_ratio: float,
current_ttl: int,
base: int = DEFAULT_TTL_BASE,
ttl_min: int = DEFAULT_TTL_MIN,
ttl_max: int = DEFAULT_TTL_MAX,
) -> int:
"""Return a new TTL based on hit ratio & current TTL.
Logic mirrors the original inline implementation; extracted for clarity.
"""
new_ttl = current_ttl
if hit_ratio < BAND_LOW_CRITICAL:
new_ttl = max(ttl_min, current_ttl + _STEPS.low_critical)
elif hit_ratio < BAND_LOW_MODERATE:
if current_ttl > base:
new_ttl = max(base, current_ttl + _STEPS.low_mod_decrease)
elif current_ttl < base:
new_ttl = min(base, current_ttl + _STEPS.low_mod_increase)
# else already at base no change
elif hit_ratio < BAND_HIGH_GROW:
new_ttl = min(ttl_max, current_ttl + _STEPS.high_grow)
else:
new_ttl = min(ttl_max, current_ttl + _STEPS.high_peak)
return new_ttl
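# --- Illustrative usage sketch (documentation only; never called) --- #
# Assuming no THEME_PREVIEW_TTL_* overrides are set, the default bands/steps give:
# a 0.15 hit ratio shrinks a 600 s TTL toward the floor, a 0.90 hit ratio grows it,
# and growth is clamped at DEFAULT_TTL_MAX.
def _example_usage() -> None:  # pragma: no cover - documentation sketch
    assert compute_ttl_adjustment(0.15, 600) == 540  # 600 - 60 (low_critical step)
    assert compute_ttl_adjustment(0.90, 600) == 690  # 600 + 90 (high_peak step)
    assert compute_ttl_adjustment(0.90, 880) == 900  # clamped to DEFAULT_TTL_MAX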

@@ -0,0 +1,259 @@
"""Sampling utilities extracted from theme_preview (Core Refactor Phase A - initial extraction).
This module contains card index construction and the deterministic sampling
pipeline used to build preview role buckets. Logic moved with minimal changes
to preserve behavior; future refactor steps will further decompose (e.g.,
separating card index & rarity calibration, introducing typed models).
Public (stable) surface for Phase A:
sample_real_cards_for_theme(theme: str, limit: int, colors_filter: str | None,
*, synergies: list[str], commander: str | None) -> list[dict]
Internal helpers intentionally start with an underscore to discourage external
use; they may change in subsequent refactor steps.
"""
from __future__ import annotations
import random
from typing import Any, Dict, List, Optional, TypedDict
from .card_index import maybe_build_index, get_tag_pool, lookup_commander
from .sampling_config import (
COMMANDER_COLOR_FILTER_STRICT,
COMMANDER_OVERLAP_BONUS,
COMMANDER_THEME_MATCH_BONUS,
SPLASH_OFF_COLOR_PENALTY,
SPLASH_ADAPTIVE_ENABLED,
parse_splash_adaptive_scale,
ROLE_BASE_WEIGHTS,
ROLE_SATURATION_PENALTY,
rarity_weight_base,
parse_rarity_diversity_targets,
RARITY_DIVERSITY_OVER_PENALTY,
)
_CARD_INDEX_DEPRECATED: Dict[str, List[Dict[str, Any]]] = {} # kept for back-compat in tests; will be removed
class SampledCard(TypedDict, total=False):
"""Typed shape for a sampled card entry emitted to preview layer.
total=False because curated examples / synthetic placeholders may lack
full DB-enriched fields (mana_cost, rarity, color_identity_list, etc.).
"""
name: str
colors: List[str]
roles: List[str]
tags: List[str]
score: float
reasons: List[str]
mana_cost: str
rarity: str
color_identity_list: List[str]
pip_colors: List[str]
def _classify_role(theme: str, synergies: List[str], tags: List[str]) -> str:
tag_set = set(tags)
synergy_overlap = tag_set.intersection(synergies)
if theme in tag_set:
return "payoff"
if len(synergy_overlap) >= 2:
return "enabler"
if len(synergy_overlap) == 1:
return "support"
return "wildcard"
def _seed_from(theme: str, commander: Optional[str]) -> int:
base = f"{theme.lower()}|{(commander or '').lower()}".encode("utf-8")
h = 0
for b in base:
h = (h * 131 + b) & 0xFFFFFFFF
return h or 1
def _deterministic_shuffle(items: List[Any], seed: int) -> None:
rnd = random.Random(seed)
rnd.shuffle(items)
def _score_card(theme: str, synergies: List[str], role: str, tags: List[str]) -> float:
tag_set = set(tags)
synergy_overlap = len(tag_set.intersection(synergies))
score = 0.0
if theme in tag_set:
score += 3.0
score += synergy_overlap * 1.2
score += ROLE_BASE_WEIGHTS.get(role, 0.5)
return score
def _commander_overlap_scale(commander_tags: set[str], card_tags: List[str], synergy_set: set[str]) -> float:
if not commander_tags or not synergy_set:
return 0.0
overlap_synergy = len(commander_tags.intersection(synergy_set).intersection(card_tags))
if overlap_synergy <= 0:
return 0.0
return COMMANDER_OVERLAP_BONUS * (1 - (0.5 ** overlap_synergy))
def _lookup_commander(commander: Optional[str]) -> Optional[Dict[str, Any]]: # thin wrapper for legacy name
return lookup_commander(commander)
def sample_real_cards_for_theme(theme: str, limit: int, colors_filter: Optional[str], *, synergies: List[str], commander: Optional[str]) -> List[SampledCard]:
"""Return scored, role-classified real cards for a theme.
Mirrors prior `_sample_real_cards_for_theme` behavior for parity.
"""
maybe_build_index()
pool = get_tag_pool(theme)
if not pool:
return []
commander_card = _lookup_commander(commander)
commander_colors: set[str] = set(commander_card.get("color_identity", "")) if commander_card else set()
commander_tags: set[str] = set(commander_card.get("tags", [])) if commander_card else set()
if colors_filter:
allowed = {c.strip().upper() for c in colors_filter.split(',') if c.strip()}
if allowed:
pool = [c for c in pool if set(c.get("color_identity", "")).issubset(allowed) or not c.get("color_identity")]
if commander_card and COMMANDER_COLOR_FILTER_STRICT and commander_colors:
allow_splash = len(commander_colors) >= 4
new_pool: List[Dict[str, Any]] = []
for c in pool:
ci = set(c.get("color_identity", ""))
if not ci or ci.issubset(commander_colors):
new_pool.append(c)
continue
if allow_splash:
off = ci - commander_colors
if len(off) == 1:
c["_splash_off_color"] = True # type: ignore
new_pool.append(c)
continue
pool = new_pool
seen_names: set[str] = set()
payoff: List[SampledCard] = []
enabler: List[SampledCard] = []
support: List[SampledCard] = []
wildcard: List[SampledCard] = []
rarity_counts: Dict[str, int] = {}
rarity_diversity = parse_rarity_diversity_targets()
synergy_set = set(synergies)
rarity_weight_cfg = rarity_weight_base()
splash_scale = parse_splash_adaptive_scale() if SPLASH_ADAPTIVE_ENABLED else None
commander_color_count = len(commander_colors) if commander_colors else 0
for raw in pool:
nm = raw.get("name")
if not nm or nm in seen_names:
continue
seen_names.add(nm)
tags = raw.get("tags", [])
role = _classify_role(theme, synergies, tags)
score = _score_card(theme, synergies, role, tags)
reasons = [f"role:{role}", f"synergy_overlap:{len(set(tags).intersection(synergies))}"]
if commander_card:
if theme in tags:
score += COMMANDER_THEME_MATCH_BONUS
reasons.append("commander_theme_match")
scaled = _commander_overlap_scale(commander_tags, tags, synergy_set)
if scaled:
score += scaled
reasons.append(f"commander_synergy_overlap:{len(commander_tags.intersection(synergy_set).intersection(tags))}:{round(scaled,2)}")
reasons.append("commander_bias")
rarity = raw.get("rarity") or ""
if rarity:
base_rarity_weight = rarity_weight_cfg.get(rarity, 0.25)
count_so_far = rarity_counts.get(rarity, 0)
increment_weight = base_rarity_weight / (1 + 0.4 * count_so_far)
score += increment_weight
rarity_counts[rarity] = count_so_far + 1
reasons.append(f"rarity_weight_calibrated:{rarity}:{round(increment_weight,2)}")
if rarity_diversity and rarity in rarity_diversity:
lo, hi = rarity_diversity[rarity]
# Only enforce upper bound (overflow penalty)
if rarity_counts[rarity] > hi:
score += RARITY_DIVERSITY_OVER_PENALTY
reasons.append(f"rarity_diversity_overflow:{rarity}:{hi}:{RARITY_DIVERSITY_OVER_PENALTY}")
if raw.get("_splash_off_color"):
penalty = SPLASH_OFF_COLOR_PENALTY
if splash_scale and commander_color_count:
scale = splash_scale.get(commander_color_count, 1.0)
adaptive_penalty = round(penalty * scale, 4)
score += adaptive_penalty
reasons.append(f"splash_off_color_penalty_adaptive:{commander_color_count}:{adaptive_penalty}")
else:
score += penalty # negative value
reasons.append(f"splash_off_color_penalty:{penalty}")
item: SampledCard = {
"name": nm,
"colors": list(raw.get("color_identity", "")),
"roles": [role],
"tags": tags,
"score": score,
"reasons": reasons,
"mana_cost": raw.get("mana_cost"),
"rarity": rarity,
"color_identity_list": raw.get("color_identity_list", []),
"pip_colors": raw.get("pip_colors", []),
}
if role == "payoff":
payoff.append(item)
elif role == "enabler":
enabler.append(item)
elif role == "support":
support.append(item)
else:
wildcard.append(item)
seed = _seed_from(theme, commander)
for bucket in (payoff, enabler, support, wildcard):
_deterministic_shuffle(bucket, seed)
bucket.sort(key=lambda x: (-x["score"], x["name"]))
target_payoff = max(1, int(round(limit * 0.4)))
target_enabler_support = max(1, int(round(limit * 0.4)))
target_wild = max(0, limit - target_payoff - target_enabler_support)
def take(n: int, source: List[SampledCard]):
for i in range(min(n, len(source))):
yield source[i]
chosen: List[SampledCard] = []
chosen.extend(take(target_payoff, payoff))
es_combined = enabler + support
chosen.extend(take(target_enabler_support, es_combined))
chosen.extend(take(target_wild, wildcard))
if len(chosen) < limit:
def fill_from(src: List[SampledCard]):
nonlocal chosen
for it in src:
if len(chosen) >= limit:
break
if it not in chosen:
chosen.append(it)
for bucket in (payoff, enabler, support, wildcard):
fill_from(bucket)
role_soft_caps = {
"payoff": int(round(limit * 0.5)),
"enabler": int(round(limit * 0.35)),
"support": int(round(limit * 0.35)),
"wildcard": int(round(limit * 0.25)),
}
role_seen: Dict[str, int] = {k: 0 for k in role_soft_caps}
for it in chosen:
r = (it.get("roles") or [None])[0]
if not r or r not in role_soft_caps:
continue
role_seen[r] += 1
if role_seen[r] > max(1, role_soft_caps[r]):
it["score"] = it.get("score", 0) + ROLE_SATURATION_PENALTY # negative value
(it.setdefault("reasons", [])).append(f"role_saturation_penalty:{ROLE_SATURATION_PENALTY}")
if len(chosen) > limit:
chosen = chosen[:limit]
return chosen
# Expose overlap scale for unit tests
commander_overlap_scale = _commander_overlap_scale
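# --- Illustrative usage sketch (documentation only; never called) --- #
# A minimal sketch of how the preview layer calls the sampler; the theme, synergies
# and commander below are hypothetical values.
def _example_usage() -> None:  # pragma: no cover - documentation sketch
    cards = sample_real_cards_for_theme(
        "Lifegain",
        limit=12,
        colors_filter="W,B",
        synergies=["Lifegain", "Aristocrats"],
        commander="Oloro, Ageless Ascetic",
    )
    for card in cards[:3]:
        print(card["name"], card["roles"], round(card["score"], 2))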

@@ -0,0 +1,123 @@
"""Scoring & sampling configuration constants (Phase 2 extraction).
Centralizes knobs used by the sampling pipeline so future tuning (or
experimentation via environment variables) can occur without editing the
core algorithm code.
Public constants (import into sampling.py and tests):
COMMANDER_COLOR_FILTER_STRICT
COMMANDER_OVERLAP_BONUS
COMMANDER_THEME_MATCH_BONUS
SPLASH_OFF_COLOR_PENALTY
ROLE_BASE_WEIGHTS
ROLE_SATURATION_PENALTY
Helper functions:
rarity_weight_base() -> dict[str, float]
Returns per-rarity base weights (reads env each call to preserve
existing test expectations that patch env before invoking sampling).
"""
from __future__ import annotations
import os
from typing import Dict, Tuple, Optional
# Commander related bonuses (identical defaults to previous inline values)
COMMANDER_COLOR_FILTER_STRICT = True
COMMANDER_OVERLAP_BONUS = 1.8
COMMANDER_THEME_MATCH_BONUS = 0.9
# Penalties / bonuses
SPLASH_OFF_COLOR_PENALTY = -0.3
# Adaptive splash penalty feature flag & scaling factors.
# When SPLASH_ADAPTIVE=1 the effective penalty becomes:
# base_penalty * splash_adaptive_scale(color_count)
# Where color_count is the number of distinct commander colors (1-5).
# Default scale keeps existing behavior at 1-3 colors, softens at 4, much lighter at 5.
SPLASH_ADAPTIVE_ENABLED = os.getenv("SPLASH_ADAPTIVE", "0") == "1"
_DEFAULT_SPLASH_SCALE = "1:1.0,2:1.0,3:1.0,4:0.6,5:0.35"
def parse_splash_adaptive_scale() -> Dict[int, float]: # dynamic to allow test env changes
spec = os.getenv("SPLASH_ADAPTIVE_SCALE", _DEFAULT_SPLASH_SCALE)
mapping: Dict[int, float] = {}
for part in spec.split(','):
part = part.strip()
if not part or ':' not in part:
continue
k_s, v_s = part.split(':', 1)
try:
k = int(k_s)
v = float(v_s)
if 1 <= k <= 5 and v > 0:
mapping[k] = v
except Exception:
continue
# Ensure all 1-5 present; fallback to 1.0 if unspecified
for i in range(1, 6):
mapping.setdefault(i, 1.0)
return mapping
ROLE_SATURATION_PENALTY = -0.4
# Base role weights applied inside score calculation
ROLE_BASE_WEIGHTS: Dict[str, float] = {
"payoff": 2.5,
"enabler": 2.0,
"support": 1.5,
"wildcard": 0.9,
}
# Rarity base weights (diminishing duplicate influence applied in sampling pipeline)
# Read from env at call time to allow tests to modify.
def rarity_weight_base() -> Dict[str, float]: # dynamic to allow env override per test
return {
"mythic": float(os.getenv("RARITY_W_MYTHIC", "1.2")),
"rare": float(os.getenv("RARITY_W_RARE", "0.9")),
"uncommon": float(os.getenv("RARITY_W_UNCOMMON", "0.65")),
"common": float(os.getenv("RARITY_W_COMMON", "0.4")),
}
__all__ = [
"COMMANDER_COLOR_FILTER_STRICT",
"COMMANDER_OVERLAP_BONUS",
"COMMANDER_THEME_MATCH_BONUS",
"SPLASH_OFF_COLOR_PENALTY",
"SPLASH_ADAPTIVE_ENABLED",
"parse_splash_adaptive_scale",
"ROLE_BASE_WEIGHTS",
"ROLE_SATURATION_PENALTY",
"rarity_weight_base",
"parse_rarity_diversity_targets",
"RARITY_DIVERSITY_OVER_PENALTY",
]
# Extended rarity diversity (optional) ---------------------------------------
# Env var RARITY_DIVERSITY_TARGETS pattern e.g. "mythic:0-1,rare:0-2,uncommon:0-4,common:0-6"
# Parsed into mapping rarity -> (min,max). Only max is enforced currently (penalty applied
# when overflow occurs); min reserved for potential future boosting logic.
RARITY_DIVERSITY_OVER_PENALTY = float(os.getenv("RARITY_DIVERSITY_OVER_PENALTY", "-0.5"))
def parse_rarity_diversity_targets() -> Optional[Dict[str, Tuple[int, int]]]:
spec = os.getenv("RARITY_DIVERSITY_TARGETS")
if not spec:
return None
targets: Dict[str, Tuple[int, int]] = {}
for part in spec.split(','):
part = part.strip()
if not part or ':' not in part:
continue
name, rng = part.split(':', 1)
name = name.strip().lower()
if '-' not in rng:
continue
lo_s, hi_s = rng.split('-', 1)
try:
lo = int(lo_s)
hi = int(hi_s)
if lo < 0 or hi < lo:
continue
targets[name] = (lo, hi)
except Exception:
continue
return targets or None
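# --- Illustrative usage sketch (documentation only; never called) --- #
# Shows how the env-driven knobs parse; the override values below are hypothetical.
def _example_usage() -> None:  # pragma: no cover - documentation sketch
    os.environ["SPLASH_ADAPTIVE_SCALE"] = "4:0.5,5:0.25"
    os.environ["RARITY_DIVERSITY_TARGETS"] = "mythic:0-1,rare:0-2"
    print(parse_splash_adaptive_scale())     # 4 -> 0.5, 5 -> 0.25, others default to 1.0
    print(parse_rarity_diversity_targets())  # {'mythic': (0, 1), 'rare': (0, 2)}
    print(rarity_weight_base()["mythic"])    # 1.2 unless RARITY_W_MYTHIC overrides it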

File diff suppressed because it is too large.