mirror of https://github.com/mwisnowski/mtg_python_deckbuilder.git (synced 2025-12-16 23:50:12 +01:00)
feat(web): Core Refactor Phase A — extract sampling and cache modules; add adaptive TTL + eviction heuristics, Redis PoC, and metrics wiring. Tests added for TTL, eviction, exports, splash-adaptive, card index, and service worker. Docs+roadmap updated.
parent c4a7fc48ea
commit a029d430c5
49 changed files with 3889 additions and 701 deletions
137  code/web/services/card_index.py  Normal file
@@ -0,0 +1,137 @@
"""Card index construction & lookup (extracted from sampling / theme_preview).

Phase A refactor: Provides a thin API for building and querying the in-memory
card index keyed by tag/theme. Future enhancements may introduce a persistent
cache layer or precomputed artifact.

Public API:
    maybe_build_index() -> None
    get_tag_pool(tag: str) -> list[dict]
    lookup_commander(name: str) -> dict | None

The index is rebuilt lazily when any of the CSV shard files change mtime.
"""
from __future__ import annotations

from pathlib import Path
import csv
import os
from typing import Any, Dict, List, Optional

CARD_FILES_GLOB = [
    Path("csv_files/blue_cards.csv"),
    Path("csv_files/white_cards.csv"),
    Path("csv_files/black_cards.csv"),
    Path("csv_files/red_cards.csv"),
    Path("csv_files/green_cards.csv"),
    Path("csv_files/colorless_cards.csv"),
    Path("csv_files/cards.csv"),  # fallback large file last
]

THEME_TAGS_COL = "themeTags"
NAME_COL = "name"
COLOR_IDENTITY_COL = "colorIdentity"
MANA_COST_COL = "manaCost"
RARITY_COL = "rarity"

_CARD_INDEX: Dict[str, List[Dict[str, Any]]] = {}
_CARD_INDEX_MTIME: float | None = None

_RARITY_NORM = {
    "mythic rare": "mythic",
    "mythic": "mythic",
    "m": "mythic",
    "rare": "rare",
    "r": "rare",
    "uncommon": "uncommon",
    "u": "uncommon",
    "common": "common",
    "c": "common",
}


def _normalize_rarity(raw: str) -> str:
    r = (raw or "").strip().lower()
    return _RARITY_NORM.get(r, r)


def _resolve_card_files() -> List[Path]:
    """Return base card file list + any extra test files supplied via env.

    Environment variable: CARD_INDEX_EXTRA_CSV can contain a comma or semicolon
    separated list of additional CSV paths (used by tests to inject synthetic
    edge cases without polluting production shards).
    """
    files: List[Path] = list(CARD_FILES_GLOB)
    extra = os.getenv("CARD_INDEX_EXTRA_CSV")
    if extra:
        for part in extra.replace(";", ",").split(","):
            p = part.strip()
            if not p:
                continue
            path_obj = Path(p)
            # Include even if missing; maybe created later in test before build
            files.append(path_obj)
    return files


def maybe_build_index() -> None:
    """Rebuild the index if any card CSV mtime changed.

    Incorporates any extra CSVs specified via CARD_INDEX_EXTRA_CSV.
    """
    global _CARD_INDEX, _CARD_INDEX_MTIME
    latest = 0.0
    card_files = _resolve_card_files()
    for p in card_files:
        if p.exists():
            mt = p.stat().st_mtime
            if mt > latest:
                latest = mt
    if _CARD_INDEX and _CARD_INDEX_MTIME and latest <= _CARD_INDEX_MTIME:
        return
    new_index: Dict[str, List[Dict[str, Any]]] = {}
    for p in card_files:
        if not p.exists():
            continue
        try:
            with p.open("r", encoding="utf-8", newline="") as fh:
                reader = csv.DictReader(fh)
                if not reader.fieldnames or THEME_TAGS_COL not in reader.fieldnames:
                    continue
                for row in reader:
                    name = row.get(NAME_COL) or row.get("faceName") or ""
                    tags_raw = row.get(THEME_TAGS_COL) or ""
                    tags = [t.strip(" '[]") for t in tags_raw.split(',') if t.strip()] if tags_raw else []
                    if not tags:
                        continue
                    color_id = (row.get(COLOR_IDENTITY_COL) or "").strip()
                    mana_cost = (row.get(MANA_COST_COL) or "").strip()
                    rarity = _normalize_rarity(row.get(RARITY_COL) or "")
                    for tg in tags:
                        if not tg:
                            continue
                        new_index.setdefault(tg, []).append({
                            "name": name,
                            "color_identity": color_id,
                            "tags": tags,
                            "mana_cost": mana_cost,
                            "rarity": rarity,
                            "color_identity_list": list(color_id) if color_id else [],
                            "pip_colors": [c for c in mana_cost if c in {"W","U","B","R","G"}],
                        })
        except Exception:
            continue
    _CARD_INDEX = new_index
    _CARD_INDEX_MTIME = latest


def get_tag_pool(tag: str) -> List[Dict[str, Any]]:
    return _CARD_INDEX.get(tag, [])


def lookup_commander(name: Optional[str]) -> Optional[Dict[str, Any]]:
    if not name:
        return None
    needle = name.lower().strip()
    for tag_cards in _CARD_INDEX.values():
        for c in tag_cards:
            if c.get("name", "").lower() == needle:
                return c
    return None
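A minimal sketch of how this index might be exercised from a test, assuming a synthetic shard injected via CARD_INDEX_EXTRA_CSV; the import path, file path, and card data below are illustrative, not part of the commit.

# Hypothetical test sketch for card_index (paths and data are made up).
import os
from pathlib import Path

from code.web.services import card_index  # import path assumed from the repo layout


def build_synthetic_index(tmp_path: Path) -> None:
    shard = tmp_path / "extra_cards.csv"
    shard.write_text(
        "name,themeTags,colorIdentity,manaCost,rarity\n"
        'Test Drake,"Flying, Card Draw",U,{1}{U},uncommon\n',
        encoding="utf-8",
    )
    os.environ["CARD_INDEX_EXTRA_CSV"] = str(shard)
    card_index.maybe_build_index()            # rebuilds: the new shard has the latest mtime
    pool = card_index.get_tag_pool("Flying")  # entries are keyed by each parsed themeTags value
    assert any(c["name"] == "Test Drake" for c in pool)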
323  code/web/services/preview_cache.py  Normal file
@@ -0,0 +1,323 @@
"""Preview cache utilities & adaptive policy (Core Refactor Phase A continued).

This module now owns:
- In-memory preview cache (OrderedDict)
- Cache bust helper
- Adaptive TTL policy & recent hit tracking
- Background refresh thread orchestration (warming top-K hot themes)

`theme_preview` orchestrator invokes `record_request_hit()` and
`maybe_adapt_ttl()` after each build/cache check, and calls `ensure_bg_thread()`
post-build. Metrics are still aggregated in `theme_preview`, but TTL state lives
here to prepare for future backend abstraction.
"""
from __future__ import annotations

from collections import OrderedDict, deque
from typing import Any, Dict, Tuple, Callable
import time as _t
import os
import json
import threading
import math

from .preview_metrics import record_eviction  # type: ignore

# Phase 2 extraction: adaptive TTL band policy moved into preview_policy
from .preview_policy import (
    compute_ttl_adjustment,
    DEFAULT_TTL_BASE as _POLICY_TTL_BASE,
    DEFAULT_TTL_MIN as _POLICY_TTL_MIN,
    DEFAULT_TTL_MAX as _POLICY_TTL_MAX,
)
from .preview_cache_backend import redis_store  # type: ignore

TTL_SECONDS = 600
# Backward-compat variable names retained (tests may reference) mapping to policy constants
_TTL_BASE = _POLICY_TTL_BASE
_TTL_MIN = _POLICY_TTL_MIN
_TTL_MAX = _POLICY_TTL_MAX
_ADAPT_SAMPLE_WINDOW = 120
_ADAPT_INTERVAL_S = 30
_ADAPTATION_ENABLED = (os.getenv("THEME_PREVIEW_ADAPTIVE") or "").lower() in {"1","true","yes","on"}
_RECENT_HITS: "deque[bool]" = deque(maxlen=_ADAPT_SAMPLE_WINDOW)
_LAST_ADAPT_AT: float | None = None

_BG_REFRESH_THREAD_STARTED = False
_BG_REFRESH_INTERVAL_S = int(os.getenv("THEME_PREVIEW_BG_REFRESH_INTERVAL") or 120)
_BG_REFRESH_ENABLED = (os.getenv("THEME_PREVIEW_BG_REFRESH") or "").lower() in {"1","true","yes","on"}
_BG_REFRESH_MIN = 30
_BG_REFRESH_MAX = max(300, _BG_REFRESH_INTERVAL_S * 5)


def record_request_hit(hit: bool) -> None:
    _RECENT_HITS.append(hit)


def recent_hit_window() -> int:
    return len(_RECENT_HITS)


def ttl_seconds() -> int:
    return TTL_SECONDS


def _maybe_adapt_ttl(now: float) -> None:
    """Apply adaptive TTL adjustment using extracted policy.

    Keeps prior guards (sample window, interval) for stability; only the
    banded adjustment math has moved to preview_policy.
    """
    global TTL_SECONDS, _LAST_ADAPT_AT
    if not _ADAPTATION_ENABLED:
        return
    if len(_RECENT_HITS) < max(30, int(_ADAPT_SAMPLE_WINDOW * 0.5)):
        return
    if _LAST_ADAPT_AT and (now - _LAST_ADAPT_AT) < _ADAPT_INTERVAL_S:
        return
    hit_ratio = sum(1 for h in _RECENT_HITS if h) / len(_RECENT_HITS)
    new_ttl = compute_ttl_adjustment(hit_ratio, TTL_SECONDS, _TTL_BASE, _TTL_MIN, _TTL_MAX)
    if new_ttl != TTL_SECONDS:
        TTL_SECONDS = new_ttl
        try:  # pragma: no cover - defensive logging
            print(json.dumps({
                "event": "theme_preview_ttl_adapt",
                "hit_ratio": round(hit_ratio, 3),
                "ttl": TTL_SECONDS,
            }))  # noqa: T201
        except Exception:
            pass
    _LAST_ADAPT_AT = now


def maybe_adapt_ttl() -> None:
    _maybe_adapt_ttl(_t.time())


def _bg_refresh_loop(build_top_slug: Callable[[str], None], get_hot_slugs: Callable[[], list[str]]):  # pragma: no cover
    while True:
        if not _BG_REFRESH_ENABLED:
            return
        try:
            for slug in get_hot_slugs():
                try:
                    build_top_slug(slug)
                except Exception:
                    continue
        except Exception:
            pass
        _t.sleep(_BG_REFRESH_INTERVAL_S)


def ensure_bg_thread(build_top_slug: Callable[[str], None], get_hot_slugs: Callable[[], list[str]]):  # pragma: no cover
    global _BG_REFRESH_THREAD_STARTED
    if _BG_REFRESH_THREAD_STARTED or not _BG_REFRESH_ENABLED:
        return
    try:
        th = threading.Thread(target=_bg_refresh_loop, args=(build_top_slug, get_hot_slugs), name="theme_preview_bg_refresh", daemon=True)
        th.start()
        _BG_REFRESH_THREAD_STARTED = True
    except Exception:
        pass


PREVIEW_CACHE: "OrderedDict[Tuple[str, int, str | None, str | None, str], Dict[str, Any]]" = OrderedDict()
# Cache entry shape (dict) — groundwork for adaptive eviction (Phase 2)
# Keys:
#   payload: preview payload dict
#   _cached_at / cached_at: epoch seconds when stored (TTL reference; _cached_at kept for backward compat)
#   inserted_at: epoch seconds first insertion
#   last_access: epoch seconds of last successful cache hit
#   hit_count: int number of cache hits (excludes initial store)
#   build_cost_ms: float build duration captured at store time (used for cost-based protection)


def register_cache_hit(key: Tuple[str, int, str | None, str | None, str]) -> None:
    entry = PREVIEW_CACHE.get(key)
    if not entry:
        return
    now = _t.time()
    # Initialize metadata if legacy entry present
    if "inserted_at" not in entry:
        entry["inserted_at"] = entry.get("_cached_at", now)
    entry["last_access"] = now
    entry["hit_count"] = int(entry.get("hit_count", 0)) + 1


def store_cache_entry(key: Tuple[str, int, str | None, str | None, str], payload: Dict[str, Any], build_cost_ms: float) -> None:
    now = _t.time()
    PREVIEW_CACHE[key] = {
        "payload": payload,
        "_cached_at": now,  # legacy field name
        "cached_at": now,
        "inserted_at": now,
        "last_access": now,
        "hit_count": 0,
        "build_cost_ms": float(build_cost_ms),
    }
    PREVIEW_CACHE.move_to_end(key)
    # Optional Redis write-through (best-effort)
    try:
        if os.getenv("THEME_PREVIEW_REDIS_URL") and not os.getenv("THEME_PREVIEW_REDIS_DISABLE"):
            redis_store(key, payload, int(TTL_SECONDS), build_cost_ms)
    except Exception:
        pass


# --- Adaptive Eviction Weight & Threshold Resolution (Phase 2 Step 4) --- #
_EVICT_WEIGHTS_CACHE: Dict[str, float] | None = None
_EVICT_THRESH_CACHE: Tuple[float, float, float] | None = None


def _resolve_eviction_weights() -> Dict[str, float]:
    global _EVICT_WEIGHTS_CACHE
    if _EVICT_WEIGHTS_CACHE is not None:
        return _EVICT_WEIGHTS_CACHE

    def _f(env_key: str, default: float) -> float:
        raw = os.getenv(env_key)
        if not raw:
            return default
        try:
            return float(raw)
        except Exception:
            return default

    _EVICT_WEIGHTS_CACHE = {
        "W_HITS": _f("THEME_PREVIEW_EVICT_W_HITS", 3.0),
        "W_RECENCY": _f("THEME_PREVIEW_EVICT_W_RECENCY", 2.0),
        "W_COST": _f("THEME_PREVIEW_EVICT_W_COST", 1.0),
        "W_AGE": _f("THEME_PREVIEW_EVICT_W_AGE", 1.5),
    }
    return _EVICT_WEIGHTS_CACHE


def _resolve_cost_thresholds() -> Tuple[float, float, float]:
    global _EVICT_THRESH_CACHE
    if _EVICT_THRESH_CACHE is not None:
        return _EVICT_THRESH_CACHE
    raw = os.getenv("THEME_PREVIEW_EVICT_COST_THRESHOLDS", "5,15,40")
    parts = [p.strip() for p in raw.split(',') if p.strip()]
    nums: list[float] = []
    for p in parts:
        try:
            nums.append(float(p))
        except Exception:
            pass
    while len(nums) < 3:
        # pad with defaults if insufficient
        defaults = [5.0, 15.0, 40.0]
        nums.append(defaults[len(nums)])
    nums = sorted(nums[:3])
    _EVICT_THRESH_CACHE = (nums[0], nums[1], nums[2])
    return _EVICT_THRESH_CACHE


def _cost_bucket(build_cost_ms: float) -> int:
    t1, t2, t3 = _resolve_cost_thresholds()
    if build_cost_ms < t1:
        return 0
    if build_cost_ms < t2:
        return 1
    if build_cost_ms < t3:
        return 2
    return 3


def compute_protection_score(entry: Dict[str, Any], now: float | None = None) -> float:
    """Compute protection score (higher = more protected from eviction).

    Score components:
    - hit_count (log scaled) weighted by W_HITS
    - recency (inverse minutes since last access) weighted by W_RECENCY
    - build cost bucket weighted by W_COST
    - age penalty (minutes since insert) weighted by W_AGE (subtracted)
    """
    if now is None:
        now = _t.time()
    weights = _resolve_eviction_weights()
    inserted = float(entry.get("inserted_at", now))
    last_access = float(entry.get("last_access", inserted))
    hits = int(entry.get("hit_count", 0))
    build_cost_ms = float(entry.get("build_cost_ms", 0.0))
    minutes_since_last = max(0.0, (now - last_access) / 60.0)
    minutes_since_insert = max(0.0, (now - inserted) / 60.0)
    recency_score = 1.0 / (1.0 + minutes_since_last)
    age_score = minutes_since_insert
    cost_b = _cost_bucket(build_cost_ms)
    score = (
        weights["W_HITS"] * math.log(1 + hits)
        + weights["W_RECENCY"] * recency_score
        + weights["W_COST"] * cost_b
        - weights["W_AGE"] * age_score
    )
    return float(score)


# --- Eviction Logic (Phase 2 Step 6) --- #
def _cache_max() -> int:
    try:
        raw = os.getenv("THEME_PREVIEW_CACHE_MAX") or "400"
        v = int(raw)
        if v <= 0:
            raise ValueError
        return v
    except Exception:
        return 400


def evict_if_needed() -> None:
    """Adaptive eviction replacing FIFO.

    Strategy:
    - If size <= limit: no-op
    - If size > 2*limit: emergency overflow path (age-based removal until within limit)
    - Else: remove lowest protection score entry (single) if over limit
    """
    try:
        # Removed previous hard floor (50) to allow test scenarios with small limits.
        # Operational deployments can still set higher env value. Tests rely on low limits
        # (e.g., 5) to exercise eviction deterministically.
        limit = _cache_max()
        size = len(PREVIEW_CACHE)
        if size <= limit:
            return
        now = _t.time()
        # Emergency overflow path
        if size > 2 * limit:
            while len(PREVIEW_CACHE) > limit:
                # Oldest by inserted_at/_cached_at
                oldest_key = min(
                    PREVIEW_CACHE.items(),
                    key=lambda kv: kv[1].get("inserted_at", kv[1].get("_cached_at", 0.0)),
                )[0]
                entry = PREVIEW_CACHE.pop(oldest_key)
                meta = {
                    "hit_count": int(entry.get("hit_count", 0)),
                    "age_ms": int((now - entry.get("inserted_at", now)) * 1000),
                    "build_cost_ms": float(entry.get("build_cost_ms", 0.0)),
                    "protection_score": compute_protection_score(entry, now),
                    "reason": "emergency_overflow",
                    "cache_limit": limit,
                    "size_before": size,
                    "size_after": len(PREVIEW_CACHE),
                }
                record_eviction(meta)
            return
        # Standard single-entry score-based eviction
        lowest_key = None
        lowest_score = None
        for key, entry in PREVIEW_CACHE.items():
            score = compute_protection_score(entry, now)
            if lowest_score is None or score < lowest_score:
                lowest_key = key
                lowest_score = score
        if lowest_key is not None:
            entry = PREVIEW_CACHE.pop(lowest_key)
            meta = {
                "hit_count": int(entry.get("hit_count", 0)),
                "age_ms": int((now - entry.get("inserted_at", now)) * 1000),
                "build_cost_ms": float(entry.get("build_cost_ms", 0.0)),
                "protection_score": float(lowest_score if lowest_score is not None else 0.0),
                "reason": "low_score",
                "cache_limit": limit,
                "size_before": size,
                "size_after": len(PREVIEW_CACHE),
            }
            record_eviction(meta)
    except Exception:
        # Fail quiet; eviction is best-effort
        pass


_PREVIEW_LAST_BUST_AT: float | None = None


def bust_preview_cache(reason: str | None = None) -> None:  # pragma: no cover (trivial)
    global PREVIEW_CACHE, _PREVIEW_LAST_BUST_AT
    try:
        PREVIEW_CACHE.clear()
        _PREVIEW_LAST_BUST_AT = _t.time()
    except Exception:
        pass


def preview_cache_last_bust_at() -> float | None:
    return _PREVIEW_LAST_BUST_AT
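A small sketch of the store/hit/evict flow under a deliberately tiny cache limit; the key tuples and payloads are illustrative only and the import path is assumed.

# Illustrative eviction walkthrough (keys and payloads are made up).
import os

from code.web.services import preview_cache as pc  # import path assumed

os.environ["THEME_PREVIEW_CACHE_MAX"] = "2"  # read by _cache_max() on each eviction pass

k1 = ("aggro", 12, None, None, "v1")
k2 = ("lifegain", 12, None, None, "v1")
k3 = ("tokens", 12, None, None, "v1")

pc.store_cache_entry(k1, {"items": []}, build_cost_ms=4.0)
pc.register_cache_hit(k1)                     # bumps hit_count, raising k1's protection score
pc.store_cache_entry(k2, {"items": []}, build_cost_ms=35.0)
pc.store_cache_entry(k3, {"items": []}, build_cost_ms=1.0)

pc.evict_if_needed()                          # size 3 > limit 2: the lowest-score entry is dropped
assert len(pc.PREVIEW_CACHE) == 2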
113  code/web/services/preview_cache_backend.py  Normal file
@@ -0,0 +1,113 @@
"""Cache backend abstraction (Phase 2 extension) with Redis PoC.

The in-memory cache remains authoritative for adaptive eviction heuristics.
This backend layer provides optional read-through / write-through to Redis
for latency & CPU comparison. It is intentionally minimal:

Environment:
    THEME_PREVIEW_REDIS_URL=redis://host:port/db -> enable PoC if redis-py importable
    THEME_PREVIEW_REDIS_DISABLE=1 -> hard disable even if URL present

Behavior:
- On store: serialize payload + metadata into JSON and SETEX with TTL.
- On get (memory miss only): attempt Redis GET and rehydrate (respect TTL).
- Failures are swallowed; metrics track attempts/hits/errors.

No eviction coordination is attempted; Redis TTL handles expiry. The goal is
purely observational at this stage.
"""
from __future__ import annotations

from typing import Optional, Dict, Any, Tuple
import json
import os
import time

try:  # lazy optional dependency
    import redis  # type: ignore
except Exception:  # pragma: no cover - absence path
    redis = None  # type: ignore

_URL = os.getenv("THEME_PREVIEW_REDIS_URL")
_DISABLED = (os.getenv("THEME_PREVIEW_REDIS_DISABLE") or "").lower() in {"1","true","yes","on"}

_CLIENT = None
_INIT_ERR: str | None = None


def _init() -> None:
    global _CLIENT, _INIT_ERR
    if _CLIENT is not None or _INIT_ERR is not None:
        return
    if _DISABLED or not _URL or not redis:
        _INIT_ERR = "disabled_or_missing"
        return
    try:
        _CLIENT = redis.Redis.from_url(_URL, socket_timeout=0.25)  # type: ignore
        # lightweight ping (non-fatal)
        try:
            _CLIENT.ping()
        except Exception:
            pass
    except Exception as e:  # pragma: no cover - network/dep issues
        _INIT_ERR = f"init_error:{e}"[:120]


def backend_info() -> Dict[str, Any]:
    return {
        "enabled": bool(_CLIENT),
        "init_error": _INIT_ERR,
        "url_present": bool(_URL),
    }


def _serialize(key: Tuple[str, int, str | None, str | None, str], payload: Dict[str, Any], build_cost_ms: float) -> str:
    return json.dumps({
        "k": list(key),
        "p": payload,
        "bc": build_cost_ms,
        "ts": time.time(),
    }, separators=(",", ":"))


def redis_store(key: Tuple[str, int, str | None, str | None, str], payload: Dict[str, Any], ttl_seconds: int, build_cost_ms: float) -> bool:
    _init()
    if not _CLIENT:
        return False
    try:
        data = _serialize(key, payload, build_cost_ms)
        # Compose a simple namespaced key; join tuple parts with '|'
        skey = "tpv:" + "|".join([str(part) for part in key])
        _CLIENT.setex(skey, ttl_seconds, data)
        return True
    except Exception:  # pragma: no cover
        return False


def redis_get(key: Tuple[str, int, str | None, str | None, str]) -> Optional[Dict[str, Any]]:
    _init()
    if not _CLIENT:
        return None
    try:
        skey = "tpv:" + "|".join([str(part) for part in key])
        raw: bytes | None = _CLIENT.get(skey)  # type: ignore
        if not raw:
            return None
        obj = json.loads(raw.decode("utf-8"))
        # Expect shape from _serialize
        payload = obj.get("p")
        if not isinstance(payload, dict):
            return None
        return {
            "payload": payload,
            "_cached_at": float(obj.get("ts") or 0),
            "cached_at": float(obj.get("ts") or 0),
            "inserted_at": float(obj.get("ts") or 0),
            "last_access": float(obj.get("ts") or 0),
            "hit_count": 0,
            "build_cost_ms": float(obj.get("bc") or 0.0),
        }
    except Exception:  # pragma: no cover
        return None


__all__ = [
    "backend_info",
    "redis_store",
    "redis_get",
]
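A short sketch of enabling the Redis PoC and checking backend state; the URL is a placeholder, the import path is assumed, and redis-py must be installed for the client to initialize.

# Enabling the Redis PoC (URL is a placeholder; requires redis-py).
import os

# Env must be set before the first import: the module reads it at import time.
os.environ["THEME_PREVIEW_REDIS_URL"] = "redis://localhost:6379/0"
os.environ.pop("THEME_PREVIEW_REDIS_DISABLE", None)

from code.web.services import preview_cache_backend as backend  # import path assumed

key = ("aggro", 12, None, None, "v1")
stored = backend.redis_store(key, {"items": []}, ttl_seconds=600, build_cost_ms=8.0)
entry = backend.redis_get(key)  # None on any failure; dict shaped like an in-memory entry otherwise
print(backend.backend_info(), stored, entry is not None)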
285  code/web/services/preview_metrics.py  Normal file
@@ -0,0 +1,285 @@
"""Metrics aggregation for theme preview service.

Extracted from `theme_preview.py` (Phase 2 refactor) to isolate
metrics/state reporting from orchestration & caching logic. This allows
future experimentation with alternative cache backends / eviction without
coupling metrics concerns.

Public API:
    record_build_duration(ms: float)
    record_role_counts(role_counts: dict[str,int])
    record_curated_sampled(curated: int, sampled: int)
    record_per_theme(slug: str, build_ms: float, curated: int, sampled: int)
    record_request(hit: bool, error: bool = False, client_error: bool = False)
    record_per_theme_error(slug: str)
    preview_metrics() -> dict

The consuming orchestrator remains responsible for calling these hooks.
"""
from __future__ import annotations

from typing import Any, Dict, List
import os

# Global counters (mirrors previous names for backward compatibility where tests may introspect)
_PREVIEW_BUILD_MS_TOTAL = 0.0
_PREVIEW_BUILD_COUNT = 0
_BUILD_DURATIONS: List[float] = []
_ROLE_GLOBAL_COUNTS: dict[str, int] = {}
_CURATED_GLOBAL = 0
_SAMPLED_GLOBAL = 0
_PREVIEW_PER_THEME: dict[str, Dict[str, Any]] = {}
_PREVIEW_PER_THEME_REQUESTS: dict[str, int] = {}
_PREVIEW_PER_THEME_ERRORS: dict[str, int] = {}
_PREVIEW_REQUESTS = 0
_PREVIEW_CACHE_HITS = 0
_PREVIEW_ERROR_COUNT = 0
_PREVIEW_REQUEST_ERROR_COUNT = 0
_EVICTION_TOTAL = 0
_EVICTION_BY_REASON: dict[str, int] = {}
_EVICTION_LAST: dict[str, Any] | None = None
_SPLASH_OFF_COLOR_TOTAL = 0
_SPLASH_PREVIEWS_WITH_PENALTY = 0
_SPLASH_PENALTY_CARD_EVENTS = 0
_REDIS_GET_ATTEMPTS = 0
_REDIS_GET_HITS = 0
_REDIS_GET_ERRORS = 0
_REDIS_STORE_ATTEMPTS = 0
_REDIS_STORE_ERRORS = 0


def record_redis_get(hit: bool, error: bool = False):
    global _REDIS_GET_ATTEMPTS, _REDIS_GET_HITS, _REDIS_GET_ERRORS
    _REDIS_GET_ATTEMPTS += 1
    if hit:
        _REDIS_GET_HITS += 1
    if error:
        _REDIS_GET_ERRORS += 1


def record_redis_store(error: bool = False):
    global _REDIS_STORE_ATTEMPTS, _REDIS_STORE_ERRORS
    _REDIS_STORE_ATTEMPTS += 1
    if error:
        _REDIS_STORE_ERRORS += 1


# External state accessors (injected via set functions) to avoid import cycle
_ttl_seconds_fn = None
_recent_hit_window_fn = None
_cache_len_fn = None
_last_bust_at_fn = None
_curated_synergy_loaded_fn = None
_curated_synergy_size_fn = None


def configure_external_access(
    ttl_seconds_fn,
    recent_hit_window_fn,
    cache_len_fn,
    last_bust_at_fn,
    curated_synergy_loaded_fn,
    curated_synergy_size_fn,
):
    global _ttl_seconds_fn, _recent_hit_window_fn, _cache_len_fn, _last_bust_at_fn, _curated_synergy_loaded_fn, _curated_synergy_size_fn
    _ttl_seconds_fn = ttl_seconds_fn
    _recent_hit_window_fn = recent_hit_window_fn
    _cache_len_fn = cache_len_fn
    _last_bust_at_fn = last_bust_at_fn
    _curated_synergy_loaded_fn = curated_synergy_loaded_fn
    _curated_synergy_size_fn = curated_synergy_size_fn


def record_build_duration(ms: float) -> None:
    global _PREVIEW_BUILD_MS_TOTAL, _PREVIEW_BUILD_COUNT
    _PREVIEW_BUILD_MS_TOTAL += ms
    _PREVIEW_BUILD_COUNT += 1
    _BUILD_DURATIONS.append(ms)


def record_role_counts(role_counts: Dict[str, int]) -> None:
    for r, c in role_counts.items():
        _ROLE_GLOBAL_COUNTS[r] = _ROLE_GLOBAL_COUNTS.get(r, 0) + c


def record_curated_sampled(curated: int, sampled: int) -> None:
    global _CURATED_GLOBAL, _SAMPLED_GLOBAL
    _CURATED_GLOBAL += curated
    _SAMPLED_GLOBAL += sampled


def record_per_theme(slug: str, build_ms: float, curated: int, sampled: int) -> None:
    data = _PREVIEW_PER_THEME.setdefault(slug, {"total_ms": 0.0, "builds": 0, "durations": [], "curated": 0, "sampled": 0})
    data["total_ms"] += build_ms
    data["builds"] += 1
    durs = data["durations"]
    durs.append(build_ms)
    if len(durs) > 100:
        del durs[0: len(durs) - 100]
    data["curated"] += curated
    data["sampled"] += sampled


def record_request(hit: bool, error: bool = False, client_error: bool = False) -> None:
    global _PREVIEW_REQUESTS, _PREVIEW_CACHE_HITS, _PREVIEW_ERROR_COUNT, _PREVIEW_REQUEST_ERROR_COUNT
    _PREVIEW_REQUESTS += 1
    if hit:
        _PREVIEW_CACHE_HITS += 1
    if error:
        _PREVIEW_ERROR_COUNT += 1
    if client_error:
        _PREVIEW_REQUEST_ERROR_COUNT += 1


def record_per_theme_error(slug: str) -> None:
    _PREVIEW_PER_THEME_ERRORS[slug] = _PREVIEW_PER_THEME_ERRORS.get(slug, 0) + 1


def _percentile(sorted_vals: List[float], pct: float) -> float:
    if not sorted_vals:
        return 0.0
    k = (len(sorted_vals) - 1) * pct
    f = int(k)
    c = min(f + 1, len(sorted_vals) - 1)
    if f == c:
        return sorted_vals[f]
    d0 = sorted_vals[f] * (c - k)
    d1 = sorted_vals[c] * (k - f)
    return d0 + d1


def preview_metrics() -> Dict[str, Any]:
    ttl_seconds = _ttl_seconds_fn() if _ttl_seconds_fn else 0
    recent_window = _recent_hit_window_fn() if _recent_hit_window_fn else 0
    cache_len = _cache_len_fn() if _cache_len_fn else 0
    last_bust = _last_bust_at_fn() if _last_bust_at_fn else None
    avg_ms = (_PREVIEW_BUILD_MS_TOTAL / _PREVIEW_BUILD_COUNT) if _PREVIEW_BUILD_COUNT else 0.0
    durations_list = sorted(list(_BUILD_DURATIONS))
    p95 = _percentile(durations_list, 0.95)
    # Role distribution aggregate
    total_roles = sum(_ROLE_GLOBAL_COUNTS.values()) or 1
    target = {"payoff": 0.4, "enabler+support": 0.4, "wildcard": 0.2}
    actual_enabler_support = (_ROLE_GLOBAL_COUNTS.get("enabler", 0) + _ROLE_GLOBAL_COUNTS.get("support", 0)) / total_roles
    role_distribution = {
        "payoff": {
            "count": _ROLE_GLOBAL_COUNTS.get("payoff", 0),
            "actual_pct": round((_ROLE_GLOBAL_COUNTS.get("payoff", 0) / total_roles) * 100, 2),
            "target_pct": target["payoff"] * 100,
        },
        "enabler_support": {
            "count": _ROLE_GLOBAL_COUNTS.get("enabler", 0) + _ROLE_GLOBAL_COUNTS.get("support", 0),
            "actual_pct": round(actual_enabler_support * 100, 2),
            "target_pct": target["enabler+support"] * 100,
        },
        "wildcard": {
            "count": _ROLE_GLOBAL_COUNTS.get("wildcard", 0),
            "actual_pct": round((_ROLE_GLOBAL_COUNTS.get("wildcard", 0) / total_roles) * 100, 2),
            "target_pct": target["wildcard"] * 100,
        },
    }
    editorial_coverage_pct = round((_CURATED_GLOBAL / max(1, (_CURATED_GLOBAL + _SAMPLED_GLOBAL))) * 100, 2)
    per_theme_stats: Dict[str, Any] = {}
    for slug, data in list(_PREVIEW_PER_THEME.items())[:50]:
        durs = list(data.get("durations", []))
        sd = sorted(durs)
        p50 = _percentile(sd, 0.50)
        p95_local = _percentile(sd, 0.95)
        per_theme_stats[slug] = {
            "avg_ms": round(data["total_ms"] / max(1, data["builds"]), 2),
            "p50_ms": round(p50, 2),
            "p95_ms": round(p95_local, 2),
            "builds": data["builds"],
            "avg_curated_pct": round((data["curated"] / max(1, (data["curated"] + data["sampled"]))) * 100, 2),
            "requests": _PREVIEW_PER_THEME_REQUESTS.get(slug, 0),
            "curated_total": data.get("curated", 0),
            "sampled_total": data.get("sampled", 0),
        }
    error_rate = 0.0
    total_req = _PREVIEW_REQUESTS or 0
    if total_req:
        error_rate = round((_PREVIEW_ERROR_COUNT / total_req) * 100, 2)
    try:
        enforce_threshold = float(os.getenv("EXAMPLE_ENFORCE_THRESHOLD", "90"))
    except Exception:  # pragma: no cover
        enforce_threshold = 90.0
    example_enforcement_active = editorial_coverage_pct >= enforce_threshold
    curated_synergy_loaded = _curated_synergy_loaded_fn() if _curated_synergy_loaded_fn else False
    curated_synergy_size = _curated_synergy_size_fn() if _curated_synergy_size_fn else 0
    return {
        "preview_requests": _PREVIEW_REQUESTS,
        "preview_cache_hits": _PREVIEW_CACHE_HITS,
        "preview_cache_entries": cache_len,
        "preview_cache_evictions": _EVICTION_TOTAL,
        "preview_cache_evictions_by_reason": dict(_EVICTION_BY_REASON),
        "preview_cache_eviction_last": _EVICTION_LAST,
        "preview_avg_build_ms": round(avg_ms, 2),
        "preview_p95_build_ms": round(p95, 2),
        "preview_error_rate_pct": error_rate,
        "preview_client_fetch_errors": _PREVIEW_REQUEST_ERROR_COUNT,
        "preview_ttl_seconds": ttl_seconds,
        "preview_ttl_adaptive": True,
        "preview_ttl_window": recent_window,
        "preview_last_bust_at": last_bust,
        "role_distribution": role_distribution,
        "editorial_curated_vs_sampled_pct": editorial_coverage_pct,
        "example_enforcement_active": example_enforcement_active,
        "example_enforce_threshold_pct": enforce_threshold,
        "editorial_curated_total": _CURATED_GLOBAL,
        "editorial_sampled_total": _SAMPLED_GLOBAL,
        "per_theme": per_theme_stats,
        "per_theme_errors": dict(list(_PREVIEW_PER_THEME_ERRORS.items())[:50]),
        "curated_synergy_matrix_loaded": curated_synergy_loaded,
        "curated_synergy_matrix_size": curated_synergy_size,
        "splash_off_color_total_cards": _SPLASH_OFF_COLOR_TOTAL,
        "splash_previews_with_penalty": _SPLASH_PREVIEWS_WITH_PENALTY,
        "splash_penalty_reason_events": _SPLASH_PENALTY_CARD_EVENTS,
        "redis_get_attempts": _REDIS_GET_ATTEMPTS,
        "redis_get_hits": _REDIS_GET_HITS,
        "redis_get_errors": _REDIS_GET_ERRORS,
        "redis_store_attempts": _REDIS_STORE_ATTEMPTS,
        "redis_store_errors": _REDIS_STORE_ERRORS,
    }


__all__ = [
    "record_build_duration",
    "record_role_counts",
    "record_curated_sampled",
    "record_per_theme",
    "record_request",
    "record_per_theme_request",
    "record_per_theme_error",
    "record_eviction",
    "preview_metrics",
    "configure_external_access",
    "record_splash_analytics",
    "record_redis_get",
    "record_redis_store",
]


def record_per_theme_request(slug: str) -> None:
    """Increment request counter for a specific theme (cache hit or miss).

    This was previously in the monolith; extracted to keep per-theme request
    counts consistent with new metrics module ownership.
    """
    _PREVIEW_PER_THEME_REQUESTS[slug] = _PREVIEW_PER_THEME_REQUESTS.get(slug, 0) + 1


def record_eviction(meta: Dict[str, Any]) -> None:
    """Record a cache eviction event.

    meta expected keys: reason, hit_count, age_ms, build_cost_ms, protection_score, cache_limit,
    size_before, size_after.
    """
    global _EVICTION_TOTAL, _EVICTION_LAST
    _EVICTION_TOTAL += 1
    reason = meta.get("reason", "unknown")
    _EVICTION_BY_REASON[reason] = _EVICTION_BY_REASON.get(reason, 0) + 1
    _EVICTION_LAST = meta
    # Optional structured log
    try:  # pragma: no cover
        if (os.getenv("WEB_THEME_PREVIEW_LOG") or "").lower() in {"1","true","yes","on"}:
            import json as _json
            print(_json.dumps({"event": "theme_preview_cache_evict", **meta}, separators=(",",":")))  # noqa: T201
    except Exception:
        pass


def record_splash_analytics(off_color_card_count: int, penalty_reason_events: int) -> None:
    """Record splash off-color analytics for a single preview build.

    off_color_card_count: number of sampled cards marked with _splash_off_color flag.
    penalty_reason_events: count of 'splash_off_color_penalty' reason entries encountered.
    """
    global _SPLASH_OFF_COLOR_TOTAL, _SPLASH_PREVIEWS_WITH_PENALTY, _SPLASH_PENALTY_CARD_EVENTS
    if off_color_card_count > 0:
        _SPLASH_PREVIEWS_WITH_PENALTY += 1
        _SPLASH_OFF_COLOR_TOTAL += off_color_card_count
    if penalty_reason_events > 0:
        _SPLASH_PENALTY_CARD_EVENTS += penalty_reason_events
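A sketch of how an orchestrator might wire the injected accessors and read a snapshot; the lambdas below stand in for the real theme_preview wiring and the import paths are assumed.

# Illustrative wiring of the metrics accessors (stand-ins for the real orchestrator).
from code.web.services import preview_cache as pc          # import paths assumed
from code.web.services import preview_metrics as metrics

metrics.configure_external_access(
    ttl_seconds_fn=pc.ttl_seconds,
    recent_hit_window_fn=pc.recent_hit_window,
    cache_len_fn=lambda: len(pc.PREVIEW_CACHE),
    last_bust_at_fn=pc.preview_cache_last_bust_at,
    curated_synergy_loaded_fn=lambda: False,   # placeholder; the real accessor lives in theme_preview
    curated_synergy_size_fn=lambda: 0,
)

metrics.record_request(hit=False)
metrics.record_build_duration(42.0)
metrics.record_per_theme("aggro", build_ms=42.0, curated=3, sampled=9)

snap = metrics.preview_metrics()
print(snap["preview_requests"], snap["preview_avg_build_ms"], snap["per_theme"]["aggro"]["builds"])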
167  code/web/services/preview_policy.py  Normal file
@@ -0,0 +1,167 @@
"""Preview policy module (Phase 2 extraction).

Extracts adaptive TTL band logic so experimentation can occur without
touching core cache data structures. Future extensions will add:
- Adaptive eviction strategy (hit-ratio + recency hybrid)
- Backend abstraction tuning knobs (e.g., Redis TTL harmonization)

Current exported API is intentionally small/stable:

compute_ttl_adjustment(hit_ratio: float, current_ttl: int,
                       base: int = DEFAULT_TTL_BASE,
                       ttl_min: int = DEFAULT_TTL_MIN,
                       ttl_max: int = DEFAULT_TTL_MAX) -> int
    Given the recent hit ratio (0..1) and current TTL, returns the new TTL
    after applying banded adjustment rules. Never mutates globals; caller
    decides whether to commit the change.

Constants kept here mirror the prior inline values from preview_cache.
Environment overrides for bands & step sizes are applied once at import time
(see _apply_env_overrides); invalid or absent overrides leave the defaults
unchanged so existing test behavior is preserved.
"""
from __future__ import annotations

from dataclasses import dataclass
import os

__all__ = [
    "DEFAULT_TTL_BASE",
    "DEFAULT_TTL_MIN",
    "DEFAULT_TTL_MAX",
    "BAND_LOW_CRITICAL",
    "BAND_LOW_MODERATE",
    "BAND_HIGH_GROW",
    "compute_ttl_adjustment",
]

DEFAULT_TTL_BASE = 600
DEFAULT_TTL_MIN = 300
DEFAULT_TTL_MAX = 900

# Default hit ratio band thresholds (exclusive upper bounds for each tier)
_DEFAULT_BAND_LOW_CRITICAL = 0.25  # Severe miss rate – shrink TTL aggressively
_DEFAULT_BAND_LOW_MODERATE = 0.55  # Mild miss bias – converge back toward base
_DEFAULT_BAND_HIGH_GROW = 0.75  # Healthy hit rate – modest growth

# Public band variables (may be overridden via env at import time)
BAND_LOW_CRITICAL = _DEFAULT_BAND_LOW_CRITICAL
BAND_LOW_MODERATE = _DEFAULT_BAND_LOW_MODERATE
BAND_HIGH_GROW = _DEFAULT_BAND_HIGH_GROW


@dataclass(frozen=True)
class AdjustmentSteps:
    low_critical: int = -60
    low_mod_decrease: int = -30
    low_mod_increase: int = 30
    high_grow: int = 60
    high_peak: int = 90  # very high hit ratio


_STEPS = AdjustmentSteps()

# --- Environment Override Support (POLICY Env overrides task) --- #
_ENV_APPLIED = False


def _parse_float_env(name: str, default: float) -> float:
    raw = os.getenv(name)
    if not raw:
        return default
    try:
        v = float(raw)
        if not (0.0 <= v <= 1.0):
            return default
        return v
    except Exception:
        return default


def _parse_int_env(name: str, default: int) -> int:
    raw = os.getenv(name)
    if not raw:
        return default
    try:
        return int(raw)
    except Exception:
        return default


def _apply_env_overrides() -> None:
    """Idempotently apply environment overrides for bands & step sizes.

    Env vars:
        THEME_PREVIEW_TTL_BASE / _MIN / _MAX (ints)
        THEME_PREVIEW_TTL_BANDS (comma floats: low_critical,low_moderate,high_grow)
        THEME_PREVIEW_TTL_STEPS (comma ints: low_critical,low_mod_dec,low_mod_inc,high_grow,high_peak)
    Invalid / partial specs fall back to defaults. Bands are validated to be
    strictly increasing within (0,1). If validation fails, defaults retained.
    """
    global DEFAULT_TTL_BASE, DEFAULT_TTL_MIN, DEFAULT_TTL_MAX
    global BAND_LOW_CRITICAL, BAND_LOW_MODERATE, BAND_HIGH_GROW, _STEPS, _ENV_APPLIED
    if _ENV_APPLIED:
        return
    DEFAULT_TTL_BASE = _parse_int_env("THEME_PREVIEW_TTL_BASE", DEFAULT_TTL_BASE)
    DEFAULT_TTL_MIN = _parse_int_env("THEME_PREVIEW_TTL_MIN", DEFAULT_TTL_MIN)
    DEFAULT_TTL_MAX = _parse_int_env("THEME_PREVIEW_TTL_MAX", DEFAULT_TTL_MAX)
    # Ensure ordering min <= base <= max
    if DEFAULT_TTL_MIN > DEFAULT_TTL_BASE:
        DEFAULT_TTL_MIN = min(DEFAULT_TTL_MIN, DEFAULT_TTL_BASE)
    if DEFAULT_TTL_BASE > DEFAULT_TTL_MAX:
        DEFAULT_TTL_MAX = max(DEFAULT_TTL_BASE, DEFAULT_TTL_MAX)
    bands_raw = os.getenv("THEME_PREVIEW_TTL_BANDS")
    if bands_raw:
        parts = [p.strip() for p in bands_raw.split(',') if p.strip()]
        vals: list[float] = []
        for p in parts[:3]:
            try:
                vals.append(float(p))
            except Exception:
                pass
        if len(vals) == 3:
            a, b, c = vals
            if 0 < a < b < c < 1:
                BAND_LOW_CRITICAL, BAND_LOW_MODERATE, BAND_HIGH_GROW = a, b, c
    steps_raw = os.getenv("THEME_PREVIEW_TTL_STEPS")
    if steps_raw:
        parts = [p.strip() for p in steps_raw.split(',') if p.strip()]
        ints: list[int] = []
        for p in parts[:5]:
            try:
                ints.append(int(p))
            except Exception:
                pass
        if len(ints) == 5:
            _STEPS = AdjustmentSteps(
                low_critical=ints[0],
                low_mod_decrease=ints[1],
                low_mod_increase=ints[2],
                high_grow=ints[3],
                high_peak=ints[4],
            )
    _ENV_APPLIED = True


# Apply overrides at import time (safe & idempotent)
_apply_env_overrides()


def compute_ttl_adjustment(
    hit_ratio: float,
    current_ttl: int,
    base: int = DEFAULT_TTL_BASE,
    ttl_min: int = DEFAULT_TTL_MIN,
    ttl_max: int = DEFAULT_TTL_MAX,
) -> int:
    """Return a new TTL based on hit ratio & current TTL.

    Logic mirrors the original inline implementation; extracted for clarity.
    """
    new_ttl = current_ttl
    if hit_ratio < BAND_LOW_CRITICAL:
        new_ttl = max(ttl_min, current_ttl + _STEPS.low_critical)
    elif hit_ratio < BAND_LOW_MODERATE:
        if current_ttl > base:
            new_ttl = max(base, current_ttl + _STEPS.low_mod_decrease)
        elif current_ttl < base:
            new_ttl = min(base, current_ttl + _STEPS.low_mod_increase)
        # else already at base – no change
    elif hit_ratio < BAND_HIGH_GROW:
        new_ttl = min(ttl_max, current_ttl + _STEPS.high_grow)
    else:
        new_ttl = min(ttl_max, current_ttl + _STEPS.high_peak)
    return new_ttl
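A worked example of the banded adjustment with the default constants (base 600, min 300, max 900, bands 0.25/0.55/0.75); these specific values hold only when no env overrides are set.

# Band behavior with the default constants (no env overrides).
from code.web.services.preview_policy import compute_ttl_adjustment  # import path assumed

assert compute_ttl_adjustment(0.10, 600) == 540  # < 0.25: shrink by 60, floored at 300
assert compute_ttl_adjustment(0.40, 660) == 630  # 0.25-0.55 and above base: step back toward 600
assert compute_ttl_adjustment(0.40, 600) == 600  # already at base: unchanged
assert compute_ttl_adjustment(0.70, 600) == 660  # 0.55-0.75: grow by 60, capped at 900
assert compute_ttl_adjustment(0.90, 870) == 900  # >= 0.75: grow by 90, capped at max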
259  code/web/services/sampling.py  Normal file
@@ -0,0 +1,259 @@
"""Sampling utilities extracted from theme_preview (Core Refactor Phase A - initial extraction).

This module contains card index construction and the deterministic sampling
pipeline used to build preview role buckets. Logic moved with minimal changes
to preserve behavior; future refactor steps will further decompose (e.g.,
separating card index & rarity calibration, introducing typed models).

Public (stable) surface for Phase A:
    sample_real_cards_for_theme(theme: str, limit: int, colors_filter: str | None,
                                *, synergies: list[str], commander: str | None) -> list[dict]

Internal helpers intentionally start with an underscore to discourage external
use; they may change in subsequent refactor steps.
"""
from __future__ import annotations

import random
from typing import Any, Dict, List, Optional, TypedDict

from .card_index import maybe_build_index, get_tag_pool, lookup_commander
from .sampling_config import (
    COMMANDER_COLOR_FILTER_STRICT,
    COMMANDER_OVERLAP_BONUS,
    COMMANDER_THEME_MATCH_BONUS,
    SPLASH_OFF_COLOR_PENALTY,
    SPLASH_ADAPTIVE_ENABLED,
    parse_splash_adaptive_scale,
    ROLE_BASE_WEIGHTS,
    ROLE_SATURATION_PENALTY,
    rarity_weight_base,
    parse_rarity_diversity_targets,
    RARITY_DIVERSITY_OVER_PENALTY,
)


_CARD_INDEX_DEPRECATED: Dict[str, List[Dict[str, Any]]] = {}  # kept for back-compat in tests; will be removed


class SampledCard(TypedDict, total=False):
    """Typed shape for a sampled card entry emitted to preview layer.

    total=False because curated examples / synthetic placeholders may lack
    full DB-enriched fields (mana_cost, rarity, color_identity_list, etc.).
    """
    name: str
    colors: List[str]
    roles: List[str]
    tags: List[str]
    score: float
    reasons: List[str]
    mana_cost: str
    rarity: str
    color_identity_list: List[str]
    pip_colors: List[str]


def _classify_role(theme: str, synergies: List[str], tags: List[str]) -> str:
    tag_set = set(tags)
    synergy_overlap = tag_set.intersection(synergies)
    if theme in tag_set:
        return "payoff"
    if len(synergy_overlap) >= 2:
        return "enabler"
    if len(synergy_overlap) == 1:
        return "support"
    return "wildcard"


def _seed_from(theme: str, commander: Optional[str]) -> int:
    base = f"{theme.lower()}|{(commander or '').lower()}".encode("utf-8")
    h = 0
    for b in base:
        h = (h * 131 + b) & 0xFFFFFFFF
    return h or 1


def _deterministic_shuffle(items: List[Any], seed: int) -> None:
    rnd = random.Random(seed)
    rnd.shuffle(items)


def _score_card(theme: str, synergies: List[str], role: str, tags: List[str]) -> float:
    tag_set = set(tags)
    synergy_overlap = len(tag_set.intersection(synergies))
    score = 0.0
    if theme in tag_set:
        score += 3.0
    score += synergy_overlap * 1.2
    score += ROLE_BASE_WEIGHTS.get(role, 0.5)
    return score


def _commander_overlap_scale(commander_tags: set[str], card_tags: List[str], synergy_set: set[str]) -> float:
    if not commander_tags or not synergy_set:
        return 0.0
    overlap_synergy = len(commander_tags.intersection(synergy_set).intersection(card_tags))
    if overlap_synergy <= 0:
        return 0.0
    return COMMANDER_OVERLAP_BONUS * (1 - (0.5 ** overlap_synergy))


def _lookup_commander(commander: Optional[str]) -> Optional[Dict[str, Any]]:  # thin wrapper for legacy name
    return lookup_commander(commander)


def sample_real_cards_for_theme(theme: str, limit: int, colors_filter: Optional[str], *, synergies: List[str], commander: Optional[str]) -> List[SampledCard]:
    """Return scored, role-classified real cards for a theme.

    Mirrors prior `_sample_real_cards_for_theme` behavior for parity.
    """
    maybe_build_index()
    pool = get_tag_pool(theme)
    if not pool:
        return []
    commander_card = _lookup_commander(commander)
    commander_colors: set[str] = set(commander_card.get("color_identity", "")) if commander_card else set()
    commander_tags: set[str] = set(commander_card.get("tags", [])) if commander_card else set()
    if colors_filter:
        allowed = {c.strip().upper() for c in colors_filter.split(',') if c.strip()}
        if allowed:
            pool = [c for c in pool if set(c.get("color_identity", "")).issubset(allowed) or not c.get("color_identity")]
    if commander_card and COMMANDER_COLOR_FILTER_STRICT and commander_colors:
        allow_splash = len(commander_colors) >= 4
        new_pool: List[Dict[str, Any]] = []
        for c in pool:
            ci = set(c.get("color_identity", ""))
            if not ci or ci.issubset(commander_colors):
                new_pool.append(c)
                continue
            if allow_splash:
                off = ci - commander_colors
                if len(off) == 1:
                    c["_splash_off_color"] = True  # type: ignore
                    new_pool.append(c)
                    continue
        pool = new_pool
    seen_names: set[str] = set()
    payoff: List[SampledCard] = []
    enabler: List[SampledCard] = []
    support: List[SampledCard] = []
    wildcard: List[SampledCard] = []
    rarity_counts: Dict[str, int] = {}
    rarity_diversity = parse_rarity_diversity_targets()
    synergy_set = set(synergies)
    rarity_weight_cfg = rarity_weight_base()
    splash_scale = parse_splash_adaptive_scale() if SPLASH_ADAPTIVE_ENABLED else None
    commander_color_count = len(commander_colors) if commander_colors else 0
    for raw in pool:
        nm = raw.get("name")
        if not nm or nm in seen_names:
            continue
        seen_names.add(nm)
        tags = raw.get("tags", [])
        role = _classify_role(theme, synergies, tags)
        score = _score_card(theme, synergies, role, tags)
        reasons = [f"role:{role}", f"synergy_overlap:{len(set(tags).intersection(synergies))}"]
        if commander_card:
            if theme in tags:
                score += COMMANDER_THEME_MATCH_BONUS
                reasons.append("commander_theme_match")
            scaled = _commander_overlap_scale(commander_tags, tags, synergy_set)
            if scaled:
                score += scaled
                reasons.append(f"commander_synergy_overlap:{len(commander_tags.intersection(synergy_set).intersection(tags))}:{round(scaled,2)}")
            reasons.append("commander_bias")
        rarity = raw.get("rarity") or ""
        if rarity:
            base_rarity_weight = rarity_weight_cfg.get(rarity, 0.25)
            count_so_far = rarity_counts.get(rarity, 0)
            increment_weight = base_rarity_weight / (1 + 0.4 * count_so_far)
            score += increment_weight
            rarity_counts[rarity] = count_so_far + 1
            reasons.append(f"rarity_weight_calibrated:{rarity}:{round(increment_weight,2)}")
            if rarity_diversity and rarity in rarity_diversity:
                lo, hi = rarity_diversity[rarity]
                # Only enforce upper bound (overflow penalty)
                if rarity_counts[rarity] > hi:
                    score += RARITY_DIVERSITY_OVER_PENALTY
                    reasons.append(f"rarity_diversity_overflow:{rarity}:{hi}:{RARITY_DIVERSITY_OVER_PENALTY}")
        if raw.get("_splash_off_color"):
            penalty = SPLASH_OFF_COLOR_PENALTY
            if splash_scale and commander_color_count:
                scale = splash_scale.get(commander_color_count, 1.0)
                adaptive_penalty = round(penalty * scale, 4)
                score += adaptive_penalty
                reasons.append(f"splash_off_color_penalty_adaptive:{commander_color_count}:{adaptive_penalty}")
            else:
                score += penalty  # negative value
                reasons.append(f"splash_off_color_penalty:{penalty}")
        item: SampledCard = {
            "name": nm,
            "colors": list(raw.get("color_identity", "")),
            "roles": [role],
            "tags": tags,
            "score": score,
            "reasons": reasons,
            "mana_cost": raw.get("mana_cost"),
            "rarity": rarity,
            "color_identity_list": raw.get("color_identity_list", []),
            "pip_colors": raw.get("pip_colors", []),
        }
        if role == "payoff":
            payoff.append(item)
        elif role == "enabler":
            enabler.append(item)
        elif role == "support":
            support.append(item)
        else:
            wildcard.append(item)
    seed = _seed_from(theme, commander)
    for bucket in (payoff, enabler, support, wildcard):
        _deterministic_shuffle(bucket, seed)
        bucket.sort(key=lambda x: (-x["score"], x["name"]))
    target_payoff = max(1, int(round(limit * 0.4)))
    target_enabler_support = max(1, int(round(limit * 0.4)))
    target_wild = max(0, limit - target_payoff - target_enabler_support)

    def take(n: int, source: List[SampledCard]):
        for i in range(min(n, len(source))):
            yield source[i]

    chosen: List[SampledCard] = []
    chosen.extend(take(target_payoff, payoff))
    es_combined = enabler + support
    chosen.extend(take(target_enabler_support, es_combined))
    chosen.extend(take(target_wild, wildcard))

    if len(chosen) < limit:
        def fill_from(src: List[SampledCard]):
            nonlocal chosen
            for it in src:
                if len(chosen) >= limit:
                    break
                if it not in chosen:
                    chosen.append(it)
        for bucket in (payoff, enabler, support, wildcard):
            fill_from(bucket)

    role_soft_caps = {
        "payoff": int(round(limit * 0.5)),
        "enabler": int(round(limit * 0.35)),
        "support": int(round(limit * 0.35)),
        "wildcard": int(round(limit * 0.25)),
    }
    role_seen: Dict[str, int] = {k: 0 for k in role_soft_caps}
    for it in chosen:
        r = (it.get("roles") or [None])[0]
        if not r or r not in role_soft_caps:
            continue
        role_seen[r] += 1
        if role_seen[r] > max(1, role_soft_caps[r]):
            it["score"] = it.get("score", 0) + ROLE_SATURATION_PENALTY  # negative value
            (it.setdefault("reasons", [])).append(f"role_saturation_penalty:{ROLE_SATURATION_PENALTY}")
    if len(chosen) > limit:
        chosen = chosen[:limit]
    return chosen


# Expose overlap scale for unit tests
commander_overlap_scale = _commander_overlap_scale
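A sketch of calling the extracted sampler; the theme, synergy, and filter values are illustrative, the import path is assumed, and results depend on whatever CSV shards the card index finds on disk.

# Illustrative call into the extracted sampler (theme/synergy names are made up).
from code.web.services.sampling import sample_real_cards_for_theme  # import path assumed

cards = sample_real_cards_for_theme(
    "Lifegain",                 # must match a themeTags value in the indexed CSVs
    limit=12,
    colors_filter="W,B",        # optional comma-separated color identity whitelist
    synergies=["Lifegain Matters", "Tokens"],
    commander=None,             # a commander name enables strict color filtering and bias bonuses
)
for c in cards:
    print(c["name"], c["roles"][0], round(c["score"], 2), c["reasons"][:2])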
123  code/web/services/sampling_config.py  Normal file
@@ -0,0 +1,123 @@
"""Scoring & sampling configuration constants (Phase 2 extraction).

Centralizes knobs used by the sampling pipeline so future tuning (or
experimentation via environment variables) can occur without editing the
core algorithm code.

Public constants (import into sampling.py and tests):
    COMMANDER_COLOR_FILTER_STRICT
    COMMANDER_OVERLAP_BONUS
    COMMANDER_THEME_MATCH_BONUS
    SPLASH_OFF_COLOR_PENALTY
    ROLE_BASE_WEIGHTS
    ROLE_SATURATION_PENALTY

Helper functions:
    rarity_weight_base() -> dict[str, float]
        Returns per-rarity base weights (reads env each call to preserve
        existing test expectations that patch env before invoking sampling).
"""
from __future__ import annotations

import os
from typing import Dict, Tuple, Optional

# Commander related bonuses (identical defaults to previous inline values)
COMMANDER_COLOR_FILTER_STRICT = True
COMMANDER_OVERLAP_BONUS = 1.8
COMMANDER_THEME_MATCH_BONUS = 0.9

# Penalties / bonuses
SPLASH_OFF_COLOR_PENALTY = -0.3
# Adaptive splash penalty feature flag & scaling factors.
# When SPLASH_ADAPTIVE=1 the effective penalty becomes:
#   base_penalty * splash_adaptive_scale(color_count)
# Where color_count is the number of distinct commander colors (1-5).
# Default scale keeps existing behavior at 1-3 colors, softens at 4, much lighter at 5.
SPLASH_ADAPTIVE_ENABLED = os.getenv("SPLASH_ADAPTIVE", "0") == "1"
_DEFAULT_SPLASH_SCALE = "1:1.0,2:1.0,3:1.0,4:0.6,5:0.35"


def parse_splash_adaptive_scale() -> Dict[int, float]:  # dynamic to allow test env changes
    spec = os.getenv("SPLASH_ADAPTIVE_SCALE", _DEFAULT_SPLASH_SCALE)
    mapping: Dict[int, float] = {}
    for part in spec.split(','):
        part = part.strip()
        if not part or ':' not in part:
            continue
        k_s, v_s = part.split(':', 1)
        try:
            k = int(k_s)
            v = float(v_s)
            if 1 <= k <= 5 and v > 0:
                mapping[k] = v
        except Exception:
            continue
    # Ensure all 1-5 present; fallback to 1.0 if unspecified
    for i in range(1, 6):
        mapping.setdefault(i, 1.0)
    return mapping


ROLE_SATURATION_PENALTY = -0.4

# Base role weights applied inside score calculation
ROLE_BASE_WEIGHTS: Dict[str, float] = {
    "payoff": 2.5,
    "enabler": 2.0,
    "support": 1.5,
    "wildcard": 0.9,
}

# Rarity base weights (diminishing duplicate influence applied in sampling pipeline)
# Read from env at call time to allow tests to modify.


def rarity_weight_base() -> Dict[str, float]:  # dynamic to allow env override per test
    return {
        "mythic": float(os.getenv("RARITY_W_MYTHIC", "1.2")),
        "rare": float(os.getenv("RARITY_W_RARE", "0.9")),
        "uncommon": float(os.getenv("RARITY_W_UNCOMMON", "0.65")),
        "common": float(os.getenv("RARITY_W_COMMON", "0.4")),
    }


__all__ = [
    "COMMANDER_COLOR_FILTER_STRICT",
    "COMMANDER_OVERLAP_BONUS",
    "COMMANDER_THEME_MATCH_BONUS",
    "SPLASH_OFF_COLOR_PENALTY",
    "SPLASH_ADAPTIVE_ENABLED",
    "parse_splash_adaptive_scale",
    "ROLE_BASE_WEIGHTS",
    "ROLE_SATURATION_PENALTY",
    "rarity_weight_base",
    "parse_rarity_diversity_targets",
    "RARITY_DIVERSITY_OVER_PENALTY",
]


# Extended rarity diversity (optional) ---------------------------------------
# Env var RARITY_DIVERSITY_TARGETS pattern e.g. "mythic:0-1,rare:0-2,uncommon:0-4,common:0-6"
# Parsed into mapping rarity -> (min,max). Only max is enforced currently (penalty applied
# when overflow occurs); min reserved for potential future boosting logic.

RARITY_DIVERSITY_OVER_PENALTY = float(os.getenv("RARITY_DIVERSITY_OVER_PENALTY", "-0.5"))


def parse_rarity_diversity_targets() -> Optional[Dict[str, Tuple[int, int]]]:
    spec = os.getenv("RARITY_DIVERSITY_TARGETS")
    if not spec:
        return None
    targets: Dict[str, Tuple[int, int]] = {}
    for part in spec.split(','):
        part = part.strip()
        if not part or ':' not in part:
            continue
        name, rng = part.split(':', 1)
        name = name.strip().lower()
        if '-' not in rng:
            continue
        lo_s, hi_s = rng.split('-', 1)
        try:
            lo = int(lo_s)
            hi = int(hi_s)
            if lo < 0 or hi < lo:
                continue
            targets[name] = (lo, hi)
        except Exception:
            continue
    return targets or None
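A quick sketch of the env-driven knobs in this module; the values are examples only and the import path is assumed.

# Example env overrides for the sampling knobs (values are illustrative).
import os

# SPLASH_ADAPTIVE and RARITY_DIVERSITY_OVER_PENALTY are read at import time,
# so they must be set before the module is first imported.
os.environ["SPLASH_ADAPTIVE"] = "1"
os.environ["SPLASH_ADAPTIVE_SCALE"] = "4:0.5,5:0.25"   # unspecified color counts default to 1.0
os.environ["RARITY_DIVERSITY_TARGETS"] = "mythic:0-1,rare:0-2"
os.environ["RARITY_W_MYTHIC"] = "1.5"

from code.web.services import sampling_config as cfg   # import path assumed

print(cfg.parse_splash_adaptive_scale())     # {1: 1.0, 2: 1.0, 3: 1.0, 4: 0.5, 5: 0.25}
print(cfg.parse_rarity_diversity_targets())  # {'mythic': (0, 1), 'rare': (0, 2)}
print(cfg.rarity_weight_base()["mythic"])    # 1.5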
File diff suppressed because it is too large