feat: add Budget Mode with price cache infrastructure and stale price warnings

This commit is contained in:
matt 2026-03-23 16:19:18 -07:00
parent 1aa8e4d7e8
commit ec23775205
42 changed files with 6976 additions and 2753 deletions

View file

@ -0,0 +1,504 @@
"""Price service for card price lookups.
Loads prices from the local Scryfall bulk data file (one card per line),
caches results in a compact JSON file under card_files/, and provides
thread-safe batch lookups for budget evaluation.
Cache strategy:
- On first access, load from prices_cache.json if < TTL hours old
- If cache is stale or missing, rebuild by streaming the bulk data file
- In-memory dict (normalized lowercase key) is kept for fast lookups
- Background refresh available via refresh_cache_background()
"""
from __future__ import annotations
import json
import os
import threading
import time
from typing import Any, Callable, Dict, List, Optional
from code.path_util import card_files_dir, card_files_raw_dir
from code.web.services.base import BaseService
from code import logging_util
logger = logging_util.logging.getLogger(__name__)
logger.setLevel(logging_util.LOG_LEVEL)
logger.addHandler(logging_util.file_handler)
logger.addHandler(logging_util.stream_handler)
_CACHE_TTL_SECONDS = 86400 # 24 hours
_BULK_DATA_FILENAME = "scryfall_bulk_data.json"
_PRICE_CACHE_FILENAME = "prices_cache.json"
class PriceService(BaseService):
    """Service for card price lookups backed by Scryfall bulk data.

    Reads prices from the local Scryfall bulk data file that the setup
    pipeline already downloads. A compact JSON cache is written to
    card_files/ so subsequent startups load instantly without re-scanning
    the 500 MB bulk file.

    All public methods are thread-safe.
    """

    def __init__(
        self,
        *,
        bulk_data_path: Optional[str] = None,
        cache_path: Optional[str] = None,
        cache_ttl: int = _CACHE_TTL_SECONDS,
    ) -> None:
        """Initialize paths, TTL and all internal state.

        Args:
            bulk_data_path: Optional override for the Scryfall bulk data file.
            cache_path: Optional override for the compact JSON cache file.
            cache_ttl: Cache freshness window in seconds (default 24 h).
        """
        super().__init__()
        self._bulk_path: str = bulk_data_path or os.path.join(
            card_files_raw_dir(), _BULK_DATA_FILENAME
        )
        self._cache_path: str = cache_path or os.path.join(
            card_files_dir(), _PRICE_CACHE_FILENAME
        )
        self._ttl: int = cache_ttl
        # {normalized_card_name: {"usd": float, "usd_foil": float, "eur": float, "eur_foil": float}}
        self._cache: Dict[str, Dict[str, float]] = {}
        self._lock = threading.RLock()
        self._loaded = False
        self._last_refresh: float = 0.0
        self._hit_count = 0
        self._miss_count = 0
        self._refresh_thread: Optional[threading.Thread] = None
        # Scheduler / lazy-refresh state.
        # Fix: these were previously created only inside start_daily_refresh()
        # / start_lazy_refresh(), so _rebuild_cache() and get_stale_cards()
        # raised AttributeError on self._lazy_ts whenever lazy mode had never
        # been enabled (e.g. the very first get_price() with a stale cache).
        # Declaring everything up front makes every code path safe.
        self._daily_thread: Optional[threading.Thread] = None
        self._lazy_thread: Optional[threading.Thread] = None
        self._lazy_enabled = False  # flips True in start_lazy_refresh()
        self._lazy_stale_seconds: float = 0.0
        self._lazy_queue: set[str] = set()
        # {normalized_card_name: epoch seconds of last per-card refresh}
        self._lazy_ts: Dict[str, float] = {}
        self._lazy_lock = threading.Lock()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get_price(
        self,
        card_name: str,
        region: str = "usd",
        foil: bool = False,
    ) -> Optional[float]:
        """Return the price for *card_name* or ``None`` if not found.

        Args:
            card_name: Card name (case-insensitive).
            region: Price region - ``"usd"`` or ``"eur"``.
            foil: If ``True`` return foil price.

        Returns:
            Price as float or ``None`` when missing / card unknown.
        """
        self._ensure_loaded()
        price_key = region + ("_foil" if foil else "")
        entry = self._cache.get(card_name.lower().strip())
        # Opportunistically mark the card for a per-card API refresh when its
        # cached price is stale (no-op unless lazy mode is enabled).
        self.queue_lazy_refresh(card_name)
        with self._lock:
            if entry is not None:
                self._hit_count += 1
                return entry.get(price_key)
            self._miss_count += 1
            return None

    def get_prices_batch(
        self,
        card_names: List[str],
        region: str = "usd",
        foil: bool = False,
    ) -> Dict[str, Optional[float]]:
        """Return a mapping of card name → price for all requested cards.

        Missing cards map to ``None``. Preserves input ordering and
        original case in the returned keys.

        Args:
            card_names: List of card names to look up.
            region: Price region - ``"usd"`` or ``"eur"``.
            foil: If ``True`` return foil prices.

        Returns:
            Dict mapping each input name to its price (or ``None``).
        """
        self._ensure_loaded()
        price_key = region + ("_foil" if foil else "")
        result: Dict[str, Optional[float]] = {}
        hits = 0
        misses = 0
        for name in card_names:
            entry = self._cache.get(name.lower().strip())
            if entry is not None:
                result[name] = entry.get(price_key)
                hits += 1
            else:
                result[name] = None
                misses += 1
        # Fold the per-call tallies into the shared counters in one locked step.
        with self._lock:
            self._hit_count += hits
            self._miss_count += misses
        return result

    def cache_stats(self) -> Dict[str, Any]:
        """Return telemetry snapshot about cache performance.

        Returns:
            Dict with ``total_entries``, ``hit_count``, ``miss_count``,
            ``hit_rate``, ``last_refresh``, ``loaded``, ``cache_path``.
        """
        self._ensure_loaded()
        with self._lock:
            total = self._hit_count + self._miss_count
            return {
                "total_entries": len(self._cache),
                "hit_count": self._hit_count,
                "miss_count": self._miss_count,
                "hit_rate": (self._hit_count / total) if total > 0 else 0.0,
                "last_refresh": self._last_refresh,
                "loaded": self._loaded,
                "cache_path": self._cache_path,
                "bulk_data_available": os.path.exists(self._bulk_path),
            }

    def refresh_cache_background(self) -> None:
        """Spawn a daemon thread to rebuild the price cache asynchronously.

        If a refresh is already in progress, this call is a no-op.
        """
        with self._lock:
            if self._refresh_thread and self._refresh_thread.is_alive():
                logger.debug("Price cache background refresh already running")
                return
            t = threading.Thread(
                target=self._rebuild_cache,
                daemon=True,
                name="price-cache-refresh",
            )
            self._refresh_thread = t
            t.start()

    def get_cache_built_at(self) -> str | None:
        """Return a human-readable price cache build date, or None if unavailable.

        Uses the cache file's mtime (UTC), e.g. ``"March 23, 2026"``.
        """
        try:
            if os.path.exists(self._cache_path):
                import datetime
                built = os.path.getmtime(self._cache_path)
                if built:
                    dt = datetime.datetime.fromtimestamp(built, tz=datetime.timezone.utc)
                    return dt.strftime("%B %d, %Y")
        except Exception:
            # Best-effort display helper: any OS/parse error degrades to None.
            pass
        return None

    def start_daily_refresh(self, hour: int = 1, on_after_rebuild: Optional[Callable] = None) -> None:
        """Start a daemon thread that rebuilds prices once daily at *hour* UTC.

        Checks every 30 minutes. Safe to call multiple times; only one
        scheduler thread will be started.

        Args:
            hour: UTC hour (0-23) at which to run the nightly rebuild.
            on_after_rebuild: Optional callable invoked after each successful
                rebuild (e.g., to update the parquet files).
        """
        with self._lock:
            if self._daily_thread and self._daily_thread.is_alive():
                return

            def _loop() -> None:
                import datetime
                last_date: "datetime.date | None" = None
                while True:
                    try:
                        now = datetime.datetime.now(tz=datetime.timezone.utc)
                        today = now.date()
                        # Fires at most once per UTC date. NOTE: also fires
                        # immediately on startup when the current hour is
                        # already past *hour*.
                        if now.hour >= hour and today != last_date:
                            logger.info("Scheduled price refresh running (daily at %02d:00 UTC) …", hour)
                            self._rebuild_cache()
                            last_date = today
                            if on_after_rebuild:
                                try:
                                    on_after_rebuild()
                                except Exception as exc:
                                    logger.error("on_after_rebuild callback failed: %s", exc)
                            logger.info("Scheduled price refresh complete.")
                    except Exception as exc:
                        logger.error("Daily price refresh error: %s", exc)
                    time.sleep(1800)

            t = threading.Thread(target=_loop, daemon=True, name="price-daily-refresh")
            self._daily_thread = t
            t.start()
            logger.info("Daily price refresh scheduler started (hour=%d UTC)", hour)

    def start_lazy_refresh(self, stale_days: int = 7) -> None:
        """Start a background worker that refreshes per-card prices from the
        Scryfall API when they have not been updated within *stale_days* days.

        Queuing: call queue_lazy_refresh(card_name) to mark a card as stale.
        The worker runs every 60 seconds, processes up to 20 cards per cycle,
        and respects Scryfall's 100 ms rate-limit guideline.
        """
        with self._lock:
            if self._lazy_thread and self._lazy_thread.is_alive():
                return
            self._lazy_stale_seconds = stale_days * 86400
            self._lazy_queue = set()
            # Merge the persisted sidecar with any in-memory stamps (in-memory
            # wins: it can only be newer, e.g. after a just-finished rebuild).
            loaded_ts = self._load_lazy_ts()
            loaded_ts.update(self._lazy_ts)
            self._lazy_ts = loaded_ts
            self._lazy_enabled = True

            def _worker() -> None:
                while True:
                    try:
                        time.sleep(60)
                        with self._lazy_lock:
                            batch = list(self._lazy_queue)[:20]
                            self._lazy_queue -= set(batch)
                        if batch:
                            self._fetch_lazy_batch(batch)
                    except Exception as exc:
                        logger.error("Lazy price refresh error: %s", exc)

            t = threading.Thread(target=_worker, daemon=True, name="price-lazy-refresh")
            self._lazy_thread = t
            t.start()
            logger.info("Lazy price refresh worker started (stale_days=%d)", stale_days)

    def queue_lazy_refresh(self, card_name: str) -> None:
        """Mark *card_name* for a lazy per-card price update if its cached
        price is stale or missing. No-op when lazy mode is not enabled."""
        if not self._lazy_enabled:
            return
        key = card_name.lower().strip()
        ts = self._lazy_ts.get(key)
        if ts is None or (time.time() - ts) > self._lazy_stale_seconds:
            with self._lazy_lock:
                self._lazy_queue.add(card_name.strip())

    def _fetch_lazy_batch(self, names: list[str]) -> None:
        """Fetch fresh prices for *names* from the Scryfall named-card API.

        Each successful fetch replaces the card's in-memory entry and stamps
        its timestamp; failures are logged at debug level and skipped.
        """
        import urllib.request as _urllib
        import urllib.parse as _urlparse
        now = time.time()
        updated: dict[str, float] = {}
        for name in names:
            try:
                url = "https://api.scryfall.com/cards/named?" + _urlparse.urlencode({"exact": name, "format": "json"})
                req = _urllib.Request(url, headers={"User-Agent": "MTGPythonDeckbuilder/1.0"})
                with _urllib.urlopen(req, timeout=5) as resp:
                    data = json.loads(resp.read().decode())
                raw_prices: dict = data.get("prices") or {}
                entry = self._extract_prices(raw_prices)
                if entry:
                    key = name.lower()
                    with self._lock:
                        self._cache[key] = entry
                    updated[key] = now
                    logger.debug("Lazy refresh: %s → $%.2f", name, entry.get("usd", 0))
            except Exception as exc:
                logger.debug("Lazy price fetch skipped for %s: %s", name, exc)
            time.sleep(0.1)  # 100 ms — Scryfall rate-limit guideline
        if updated:
            self._lazy_ts.update(updated)
            self._save_lazy_ts()
            # Also persist the updated in-memory cache to the JSON cache file
            try:
                self._persist_cache_snapshot()
            except Exception:
                # Snapshot write is best-effort; the .ts sidecar already saved.
                pass

    def _load_lazy_ts(self) -> dict[str, float]:
        """Load per-card timestamps from the companion ``.ts`` sidecar file."""
        ts_path = self._cache_path + ".ts"
        try:
            if os.path.exists(ts_path):
                with open(ts_path, "r", encoding="utf-8") as fh:
                    return json.load(fh)
        except Exception:
            # Corrupt/unreadable sidecar → treat as empty (all cards stale).
            pass
        return {}

    def _save_lazy_ts(self) -> None:
        """Atomically persist per-card timestamps (write tmp, then rename)."""
        ts_path = self._cache_path + ".ts"
        tmp = ts_path + ".tmp"
        try:
            with open(tmp, "w", encoding="utf-8") as fh:
                json.dump(self._lazy_ts, fh, separators=(",", ":"))
            os.replace(tmp, ts_path)
        except Exception as exc:
            logger.warning("Failed to save lazy timestamps: %s", exc)

    def get_stale_cards(self, threshold_hours: int = 24) -> set[str]:
        """Return the set of card names whose cached price is older than *threshold_hours*.

        Uses the per-card timestamp sidecar (``prices_cache.json.ts``). If the
        sidecar is absent, all priced cards are considered stale (safe default).
        Returns an empty set when *threshold_hours* is 0 (warnings disabled).

        Card names are returned as the normalized lowercase keys stored in
        ``self._cache``.
        """
        if threshold_hours <= 0:
            return set()
        cutoff = time.time() - threshold_hours * 3600
        with self._lock:
            ts_map: dict[str, float] = dict(self._lazy_ts)
            cached_keys: set[str] = set(self._cache.keys())
        stale: set[str] = set()
        for key in cached_keys:
            ts = ts_map.get(key)
            # Missing timestamp counts as stale (safe default).
            if ts is None or ts < cutoff:
                stale.add(key)
        return stale

    def _persist_cache_snapshot(self) -> None:
        """Write the current in-memory cache to the JSON cache file (atomic)."""
        with self._lock:
            snapshot = dict(self._cache)
            built = self._last_refresh or time.time()
        cache_data = {"prices": snapshot, "built_at": built}
        tmp_path = self._cache_path + ".tmp"
        with open(tmp_path, "w", encoding="utf-8") as fh:
            json.dump(cache_data, fh, separators=(",", ":"))
        os.replace(tmp_path, self._cache_path)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _ensure_loaded(self) -> None:
        """Lazy-load the price cache on first access (double-checked lock)."""
        if self._loaded:
            return
        with self._lock:
            if self._loaded:
                return
            self._load_or_rebuild()
            self._loaded = True

    def _load_or_rebuild(self) -> None:
        """Load from JSON cache if fresh; otherwise rebuild from bulk data."""
        if os.path.exists(self._cache_path):
            try:
                age = time.time() - os.path.getmtime(self._cache_path)
                if age < self._ttl:
                    self._load_from_cache_file()
                    logger.info(
                        "Loaded %d prices from cache (age %.1fh)",
                        len(self._cache),
                        age / 3600,
                    )
                    return
                logger.info("Price cache stale (%.1fh old), rebuilding", age / 3600)
            except Exception as exc:
                logger.warning("Price cache unreadable, rebuilding: %s", exc)
        self._rebuild_cache()

    def _load_from_cache_file(self) -> None:
        """Deserialize the compact prices cache JSON into memory."""
        with open(self._cache_path, "r", encoding="utf-8") as fh:
            data = json.load(fh)
        self._cache = data.get("prices", {})
        self._last_refresh = data.get("built_at", 0.0)

    def _rebuild_cache(self) -> None:
        """Stream the Scryfall bulk data file and extract prices.

        Writes a compact cache JSON then swaps the in-memory dict.
        Uses an atomic rename so concurrent readers see a complete file.
        """
        if not os.path.exists(self._bulk_path):
            logger.warning("Scryfall bulk data not found at %s", self._bulk_path)
            return
        logger.info("Building price cache from %s ...", self._bulk_path)
        new_cache: Dict[str, Dict[str, float]] = {}
        built_at = time.time()
        try:
            with open(self._bulk_path, "r", encoding="utf-8") as fh:
                # The bulk file is a JSON array with one card object per line;
                # parse line-by-line to avoid loading ~500 MB at once.
                for raw_line in fh:
                    line = raw_line.strip().rstrip(",")
                    if not line or line in ("[", "]"):
                        continue
                    try:
                        card = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    name: str = card.get("name", "")
                    prices: Dict[str, Any] = card.get("prices") or {}
                    if not name:
                        continue
                    entry = self._extract_prices(prices)
                    if not entry:
                        continue
                    # Index by both the combined name and each face name
                    names_to_index = [name]
                    if " // " in name:
                        names_to_index += [part.strip() for part in name.split(" // ")]
                    for idx_name in names_to_index:
                        key = idx_name.lower()
                        existing = new_cache.get(key)
                        # Prefer cheapest non-foil USD price across printings
                        new_usd = entry.get("usd", 9999.0)
                        if existing is None or new_usd < existing.get("usd", 9999.0):
                            new_cache[key] = entry
        except Exception as exc:
            logger.error("Failed to parse bulk data: %s", exc)
            return
        # Write compact cache atomically
        try:
            cache_data = {"prices": new_cache, "built_at": built_at}
            tmp_path = self._cache_path + ".tmp"
            with open(tmp_path, "w", encoding="utf-8") as fh:
                json.dump(cache_data, fh, separators=(",", ":"))
            os.replace(tmp_path, self._cache_path)
            logger.info(
                "Price cache written: %d cards → %s", len(new_cache), self._cache_path
            )
        except Exception as exc:
            logger.error("Failed to write price cache: %s", exc)
        with self._lock:
            self._cache = new_cache
            self._last_refresh = built_at
            # Stamp all keys as fresh so get_stale_cards() reflects the rebuild.
            # Safe even when lazy mode is off: _lazy_ts is initialized in
            # __init__ (previously this raised AttributeError).
            for key in new_cache:
                self._lazy_ts[key] = built_at
            self._save_lazy_ts()

    @staticmethod
    def _extract_prices(prices: Dict[str, Any]) -> Dict[str, float]:
        """Convert raw Scryfall prices dict to {region_key: float} entries.

        Scryfall reports prices as strings (or null); missing/empty/invalid
        values are simply omitted from the result.
        """
        result: Dict[str, float] = {}
        for key in ("usd", "usd_foil", "eur", "eur_foil"):
            raw = prices.get(key)
            if raw is not None and raw != "":
                try:
                    result[key] = float(raw)
                except (ValueError, TypeError):
                    pass
        return result
# ---------------------------------------------------------------------------
# Module-level singleton (lazy)
# ---------------------------------------------------------------------------
_INSTANCE: Optional[PriceService] = None
_INSTANCE_LOCK = threading.Lock()
def get_price_service() -> PriceService:
    """Return the shared PriceService singleton, creating it on first call."""
    global _INSTANCE
    # Fast path: already built — no lock needed.
    if _INSTANCE is not None:
        return _INSTANCE
    # Slow path: construct under the lock, re-checking to avoid a duplicate
    # instance when two threads race past the fast path.
    with _INSTANCE_LOCK:
        if _INSTANCE is None:
            _INSTANCE = PriceService()
    return _INSTANCE