# mtg_python_deckbuilder/code/deck_builder/background_loader.py
#
# 310 lines
# 8.9 KiB
# Python

"""Loader for background cards derived from all_cards.parquet."""
from __future__ import annotations

import ast
import math
import re
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path
from typing import Any, Mapping, Tuple

from deck_builder.partner_background_utils import analyze_partner_background
from logging_util import get_logger
# Module-level logger, obtained from the project's logging_util helper.
LOGGER = get_logger(__name__)
@dataclass(frozen=True, slots=True)
class BackgroundCard:
"""Normalized background card entry."""
name: str
face_name: str | None
display_name: str
slug: str
color_identity: Tuple[str, ...]
colors: Tuple[str, ...]
mana_cost: str
mana_value: float | None
type_line: str
oracle_text: str
keywords: Tuple[str, ...]
theme_tags: Tuple[str, ...]
raw_theme_tags: Tuple[str, ...]
edhrec_rank: int | None
layout: str
side: str | None
@dataclass(frozen=True, slots=True)
class BackgroundCatalog:
source_path: Path
etag: str
mtime_ns: int
size: int
version: str
entries: Tuple[BackgroundCard, ...]
by_name: Mapping[str, BackgroundCard]
def get(self, name: str) -> BackgroundCard | None:
return self.by_name.get(name.lower())
def load_background_cards(
    source_path: str | Path | None = None,
) -> BackgroundCatalog:
    """Build a BackgroundCatalog from the background card data file.

    Args:
        source_path: Optional override for the data file location; when
            omitted the default location chosen by
            ``_resolve_background_path`` is used.

    Returns:
        A catalog holding the loaded entries, a lower-cased name lookup and
        the file metadata. The etag has the form
        ``"<size>-<mtime_ns>-<entry count>"``.

    Raises:
        FileNotFoundError: If the resolved data file does not exist.
    """
    data_path = _resolve_background_path(source_path)

    # Only the stat() call can raise FileNotFoundError, so it alone is guarded.
    try:
        file_stat = data_path.stat()
    except FileNotFoundError:
        raise FileNotFoundError(f"Background data not found at {data_path}") from None

    # Fall back to the float timestamp when st_mtime_ns is unavailable.
    modified_ns = getattr(file_stat, "st_mtime_ns", int(file_stat.st_mtime * 1_000_000_000))
    file_size = file_stat.st_size

    # The modification time is part of the cache key, so an updated file is re-read.
    cards, data_version = _load_background_cards_cached(str(data_path), modified_ns)

    # Later entries overwrite earlier ones that share a lower-cased name.
    lookup: dict[str, BackgroundCard] = {}
    for card in cards:
        lookup[card.display_name.lower()] = card

    result = BackgroundCatalog(
        source_path=data_path,
        etag=f"{file_size}-{modified_ns}-{len(cards)}",
        mtime_ns=modified_ns,
        size=file_size,
        version=data_version,
        entries=cards,
        by_name=lookup,
    )
    LOGGER.info("background_cards_loaded count=%s version=%s path=%s", len(cards), data_version, data_path)
    return result
@lru_cache(maxsize=4)
def _load_background_cards_cached(path_str: str, mtime_ns: int) -> Tuple[Tuple[BackgroundCard, ...], str]:
    """Read the parquet file and return its background cards (memoized).

    Args:
        path_str: Path of the parquet file, as a string so it can be hashed.
        mtime_ns: Modification time of the file. It is not read here; it only
            takes part in the cache key so that a changed file is re-read.

    Returns:
        ``(entries, version)``. ``version`` is ``"parquet"`` on success. On
        any failure (file missing, no ``isBackground`` column, no background
        rows, read/convert error) the result is ``((), "unknown")``.
    """
    path = Path(path_str)
    if not path.exists():
        return tuple(), "unknown"

    try:
        import pandas as pd

        df = pd.read_parquet(path, engine="pyarrow")

        # Filter for background cards
        if "isBackground" not in df.columns:
            LOGGER.warning("isBackground column not found in %s", path)
            return tuple(), "unknown"

        # Build an explicit boolean mask. A flag column containing nulls
        # (object dtype with None/NaN, or nullable "boolean" with NA) cannot
        # be used for masking directly: pandas raises, and the handler below
        # would then silently report zero backgrounds. Nulls count as False.
        mask = df["isBackground"].eq(True).fillna(False).astype(bool)
        df_backgrounds = df[mask]

        if df_backgrounds.empty:
            LOGGER.warning("No background cards found in %s", path)
            return tuple(), "unknown"

        entries = _rows_to_cards(df_backgrounds)
    except Exception as e:
        # Deliberate best-effort boundary: a broken data file yields an empty
        # catalog instead of crashing the caller.
        LOGGER.error("Failed to load backgrounds from %s: %s", path, e)
        return tuple(), "unknown"

    return tuple(entries), "parquet"
def _resolve_background_path(override: str | Path | None) -> Path:
"""Resolve path to all_cards.parquet."""
if override:
return Path(override).resolve()
# Use card_files/processed/all_cards.parquet
return Path("card_files/processed/all_cards.parquet").resolve()
def _rows_to_cards(df) -> list[BackgroundCard]:
    """Turn the rows of *df* into a de-duplicated, sorted list of cards.

    Rows that are empty or that ``_row_to_card`` rejects are skipped. When
    several rows share the same lower-cased display name, only the first one
    is kept. The result is ordered by display name.
    """
    # Dicts keep insertion order, so setdefault() gives "first one wins".
    first_by_key: dict[str, BackgroundCard] = {}
    for _index, record in df.iterrows():
        if record.empty:
            continue
        candidate = _row_to_card(record)
        if candidate is None:
            continue
        first_by_key.setdefault(candidate.display_name.lower(), candidate)
    return sorted(first_by_key.values(), key=lambda card: card.display_name)
def _row_to_card(row) -> BackgroundCard | None:
    """Convert a DataFrame row to a BackgroundCard.

    Args:
        row: One record of card data. Normally a pandas Series as yielded by
            ``DataFrame.iterrows()``; any object exposing ``keys()`` plus
            item access, or plain attributes, is accepted as well.

    Returns:
        The normalized card, or None when the row has neither a face name nor
        a name, or when ``analyze_partner_background`` does not report it as
        a background.
    """

    def is_missing(val: object) -> bool:
        """Tell whether *val* is a missing-data marker (None, NA, NaT, NaN)."""
        if val is None:
            return True
        if type(val).__name__ in ("NAType", "NaTType"):
            return True
        # Only scalars are probed for NaN; strings, lists and arrays are real
        # values and comparing an array to itself would not yield one bool.
        if hasattr(val, "__len__"):
            return False
        try:
            # NaN is the one ordinary value that is not equal to itself.
            return bool(val != val)
        except (TypeError, ValueError):
            return False

    # Helper to safely get values from DataFrame row
    def get_val(key: str):
        try:
            keys = getattr(row, "keys", None)
            if callable(keys) and key in keys():
                # Column lookup has to come first: attribute access on a
                # Series is shadowed by the Series' own attributes, e.g.
                # ``row.name`` is the row's index label, not the "name" column.
                val = row[key]
            elif hasattr(row, key):
                # Fallback for attribute-style records, and for data without
                # such a column (the previous lookup strategy).
                val = getattr(row, key)
            else:
                return None
        except (AttributeError, KeyError, TypeError, ValueError):
            return None
        return None if is_missing(val) else val

    name = _clean_str(get_val("name"))
    face_name = _clean_str(get_val("faceName")) or None
    display = face_name or name
    if not display:
        return None

    type_line = _clean_str(get_val("type"))
    oracle_text = _clean_multiline(get_val("text"))
    raw_theme_tags = tuple(_parse_literal_list(get_val("themeTags")))

    detection = analyze_partner_background(type_line, oracle_text, raw_theme_tags)
    if not detection.is_background:
        return None

    return BackgroundCard(
        name=name,
        face_name=face_name,
        display_name=display,
        slug=_slugify(display),
        color_identity=_parse_color_list(get_val("colorIdentity")),
        colors=_parse_color_list(get_val("colors")),
        mana_cost=_clean_str(get_val("manaCost")),
        mana_value=_parse_float(get_val("manaValue")),
        type_line=type_line,
        oracle_text=oracle_text,
        keywords=tuple(_split_list(get_val("keywords"))),
        theme_tags=tuple(tag for tag in raw_theme_tags if tag),
        raw_theme_tags=raw_theme_tags,
        edhrec_rank=_parse_int(get_val("edhrecRank")),
        layout=_clean_str(get_val("layout")) or "normal",
        side=_clean_str(get_val("side")) or None,
    )
def _clean_str(value: object) -> str:
if value is None:
return ""
return str(value).strip()
def _clean_multiline(value: object) -> str:
if value is None:
return ""
text = str(value).replace("\r\n", "\n").replace("\r", "\n")
return "\n".join(line.rstrip() for line in text.splitlines())
def _parse_literal_list(value: object) -> list[str]:
if value is None:
return []
# Check if it's a numpy array (from Parquet/pandas)
is_numpy = False
try:
import numpy as np
is_numpy = isinstance(value, np.ndarray)
except ImportError:
pass
# Handle lists, tuples, sets, and numpy arrays
if isinstance(value, (list, tuple, set)) or is_numpy:
return [str(item).strip() for item in value if str(item).strip()]
text = str(value).strip()
if not text:
return []
try:
parsed = ast.literal_eval(text)
except Exception:
parsed = None
if isinstance(parsed, (list, tuple, set)):
return [str(item).strip() for item in parsed if str(item).strip()]
parts = [part.strip() for part in text.replace(";", ",").split(",")]
return [part for part in parts if part]
def _split_list(value: object) -> list[str]:
# Check if it's a numpy array (from Parquet/pandas)
is_numpy = False
try:
import numpy as np
is_numpy = isinstance(value, np.ndarray)
except ImportError:
pass
if isinstance(value, (list, tuple, set)) or is_numpy:
return [str(item).strip() for item in value if str(item).strip()]
text = _clean_str(value)
if not text:
return []
parts = [part.strip() for part in text.split(",")]
return [part for part in parts if part]
def _parse_color_list(value: object) -> Tuple[str, ...]:
# Check if it's a numpy array (from Parquet/pandas)
is_numpy = False
try:
import numpy as np
is_numpy = isinstance(value, np.ndarray)
except ImportError:
pass
if isinstance(value, (list, tuple, set)) or is_numpy:
parts = [str(item).strip().upper() for item in value if str(item).strip()]
return tuple(parts)
text = _clean_str(value)
if not text:
return tuple()
parts = [part.strip().upper() for part in text.split(",")]
return tuple(part for part in parts if part)
def _parse_float(value: object) -> float | None:
    """Parse *value* as a finite float.

    Returns:
        The parsed number, or None when the value is None, blank, not
        numeric, or not finite (NaN / infinity).
    """
    text = _clean_str(value)
    if not text:
        return None
    try:
        number = float(text)
    except ValueError:
        return None
    # float() happily accepts "nan" and "inf". Missing numeric cells arrive
    # from pandas as NaN, and a NaN result would break equality checks and
    # JSON serialization downstream, so non-finite values count as absent.
    return number if math.isfinite(number) else None
def _parse_int(value: object) -> int | None:
    """Parse *value* as an integer, truncating any fractional part.

    The text is parsed as a float first so that inputs such as ``"12.0"``
    are accepted.

    Returns:
        The parsed integer, or None when the value is None, blank, not
        numeric, or not finite (NaN / infinity).
    """
    text = _clean_str(value)
    if not text:
        return None
    try:
        return int(float(text))
    except (ValueError, OverflowError):
        # int(nan) raises ValueError, int(inf) raises OverflowError; both
        # mean "no usable integer" rather than a reason to abort the load.
        return None
def _slugify(value: str) -> str:
lowered = value.strip().lower()
allowed = [ch if ch.isalnum() else "-" for ch in lowered]
slug = "".join(allowed)
slug = re.sub(r"-+", "-", slug)
return slug.strip("-")
def clear_background_cards_cache() -> None:
    """Drop every memoized result of the cached loader.

    Intended for tests and support tooling that need the data file to be
    re-read on the next load.
    """
    cached_loader = _load_background_cards_cached
    cached_loader.cache_clear()
# Public API of this module: the names exported by a star-import.
__all__ = [
"BackgroundCard",
"BackgroundCatalog",
"clear_background_cards_cache",
"load_background_cards",
]