2025-10-28 08:21:52 -07:00
|
|
|
"""Loader for background cards derived from all_cards.parquet."""
|
2025-10-06 09:17:59 -07:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
import ast
|
2025-10-28 08:21:52 -07:00
|
|
|
import re
|
2025-10-06 09:17:59 -07:00
|
|
|
from dataclasses import dataclass
|
|
|
|
|
from functools import lru_cache
|
|
|
|
|
from pathlib import Path
|
2025-10-28 08:21:52 -07:00
|
|
|
from typing import Any, Mapping, Tuple
|
2025-10-06 09:17:59 -07:00
|
|
|
|
2025-10-18 21:32:12 -07:00
|
|
|
from logging_util import get_logger
|
2025-10-06 09:17:59 -07:00
|
|
|
from deck_builder.partner_background_utils import analyze_partner_background
|
|
|
|
|
|
|
|
|
|
# Module-level logger used by the loader functions in this file.
LOGGER = get_logger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass(frozen=True, slots=True)
|
|
|
|
|
class BackgroundCard:
|
|
|
|
|
"""Normalized background card entry."""
|
|
|
|
|
|
|
|
|
|
name: str
|
|
|
|
|
face_name: str | None
|
|
|
|
|
display_name: str
|
|
|
|
|
slug: str
|
|
|
|
|
color_identity: Tuple[str, ...]
|
|
|
|
|
colors: Tuple[str, ...]
|
|
|
|
|
mana_cost: str
|
|
|
|
|
mana_value: float | None
|
|
|
|
|
type_line: str
|
|
|
|
|
oracle_text: str
|
|
|
|
|
keywords: Tuple[str, ...]
|
|
|
|
|
theme_tags: Tuple[str, ...]
|
|
|
|
|
raw_theme_tags: Tuple[str, ...]
|
|
|
|
|
edhrec_rank: int | None
|
|
|
|
|
layout: str
|
|
|
|
|
side: str | None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass(frozen=True, slots=True)
|
|
|
|
|
class BackgroundCatalog:
|
|
|
|
|
source_path: Path
|
|
|
|
|
etag: str
|
|
|
|
|
mtime_ns: int
|
|
|
|
|
size: int
|
|
|
|
|
version: str
|
|
|
|
|
entries: Tuple[BackgroundCard, ...]
|
|
|
|
|
by_name: Mapping[str, BackgroundCard]
|
|
|
|
|
|
|
|
|
|
def get(self, name: str) -> BackgroundCard | None:
|
|
|
|
|
return self.by_name.get(name.lower())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_background_cards(
    source_path: str | Path | None = None,
) -> BackgroundCatalog:
    """Load and cache background card data from all_cards.parquet.

    Args:
        source_path: Optional explicit location of the data file. When it is
            omitted (or falsy) the default chosen by
            ``_resolve_background_path`` is used.

    Returns:
        A ``BackgroundCatalog`` holding the parsed cards, a lower-cased
        display-name index and the file's size/mtime metadata.

    Raises:
        FileNotFoundError: If the resolved file does not exist.
    """
    resolved = _resolve_background_path(source_path)

    try:
        file_stat = resolved.stat()
    except FileNotFoundError:
        raise FileNotFoundError(f"Background data not found at {resolved}") from None

    mtime_ns = getattr(file_stat, "st_mtime_ns", int(file_stat.st_mtime * 1_000_000_000))
    size = file_stat.st_size

    # The modification time is part of the memoization key, so a rewritten
    # file is parsed again instead of being served from the cache.
    entries, version = _load_background_cards_cached(str(resolved), mtime_ns)

    name_index = {}
    for card in entries:
        name_index[card.display_name.lower()] = card

    catalog = BackgroundCatalog(
        source_path=resolved,
        etag=f"{size}-{mtime_ns}-{len(entries)}",
        mtime_ns=mtime_ns,
        size=size,
        version=version,
        entries=entries,
        by_name=name_index,
    )
    LOGGER.info("background_cards_loaded count=%s version=%s path=%s", len(entries), version, resolved)
    return catalog
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@lru_cache(maxsize=4)
def _load_background_cards_cached(path_str: str, mtime_ns: int) -> Tuple[Tuple[BackgroundCard, ...], str]:
    """Parse the parquet file into background cards, memoized per (path, mtime).

    Args:
        path_str: Path of the parquet file, as a string so it can be hashed
            for the cache key.
        mtime_ns: Modification time of the file. It is not read here; it only
            takes part in the ``lru_cache`` key so a changed file is re-parsed.

    Returns:
        ``(entries, version)``. ``version`` is ``"parquet"`` on success. When
        the file is missing, lacks an ``isBackground`` column, contains no
        background rows, or cannot be read, the result is ``((), "unknown")``.
        Such fallback results are memoized like successful ones.
    """
    unavailable = ((), "unknown")

    path = Path(path_str)
    if not path.exists():
        return unavailable

    try:
        import pandas as pd

        df = pd.read_parquet(path, engine="pyarrow")

        if "isBackground" not in df.columns:
            LOGGER.warning("isBackground column not found in %s", path)
            return unavailable

        # Build a strictly boolean mask. A flag column containing nulls is
        # loaded as object/nullable dtype, and indexing with it directly
        # raises, which previously discarded every background card.
        # Null flags are treated as "not a background".
        is_background = df["isBackground"].eq(True).fillna(False).astype(bool)
        df_backgrounds = df[is_background]

        if df_backgrounds.empty:
            LOGGER.warning("No background cards found in %s", path)
            return unavailable

        entries = _rows_to_cards(df_backgrounds)
    except Exception as e:
        # Deliberate best-effort boundary: a broken data file must not take
        # the caller down, so the failure is logged and an empty result used.
        LOGGER.error("Failed to load backgrounds from %s: %s", path, e)
        return unavailable

    return tuple(entries), "parquet"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _resolve_background_path(override: str | Path | None) -> Path:
|
2025-10-28 08:21:52 -07:00
|
|
|
"""Resolve path to all_cards.parquet."""
|
2025-10-06 09:17:59 -07:00
|
|
|
if override:
|
|
|
|
|
return Path(override).resolve()
|
2025-10-28 08:21:52 -07:00
|
|
|
# Use card_files/processed/all_cards.parquet
|
|
|
|
|
return Path("card_files/processed/all_cards.parquet").resolve()
|
2025-10-06 09:17:59 -07:00
|
|
|
|
|
|
|
|
|
2025-10-28 08:21:52 -07:00
|
|
|
def _rows_to_cards(df) -> list[BackgroundCard]:
|
|
|
|
|
"""Convert DataFrame rows to BackgroundCard objects."""
|
2025-10-06 09:17:59 -07:00
|
|
|
entries: list[BackgroundCard] = []
|
|
|
|
|
seen: set[str] = set()
|
2025-10-28 08:21:52 -07:00
|
|
|
|
|
|
|
|
for _, row in df.iterrows():
|
|
|
|
|
if row.empty:
|
2025-10-06 09:17:59 -07:00
|
|
|
continue
|
2025-10-28 08:21:52 -07:00
|
|
|
card = _row_to_card(row)
|
2025-10-06 09:17:59 -07:00
|
|
|
if card is None:
|
|
|
|
|
continue
|
|
|
|
|
key = card.display_name.lower()
|
|
|
|
|
if key in seen:
|
|
|
|
|
continue
|
|
|
|
|
seen.add(key)
|
|
|
|
|
entries.append(card)
|
2025-10-28 08:21:52 -07:00
|
|
|
|
2025-10-06 09:17:59 -07:00
|
|
|
entries.sort(key=lambda card: card.display_name)
|
|
|
|
|
return entries
|
|
|
|
|
|
|
|
|
|
|
2025-10-28 08:21:52 -07:00
|
|
|
def _row_to_card(row) -> BackgroundCard | None:
    """Convert a DataFrame row to a BackgroundCard.

    Args:
        row: One row of card data, normally the pandas ``Series`` yielded by
            ``DataFrame.iterrows()``. Objects that expose their fields only
            as attributes are still supported through a fallback.

    Returns:
        The normalized card, or ``None`` when the row has neither a face name
        nor a name, or when ``analyze_partner_background`` does not classify
        it as a background.
    """

    def get_val(key: str):
        """Return the row's value for ``key``; ``None`` when absent or missing."""
        try:
            # Item access first. Attribute access on a Series resolves real
            # Series attributes before column labels, so ``row.name`` is the
            # row's index label rather than the "name" column.
            val = row[key]
        except (LookupError, TypeError, ValueError):
            # Column absent, or the row is not subscriptable by label.
            val = getattr(row, key, None)

        if val is None:
            return None
        # NaN is how pandas marks a missing cell in float/object columns.
        if isinstance(val, float) and val != val:
            return None
        # pandas.NA (class NAType) and pandas.NaT (class NaTType).
        type_name = type(val).__name__
        if "NA" in type_name or type_name == "NaTType":
            return None
        return val

    name = _clean_str(get_val("name"))
    face_name = _clean_str(get_val("faceName")) or None
    display = face_name or name
    if not display:
        return None

    type_line = _clean_str(get_val("type"))
    oracle_text = _clean_multiline(get_val("text"))
    raw_theme_tags = tuple(_parse_literal_list(get_val("themeTags")))

    detection = analyze_partner_background(type_line, oracle_text, raw_theme_tags)
    if not detection.is_background:
        return None

    return BackgroundCard(
        name=name,
        face_name=face_name,
        display_name=display,
        slug=_slugify(display),
        color_identity=_parse_color_list(get_val("colorIdentity")),
        colors=_parse_color_list(get_val("colors")),
        mana_cost=_clean_str(get_val("manaCost")),
        mana_value=_parse_float(get_val("manaValue")),
        type_line=type_line,
        oracle_text=oracle_text,
        keywords=tuple(_split_list(get_val("keywords"))),
        theme_tags=tuple(tag for tag in raw_theme_tags if tag),
        raw_theme_tags=raw_theme_tags,
        edhrec_rank=_parse_int(get_val("edhrecRank")),
        layout=_clean_str(get_val("layout")) or "normal",
        side=_clean_str(get_val("side")) or None,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _clean_str(value: object) -> str:
|
|
|
|
|
if value is None:
|
|
|
|
|
return ""
|
|
|
|
|
return str(value).strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _clean_multiline(value: object) -> str:
|
|
|
|
|
if value is None:
|
|
|
|
|
return ""
|
|
|
|
|
text = str(value).replace("\r\n", "\n").replace("\r", "\n")
|
|
|
|
|
return "\n".join(line.rstrip() for line in text.splitlines())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_literal_list(value: object) -> list[str]:
|
|
|
|
|
if value is None:
|
|
|
|
|
return []
|
2025-10-28 08:21:52 -07:00
|
|
|
|
|
|
|
|
# Check if it's a numpy array (from Parquet/pandas)
|
|
|
|
|
is_numpy = False
|
|
|
|
|
try:
|
|
|
|
|
import numpy as np
|
|
|
|
|
is_numpy = isinstance(value, np.ndarray)
|
|
|
|
|
except ImportError:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
# Handle lists, tuples, sets, and numpy arrays
|
|
|
|
|
if isinstance(value, (list, tuple, set)) or is_numpy:
|
2025-10-06 09:17:59 -07:00
|
|
|
return [str(item).strip() for item in value if str(item).strip()]
|
2025-10-28 08:21:52 -07:00
|
|
|
|
2025-10-06 09:17:59 -07:00
|
|
|
text = str(value).strip()
|
|
|
|
|
if not text:
|
|
|
|
|
return []
|
|
|
|
|
try:
|
|
|
|
|
parsed = ast.literal_eval(text)
|
|
|
|
|
except Exception:
|
|
|
|
|
parsed = None
|
|
|
|
|
if isinstance(parsed, (list, tuple, set)):
|
|
|
|
|
return [str(item).strip() for item in parsed if str(item).strip()]
|
|
|
|
|
parts = [part.strip() for part in text.replace(";", ",").split(",")]
|
|
|
|
|
return [part for part in parts if part]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _split_list(value: object) -> list[str]:
|
2025-10-28 08:21:52 -07:00
|
|
|
# Check if it's a numpy array (from Parquet/pandas)
|
|
|
|
|
is_numpy = False
|
|
|
|
|
try:
|
|
|
|
|
import numpy as np
|
|
|
|
|
is_numpy = isinstance(value, np.ndarray)
|
|
|
|
|
except ImportError:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
if isinstance(value, (list, tuple, set)) or is_numpy:
|
|
|
|
|
return [str(item).strip() for item in value if str(item).strip()]
|
|
|
|
|
|
2025-10-06 09:17:59 -07:00
|
|
|
text = _clean_str(value)
|
|
|
|
|
if not text:
|
|
|
|
|
return []
|
|
|
|
|
parts = [part.strip() for part in text.split(",")]
|
|
|
|
|
return [part for part in parts if part]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_color_list(value: object) -> Tuple[str, ...]:
|
2025-10-28 08:21:52 -07:00
|
|
|
# Check if it's a numpy array (from Parquet/pandas)
|
|
|
|
|
is_numpy = False
|
|
|
|
|
try:
|
|
|
|
|
import numpy as np
|
|
|
|
|
is_numpy = isinstance(value, np.ndarray)
|
|
|
|
|
except ImportError:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
if isinstance(value, (list, tuple, set)) or is_numpy:
|
|
|
|
|
parts = [str(item).strip().upper() for item in value if str(item).strip()]
|
|
|
|
|
return tuple(parts)
|
|
|
|
|
|
2025-10-06 09:17:59 -07:00
|
|
|
text = _clean_str(value)
|
|
|
|
|
if not text:
|
|
|
|
|
return tuple()
|
|
|
|
|
parts = [part.strip().upper() for part in text.split(",")]
|
|
|
|
|
return tuple(part for part in parts if part)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_float(value: object) -> float | None:
|
|
|
|
|
text = _clean_str(value)
|
|
|
|
|
if not text:
|
|
|
|
|
return None
|
|
|
|
|
try:
|
|
|
|
|
return float(text)
|
|
|
|
|
except ValueError:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_int(value: object) -> int | None:
|
|
|
|
|
text = _clean_str(value)
|
|
|
|
|
if not text:
|
|
|
|
|
return None
|
|
|
|
|
try:
|
|
|
|
|
return int(float(text))
|
|
|
|
|
except ValueError:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _slugify(value: str) -> str:
|
|
|
|
|
lowered = value.strip().lower()
|
|
|
|
|
allowed = [ch if ch.isalnum() else "-" for ch in lowered]
|
|
|
|
|
slug = "".join(allowed)
|
|
|
|
|
slug = re.sub(r"-+", "-", slug)
|
|
|
|
|
return slug.strip("-")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def clear_background_cards_cache() -> None:
    """Drop every memoized result of the background card loader.

    Intended for tests and support tooling that need the next load to read
    the data file again.
    """
    memoized_loader = _load_background_cards_cached
    memoized_loader.cache_clear()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Public API of this module.
__all__ = ["BackgroundCard", "BackgroundCatalog", "clear_background_cards_cache", "load_background_cards"]
|