mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-16 15:40:12 +01:00
287 lines
9.6 KiB
Python
287 lines
9.6 KiB
Python
"""Utilities for detecting partner and background mechanics from card data."""
|
||
from __future__ import annotations
|
||
|
||
from dataclasses import dataclass
|
||
import math
|
||
import re
|
||
from typing import Any, Iterable, Tuple, List
|
||
|
||
__all__ = [
|
||
"PartnerBackgroundInfo",
|
||
"analyze_partner_background",
|
||
"extract_partner_with_names",
|
||
]
|
||
|
||
_PARTNER_PATTERN = re.compile(r"\bPartner\b(?!\s+with)", re.IGNORECASE)
|
||
_PARTNER_WITH_PATTERN = re.compile(r"\bPartner with ([^.;\n]+)", re.IGNORECASE)
|
||
_CHOOSE_BACKGROUND_PATTERN = re.compile(r"\bChoose a Background\b", re.IGNORECASE)
|
||
_BACKGROUND_KEYWORD_PATTERN = re.compile(r"\bBackground\b", re.IGNORECASE)
|
||
_FRIENDS_FOREVER_PATTERN = re.compile(r"\bFriends forever\b", re.IGNORECASE)
|
||
_DOCTORS_COMPANION_PATTERN = re.compile(r"Doctor's companion", re.IGNORECASE)
|
||
_PARTNER_RESTRICTION_PATTERN = re.compile(r"\bPartner\b\s*(?:—|-|–|:)", re.IGNORECASE)
|
||
_PARTNER_RESTRICTION_CAPTURE = re.compile(
|
||
r"\bPartner\b\s*(?:—|-|–|:)\s*([^.;\n\r(]+)",
|
||
re.IGNORECASE,
|
||
)
|
||
_PLAIN_PARTNER_THEME_TOKENS = {
|
||
"partner",
|
||
"partners",
|
||
}
|
||
_PARTNER_THEME_TOKENS = {
|
||
"partner",
|
||
"partners",
|
||
"friends forever",
|
||
"doctor's companion",
|
||
}
|
||
|
||
|
||
def _normalize_text(value: Any) -> str:
|
||
if value is None:
|
||
return ""
|
||
if isinstance(value, str):
|
||
text = value
|
||
elif isinstance(value, float):
|
||
if math.isnan(value):
|
||
return ""
|
||
text = str(value)
|
||
else:
|
||
text = str(value)
|
||
stripped = text.strip()
|
||
if stripped.casefold() == "nan":
|
||
return ""
|
||
return text
|
||
|
||
|
||
def _is_background_theme_tag(tag: str) -> bool:
|
||
text = (tag or "").strip().casefold()
|
||
if not text:
|
||
return False
|
||
if "background" not in text:
|
||
return False
|
||
if "choose a background" in text:
|
||
return False
|
||
if "backgrounds matter" in text:
|
||
return False
|
||
normalized = text.replace("—", "-").replace("–", "-")
|
||
if normalized in {"background", "backgrounds", "background card", "background (card type)"}:
|
||
return True
|
||
if normalized.startswith("background -") or normalized.startswith("background:"):
|
||
return True
|
||
if normalized.endswith(" background"):
|
||
return True
|
||
return False
|
||
|
||
|
||
@dataclass(frozen=True)
class PartnerBackgroundInfo:
    """Immutable summary of the partner/background mechanics found on a card.

    Field meanings below describe how ``analyze_partner_background`` fills
    them in.
    """

    # Any partner-family signal: plain, restricted, or a partner theme tag.
    has_partner: bool
    # Names extracted from "Partner with ..." oracle text, in first-seen order.
    partner_with: Tuple[str, ...]
    # "Choose a Background" found in the oracle text or in a theme tag.
    choose_background: bool
    # Type line contains the word "Background", or a theme tag says so.
    is_background: bool
    # Subtype section of the type line contains "time", "lord" and "doctor".
    is_doctor: bool
    # "Doctor's companion" in the oracle text, or a tag containing both
    # "doctor" and "companion".
    is_doctors_companion: bool
    # Unrestricted Partner keyword in the oracle text or theme tags.
    has_plain_partner: bool
    # Partner-with names, "Partner - X" restriction, or Friends forever.
    has_restricted_partner: bool
    # Restriction labels (the text after "Partner -"), de-duplicated.
    restricted_partner_labels: Tuple[str, ...]
|
||
|
||
|
||
def _normalize_theme_tags(tags: Iterable[str]) -> Tuple[str, ...]:
|
||
return tuple(tag.strip().lower() for tag in tags if str(tag).strip())
|
||
|
||
|
||
def extract_partner_with_names(oracle_text: str) -> Tuple[str, ...]:
    """Return the card names listed after "Partner with" in *oracle_text*.

    Each "Partner with ..." clause is cut at the first opening parenthesis
    (dropping reminder text) and then split on the separators "and", "or",
    "plus", "&", "/" and "+". Comma handling is delegated to
    ``_split_partner_token`` so that names containing a comma (e.g.
    "Pir, Imaginative Rascal") stay intact. Names are de-duplicated
    case-insensitively, keeping the first spelling and the original order.

    Returns an empty tuple when the text is missing or has no such clause.
    """
    normalized = _normalize_text(oracle_text)
    if not normalized:
        return tuple()

    # Insertion-ordered mapping: casefolded name -> first spelling seen.
    found: dict = {}
    for clause in _PARTNER_WITH_PATTERN.finditer(normalized):
        # Everything before the reminder text, minus stray dots and spaces.
        targets = clause.group(1).split("(", 1)[0].strip().strip(". ")
        if not targets:
            continue

        pieces = re.split(
            r"\s*(?:\band\b|\bor\b|\bplus\b|&|/|\+)\s*",
            targets,
            flags=re.IGNORECASE,
        )
        for piece in pieces:
            for candidate in _split_partner_token(piece):
                name = candidate.strip().strip("., ")
                if name:
                    found.setdefault(name.casefold(), name)

    return tuple(found.values())
|
||
|
||
|
||
_SIMPLE_NAME_TOKEN = re.compile(r"^[A-Za-z0-9'’\-]+$")
|
||
|
||
|
||
def _split_partner_token(token: str) -> List[str]:
|
||
cleaned = (token or "").strip()
|
||
if not cleaned:
|
||
return []
|
||
cleaned = cleaned.strip(",.; ")
|
||
if not cleaned:
|
||
return []
|
||
|
||
parts = [part.strip() for part in cleaned.split(",") if part.strip()]
|
||
if len(parts) <= 1:
|
||
return parts
|
||
|
||
if all(_SIMPLE_NAME_TOKEN.fullmatch(part) for part in parts):
|
||
return parts
|
||
|
||
return [cleaned]
|
||
|
||
|
||
def _has_plain_partner_keyword(oracle_text: str) -> bool:
    """Return True if some line of *oracle_text* is the bare Partner keyword.

    Each line is inspected with its reminder text (everything from the first
    opening parenthesis) removed. A line qualifies only when it starts with
    "partner" (case-insensitively) and nothing follows the keyword, so
    "Partner with ...", "Partner - ..." and any other continuation are
    rejected.
    """
    normalized = _normalize_text(oracle_text)
    if not normalized:
        return False

    keyword = "partner"
    for line in normalized.splitlines():
        ability = line.strip().split("(", 1)[0].strip()
        folded = ability.casefold()
        if not folded.startswith(keyword) or folded.startswith("partner with"):
            continue
        # Any trailing text (a dash/colon restriction or anything else)
        # means this is not the plain keyword.
        if ability[len(keyword):].strip():
            continue
        return True
    return False
|
||
|
||
|
||
def _has_partner_restriction(oracle_text: str) -> bool:
    """Return True if *oracle_text* contains "Partner" followed by a dash or colon.

    Missing or empty text yields False.
    """
    normalized = _normalize_text(oracle_text)
    return bool(normalized) and _PARTNER_RESTRICTION_PATTERN.search(normalized) is not None
|
||
|
||
|
||
def analyze_partner_background(
    type_line: str | None,
    oracle_text: str | None,
    theme_tags: Iterable[str] | None = None,
) -> PartnerBackgroundInfo:
    """Detect partner/background mechanics using text and theme tags.

    Args:
        type_line: The card's type line; ``None``/NaN is treated as empty.
        oracle_text: The card's rules text; ``None``/NaN is treated as empty.
        theme_tags: Optional iterable of theme tags. It may be a one-shot
            iterator; it is consumed exactly once.

    Returns:
        A ``PartnerBackgroundInfo`` combining signals from the oracle text,
        the type line and the theme tags.
    """
    # Snapshot the tags once. They are walked twice below (normalisation and
    # restricted-label collection); without the snapshot a generator would
    # be exhausted by the first walk and the second would see nothing.
    # Testing against None also avoids truth-testing the container itself.
    raw_tags = tuple(theme_tags) if theme_tags is not None else ()
    normalized_tags = _normalize_theme_tags(raw_tags)

    type_line_text = _normalize_text(type_line)
    oracle_text_value = _normalize_text(oracle_text)

    # --- Oracle-text signals -------------------------------------------
    partner_with = extract_partner_with_names(oracle_text_value)
    text_choose_background = bool(_CHOOSE_BACKGROUND_PATTERN.search(oracle_text_value))
    friends_forever = bool(_FRIENDS_FOREVER_PATTERN.search(oracle_text_value))
    plain_partner_keyword = _has_plain_partner_keyword(oracle_text_value)
    partner_restriction_keyword = _has_partner_restriction(oracle_text_value)

    # --- Theme-tag signals ---------------------------------------------
    theme_partner = any(tag in _PARTNER_THEME_TOKENS for tag in normalized_tags)
    theme_plain_partner = any(tag in _PLAIN_PARTNER_THEME_TOKENS for tag in normalized_tags)
    theme_choose_background = any("choose a background" in tag for tag in normalized_tags)
    theme_is_background = any(_is_background_theme_tag(tag) for tag in normalized_tags)
    theme_friends_forever = any(tag == "friends forever" for tag in normalized_tags)

    restricted_labels = _collect_restricted_partner_labels(oracle_text_value, raw_tags)

    has_plain_partner = bool(plain_partner_keyword or theme_plain_partner)
    has_restricted_partner = bool(
        partner_with
        or partner_restriction_keyword
        or friends_forever
        or theme_friends_forever
        or restricted_labels
    )

    # --- Type-line signals ---------------------------------------------
    # Subtypes follow the dash; an em dash is preferred, a hyphen is the
    # fallback, and a dash-less type line is scanned in full.
    creature_segment = ""
    if type_line_text:
        if "—" in type_line_text:
            creature_segment = type_line_text.split("—", 1)[1]
        elif "-" in type_line_text:
            creature_segment = type_line_text.split("-", 1)[1]
        else:
            creature_segment = type_line_text
    type_tokens = {token.lower() for token in creature_segment.split()}
    is_doctor = {"time", "lord", "doctor"}.issubset(type_tokens)

    is_doctors_companion = bool(_DOCTORS_COMPANION_PATTERN.search(oracle_text_value))
    if not is_doctors_companion:
        is_doctors_companion = any(
            "doctor" in tag and "companion" in tag for tag in normalized_tags
        )

    has_partner = bool(has_plain_partner or has_restricted_partner or theme_partner)
    choose_background = text_choose_background or theme_choose_background
    is_background = bool(_BACKGROUND_KEYWORD_PATTERN.search(type_line_text)) or theme_is_background

    return PartnerBackgroundInfo(
        has_partner=has_partner,
        partner_with=partner_with,
        choose_background=choose_background,
        is_background=is_background,
        is_doctor=is_doctor,
        is_doctors_companion=is_doctors_companion,
        has_plain_partner=has_plain_partner,
        has_restricted_partner=has_restricted_partner,
        restricted_partner_labels=restricted_labels,
    )
|
||
|
||
|
||
def _collect_restricted_partner_labels(
    oracle_text: str,
    theme_tags: Iterable[str] | None,
) -> Tuple[str, ...]:
    """Gather the labels that follow "Partner -" in the text and theme tags.

    Labels come first from every "Partner<sep>label" occurrence in the
    oracle text, then from theme tags that start with "partner" and contain
    a separator (em dash, hyphen, en dash or colon). Surrounding separator
    characters and spaces are trimmed, empty labels are dropped, and
    duplicates are removed case-insensitively while keeping first-seen
    order and spelling.
    """
    candidates = []

    # Oracle text: the capture group already stops at '.', ';', a line break
    # or '(', so only trailing punctuation needs trimming here.
    text = _normalize_text(oracle_text)
    for found in _PARTNER_RESTRICTION_CAPTURE.finditer(text):
        captured = found.group(1).split("(", 1)[0]
        candidates.append(captured.strip().rstrip(".,;:—-– "))

    # Theme tags: keep whatever follows the first separator.
    if theme_tags:
        for tag in theme_tags:
            tag_text = _normalize_text(tag).strip()
            if not tag_text.casefold().startswith("partner"):
                continue
            pieces = re.split(r"[—\-–:]", tag_text, maxsplit=1)
            if len(pieces) == 2:
                candidates.append(pieces[1])

    # Ordered de-duplication: casefolded label -> first spelling seen.
    unique: dict = {}
    for raw in candidates:
        if not raw:
            continue
        label = raw.strip().strip("-—–: ")
        if label:
            unique.setdefault(label.casefold(), label)
    return tuple(unique.values())
|