"""Utilities for detecting partner and background mechanics from card data.""" from __future__ import annotations from dataclasses import dataclass import math import re from typing import Any, Iterable, Tuple, List __all__ = [ "PartnerBackgroundInfo", "analyze_partner_background", "extract_partner_with_names", ] _PARTNER_PATTERN = re.compile(r"\bPartner\b(?!\s+with)", re.IGNORECASE) _PARTNER_WITH_PATTERN = re.compile(r"\bPartner with ([^.;\n]+)", re.IGNORECASE) _CHOOSE_BACKGROUND_PATTERN = re.compile(r"\bChoose a Background\b", re.IGNORECASE) _BACKGROUND_KEYWORD_PATTERN = re.compile(r"\bBackground\b", re.IGNORECASE) _FRIENDS_FOREVER_PATTERN = re.compile(r"\bFriends forever\b", re.IGNORECASE) _DOCTORS_COMPANION_PATTERN = re.compile(r"Doctor's companion", re.IGNORECASE) _PARTNER_RESTRICTION_PATTERN = re.compile(r"\bPartner\b\s*(?:—|-|–|:)", re.IGNORECASE) _PARTNER_RESTRICTION_CAPTURE = re.compile( r"\bPartner\b\s*(?:—|-|–|:)\s*([^.;\n\r(]+)", re.IGNORECASE, ) _PLAIN_PARTNER_THEME_TOKENS = { "partner", "partners", } _PARTNER_THEME_TOKENS = { "partner", "partners", "friends forever", "doctor's companion", } def _normalize_text(value: Any) -> str: if value is None: return "" if isinstance(value, str): text = value elif isinstance(value, float): if math.isnan(value): return "" text = str(value) else: text = str(value) stripped = text.strip() if stripped.casefold() == "nan": return "" return text def _is_background_theme_tag(tag: str) -> bool: text = (tag or "").strip().casefold() if not text: return False if "background" not in text: return False if "choose a background" in text: return False if "backgrounds matter" in text: return False normalized = text.replace("—", "-").replace("–", "-") if normalized in {"background", "backgrounds", "background card", "background (card type)"}: return True if normalized.startswith("background -") or normalized.startswith("background:"): return True if normalized.endswith(" background"): return True return False @dataclass(frozen=True) class PartnerBackgroundInfo: """Aggregated partner/background detection result.""" has_partner: bool partner_with: Tuple[str, ...] choose_background: bool is_background: bool is_doctor: bool is_doctors_companion: bool has_plain_partner: bool has_restricted_partner: bool restricted_partner_labels: Tuple[str, ...] def _normalize_theme_tags(tags: Iterable[str]) -> Tuple[str, ...]: return tuple(tag.strip().lower() for tag in tags if str(tag).strip()) def extract_partner_with_names(oracle_text: str) -> Tuple[str, ...]: """Extract partner-with names from oracle text. Handles mixed separators ("and", "or", "&", "/") while preserving card names that include commas (e.g., "Pir, Imaginative Rascal"). Reminder text in parentheses is stripped and results are deduplicated while preserving order. """ text = _normalize_text(oracle_text) if not text: return tuple() names: list[str] = [] seen: set[str] = set() for match in _PARTNER_WITH_PATTERN.finditer(text): raw_targets = match.group(1) # Remove reminder text and trailing punctuation until_paren = raw_targets.split("(", 1)[0] base_text = until_paren.strip().strip(". ") if not base_text: continue segments = re.split(r"\s*(?:\band\b|\bor\b|\bplus\b|&|/|\+)\s*", base_text, flags=re.IGNORECASE) buffer: List[str] = [] for token in segments: buffer.extend(_split_partner_token(token)) for item in buffer: cleaned = item.strip().strip("., ") if not cleaned: continue lowered = cleaned.casefold() if lowered in seen: continue seen.add(lowered) names.append(cleaned) return tuple(names) _SIMPLE_NAME_TOKEN = re.compile(r"^[A-Za-z0-9'’\-]+$") def _split_partner_token(token: str) -> List[str]: cleaned = (token or "").strip() if not cleaned: return [] cleaned = cleaned.strip(",.; ") if not cleaned: return [] parts = [part.strip() for part in cleaned.split(",") if part.strip()] if len(parts) <= 1: return parts if all(_SIMPLE_NAME_TOKEN.fullmatch(part) for part in parts): return parts return [cleaned] def _has_plain_partner_keyword(oracle_text: str) -> bool: oracle_text = _normalize_text(oracle_text) if not oracle_text: return False for raw_line in oracle_text.splitlines(): line = raw_line.strip() if not line: continue ability = line.split("(", 1)[0].strip() if not ability: continue lowered = ability.casefold() if lowered.startswith("partner with"): continue if lowered.startswith("partner"): suffix = ability[7:].strip() if suffix and suffix[0] in {"-", "—", "–", ":"}: continue if suffix: # Contains additional text beyond plain Partner keyword continue return True return False def _has_partner_restriction(oracle_text: str) -> bool: oracle_text = _normalize_text(oracle_text) if not oracle_text: return False return bool(_PARTNER_RESTRICTION_PATTERN.search(oracle_text)) def analyze_partner_background( type_line: str | None, oracle_text: str | None, theme_tags: Iterable[str] | None = None, ) -> PartnerBackgroundInfo: """Detect partner/background mechanics using text and theme tags.""" normalized_tags = _normalize_theme_tags(theme_tags or ()) partner_with = extract_partner_with_names(oracle_text or "") type_line_text = _normalize_text(type_line) oracle_text_value = _normalize_text(oracle_text) choose_background = bool(_CHOOSE_BACKGROUND_PATTERN.search(oracle_text_value)) theme_partner = any(tag in _PARTNER_THEME_TOKENS for tag in normalized_tags) theme_plain_partner = any(tag in _PLAIN_PARTNER_THEME_TOKENS for tag in normalized_tags) theme_choose_background = any("choose a background" in tag for tag in normalized_tags) theme_is_background = any(_is_background_theme_tag(tag) for tag in normalized_tags) friends_forever = bool(_FRIENDS_FOREVER_PATTERN.search(oracle_text_value)) theme_friends_forever = any(tag == "friends forever" for tag in normalized_tags) plain_partner_keyword = _has_plain_partner_keyword(oracle_text_value) has_plain_partner = bool(plain_partner_keyword or theme_plain_partner) partner_restriction_keyword = _has_partner_restriction(oracle_text_value) restricted_labels = _collect_restricted_partner_labels(oracle_text_value, theme_tags) has_restricted_partner = bool( partner_with or partner_restriction_keyword or friends_forever or theme_friends_forever or restricted_labels ) creature_segment = "" if type_line_text: if "—" in type_line_text: creature_segment = type_line_text.split("—", 1)[1] elif "-" in type_line_text: creature_segment = type_line_text.split("-", 1)[1] else: creature_segment = type_line_text type_tokens = {part.strip().lower() for part in creature_segment.split() if part.strip()} has_time_lord_doctor = {"time", "lord", "doctor"}.issubset(type_tokens) is_doctor = bool(has_time_lord_doctor) is_doctors_companion = bool(_DOCTORS_COMPANION_PATTERN.search(oracle_text_value)) if not is_doctors_companion: is_doctors_companion = any("doctor" in tag and "companion" in tag for tag in normalized_tags) has_partner = bool(has_plain_partner or has_restricted_partner or theme_partner) choose_background = choose_background or theme_choose_background is_background = bool(_BACKGROUND_KEYWORD_PATTERN.search(type_line_text)) or theme_is_background return PartnerBackgroundInfo( has_partner=has_partner, partner_with=partner_with, choose_background=choose_background, is_background=is_background, is_doctor=is_doctor, is_doctors_companion=is_doctors_companion, has_plain_partner=has_plain_partner, has_restricted_partner=has_restricted_partner, restricted_partner_labels=restricted_labels, ) def _collect_restricted_partner_labels( oracle_text: str, theme_tags: Iterable[str] | None, ) -> Tuple[str, ...]: labels: list[str] = [] seen: set[str] = set() def _maybe_add(raw: str | None) -> None: if not raw: return cleaned = raw.strip().strip("-—–: ") if not cleaned: return key = cleaned.casefold() if key in seen: return seen.add(key) labels.append(cleaned) oracle_text = _normalize_text(oracle_text) for match in _PARTNER_RESTRICTION_CAPTURE.finditer(oracle_text): value = match.group(1) value = value.split("(", 1)[0] value = value.strip().rstrip(".,;:—-– ") _maybe_add(value) if theme_tags: for tag in theme_tags: text = _normalize_text(tag).strip() if not text: continue lowered = text.casefold() if not lowered.startswith("partner"): continue parts = re.split(r"[—\-–:]", text, maxsplit=1) if len(parts) < 2: continue _maybe_add(parts[1]) return tuple(labels)