# mtg_python_deckbuilder/code/deck_builder/suggestions.py

"""Partner suggestion scoring helpers.
This module provides a scoring helper that ranks potential partner/background
pairings for a selected primary commander. It consumes the normalized metadata
emitted by ``build_partner_suggestions.py`` (themes, role tags, partner flags,
and pairing telemetry) and blends several weighted components:
* Shared theme overlap (normalized Jaccard with a role-aware bonus) as the baseline synergy signal.
* Theme adjacency (deck export co-occurrence + curated overrides).
* Color compatibility (prefers pairings that add few new colors).
* Mechanic affinity (Partner With, Doctor/Companion, Background matches).
* Penalties (illegal configurations, missing tags, restricted conflicts).
Weights are mode-specific so future tuning can adjust emphasis without
rewriting the algorithm. The public ``score_partner_candidate`` helper returns
both the aggregate score and a component breakdown for diagnostics.
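
The final score is blended roughly as follows (see ``score_partner_candidate``
for the exact computation)::

    blended = (w_overlap * overlap + w_synergy * synergy
               + w_color * color + w_affinity * affinity)
              / (w_overlap + w_synergy + w_color + w_affinity)
    score = clamp(blended - w_penalty * penalty, 0.0, 1.0)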
"""
from __future__ import annotations
from dataclasses import dataclass
from functools import lru_cache
from typing import Dict, Iterable, Mapping, MutableMapping, Sequence
from .combined_commander import PartnerMode
__all__ = [
"PartnerSuggestionContext",
"ScoreWeights",
"ScoreResult",
"MODE_WEIGHTS",
"score_partner_candidate",
"is_noise_theme",
]
def _clean_str(value: object) -> str:
if value is None:
return ""
return str(value).strip()
def _normalize_token(value: str | None) -> str:
return _clean_str(value).casefold()
def _commander_name(payload: Mapping[str, object]) -> str:
name = _clean_str(payload.get("display_name")) or _clean_str(payload.get("name"))
return name or "Unknown Commander"
def _commander_key(payload: Mapping[str, object]) -> str:
return _normalize_token(_commander_name(payload))
def _sequence(payload: Mapping[str, object], key: str) -> tuple[str, ...]:
raw = payload.get(key)
if raw is None:
return tuple()
if isinstance(raw, (list, tuple)):
return tuple(_clean_str(item) for item in raw if _clean_str(item))
return tuple(filter(None, (_clean_str(raw),)))
_EXCLUDED_THEME_TOKENS = {
"legends matter",
"historics matter",
"partner",
"partner - survivors",
}
def _theme_should_be_excluded(theme: str) -> bool:
token = _normalize_token(theme)
if not token:
return False
if token in _EXCLUDED_THEME_TOKENS:
return True
return "kindred" in token
def is_noise_theme(theme: str | None) -> bool:
"""Return True when the provided theme is considered too generic/noisy.
The partner suggestion UI should suppress these themes from overlap summaries to
keep recommendations focused on distinctive archetypes.
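
    Illustrative checks (derived from ``_EXCLUDED_THEME_TOKENS`` and the
    kindred rule above)::

        >>> is_noise_theme("Partner")
        True
        >>> is_noise_theme("Elf Kindred")
        True
        >>> is_noise_theme("Treasure")
        False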
"""
if theme is None:
return False
return _theme_should_be_excluded(theme)
def _theme_sequence(payload: Mapping[str, object], key: str = "themes") -> tuple[str, ...]:
return tuple(
theme
for theme in _sequence(payload, key)
if not _theme_should_be_excluded(theme)
)
def _normalize_string_set(values: Iterable[str]) -> tuple[str, ...]:
seen: set[str] = set()
collected: list[str] = []
for value in values:
token = _clean_str(value)
if not token:
continue
key = token.casefold()
if key in seen:
continue
seen.add(key)
collected.append(token)
return tuple(collected)
@dataclass(frozen=True)
class ScoreWeights:
"""Weight multipliers for each scoring component."""
overlap: float
synergy: float
color: float
affinity: float
penalty: float
@dataclass(frozen=True)
class ScoreResult:
"""Result returned by :func:`score_partner_candidate`."""
score: float
mode: PartnerMode
components: Mapping[str, float]
notes: tuple[str, ...]
weights: ScoreWeights
class PartnerSuggestionContext:
"""Container for suggestion dataset fragments used during scoring."""
def __init__(
self,
*,
theme_cooccurrence: Mapping[str, Mapping[str, int]] | None = None,
pairing_counts: Mapping[tuple[str, str, str], int] | None = None,
curated_synergy: Mapping[tuple[str, str], float] | None = None,
) -> None:
self._theme_cooccurrence: Dict[str, Dict[str, float]] = {}
self._pairing_counts: Dict[tuple[str, str, str], float] = {}
self._curated_synergy: Dict[tuple[str, str], float] = {}
max_co = 0
if theme_cooccurrence:
for theme, neighbors in theme_cooccurrence.items():
theme_key = _normalize_token(theme)
if not theme_key:
continue
store: Dict[str, float] = {}
for other, count in neighbors.items():
other_key = _normalize_token(other)
if not other_key:
continue
value = float(count or 0)
if value <= 0:
continue
store[other_key] = value
max_co = max(max_co, value)
if store:
self._theme_cooccurrence[theme_key] = store
self._theme_co_max = max(max_co, 1.0)
max_pair = 0
if pairing_counts:
for key, count in pairing_counts.items():
if not isinstance(key, tuple) or len(key) != 3:
continue
mode, primary, secondary = key
norm_key = (
_normalize_token(mode),
_normalize_token(primary),
_normalize_token(secondary),
)
value = float(count or 0)
if value <= 0:
continue
self._pairing_counts[norm_key] = value
# Store symmetric entry to simplify lookups.
symmetric = (
_normalize_token(mode),
_normalize_token(secondary),
_normalize_token(primary),
)
self._pairing_counts[symmetric] = value
max_pair = max(max_pair, value)
self._pairing_max = max(max_pair, 1.0)
if curated_synergy:
for key, value in curated_synergy.items():
if not isinstance(key, tuple) or len(key) != 2:
continue
primary, secondary = key
normalized = (
_normalize_token(primary),
_normalize_token(secondary),
)
if value is None:
continue
magnitude = max(0.0, float(value))
if magnitude <= 0:
continue
self._curated_synergy[normalized] = min(1.0, magnitude)
self._curated_synergy[(normalized[1], normalized[0])] = min(1.0, magnitude)
@classmethod
def from_dataset(cls, payload: Mapping[str, object] | None) -> "PartnerSuggestionContext":
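        """Build a context from a partner-suggestion dataset payload.

        Missing or malformed sections are skipped rather than raising. The
        payload is expected to look roughly like this illustrative shape::

            {
                "themes": {"<theme>": {"co_occurrence": {"<other>": {"count": 3}}}},
                "pairings": {"records": [
                    {"mode": "partner", "primary": "<name>", "secondary": "<name>", "count": 2},
                ]},
                "curated_overrides": {"entries": {"<theme a>::<theme b>": 0.8}},
            }
        """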
if not payload:
return cls()
themes_raw = payload.get("themes")
theme_cooccurrence: Dict[str, Dict[str, int]] = {}
if isinstance(themes_raw, Mapping):
for theme_key, entry in themes_raw.items():
co = entry.get("co_occurrence") if isinstance(entry, Mapping) else None
if not isinstance(co, Mapping):
continue
inner: Dict[str, int] = {}
for other, info in co.items():
if isinstance(info, Mapping):
count = info.get("count")
else:
count = info
try:
inner[str(other)] = int(count)
except Exception:
continue
theme_cooccurrence[str(theme_key)] = inner
pairings = payload.get("pairings")
pairing_counts: Dict[tuple[str, str, str], int] = {}
if isinstance(pairings, Mapping):
records = pairings.get("records")
if isinstance(records, Sequence):
for entry in records:
if not isinstance(entry, Mapping):
continue
mode = str(entry.get("mode", "unknown"))
primary = str(entry.get("primary_canonical") or entry.get("primary") or "")
secondary = str(entry.get("secondary_canonical") or entry.get("secondary") or "")
if not primary or not secondary:
continue
try:
count = int(entry.get("count", 0))
except Exception:
continue
pairing_counts[(mode, primary, secondary)] = count
curated = payload.get("curated_overrides")
curated_synergy: Dict[tuple[str, str], float] = {}
if isinstance(curated, Mapping):
entries = curated.get("entries")
if isinstance(entries, Mapping):
for raw_key, raw_value in entries.items():
if not isinstance(raw_key, str):
continue
parts = [part.strip() for part in raw_key.split("::") if part.strip()]
if len(parts) != 2:
continue
try:
magnitude = float(raw_value)
except Exception:
continue
curated_synergy[(parts[0], parts[1])] = magnitude
return cls(
theme_cooccurrence=theme_cooccurrence,
pairing_counts=pairing_counts,
curated_synergy=curated_synergy,
)
@lru_cache(maxsize=256)
def theme_synergy(self, theme_a: str, theme_b: str) -> float:
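        """Return the co-occurrence/curated adjacency of two themes, clamped to [0, 1]."""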
key_a = _normalize_token(theme_a)
key_b = _normalize_token(theme_b)
if not key_a or not key_b or key_a == key_b:
return 0.0
co = self._theme_cooccurrence.get(key_a, {})
value = co.get(key_b, 0.0)
normalized = value / self._theme_co_max
curated = self._curated_synergy.get((key_a, key_b), 0.0)
return max(0.0, min(1.0, max(normalized, curated)))
@lru_cache(maxsize=128)
def pairing_strength(self, mode: PartnerMode, primary: str, secondary: str) -> float:
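        """Return the observed pairing frequency for ``mode``, normalized to [0, 1]."""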
key = (
mode.value,
_normalize_token(primary),
_normalize_token(secondary),
)
value = self._pairing_counts.get(key, 0.0)
return max(0.0, min(1.0, value / self._pairing_max))
DEFAULT_WEIGHTS = ScoreWeights(
overlap=0.45,
synergy=0.25,
color=0.15,
affinity=0.10,
penalty=0.20,
)
MODE_WEIGHTS: Mapping[PartnerMode, ScoreWeights] = {
PartnerMode.PARTNER: DEFAULT_WEIGHTS,
PartnerMode.PARTNER_WITH: ScoreWeights(overlap=0.40, synergy=0.20, color=0.10, affinity=0.20, penalty=0.25),
PartnerMode.BACKGROUND: ScoreWeights(overlap=0.50, synergy=0.30, color=0.10, affinity=0.10, penalty=0.25),
PartnerMode.DOCTOR_COMPANION: ScoreWeights(overlap=0.30, synergy=0.20, color=0.10, affinity=0.30, penalty=0.25),
PartnerMode.NONE: DEFAULT_WEIGHTS,
}
def _clamp(value: float, minimum: float = 0.0, maximum: float = 1.0) -> float:
if value < minimum:
return minimum
if value > maximum:
return maximum
return value
def score_partner_candidate(
primary: Mapping[str, object],
candidate: Mapping[str, object],
*,
mode: PartnerMode | str | None = None,
context: PartnerSuggestionContext | None = None,
) -> ScoreResult:
"""Score a partner/background candidate for the provided primary.
Args:
primary: Commander metadata dictionary (as produced by the dataset).
candidate: Potential partner/background metadata dictionary.
mode: Desired partner mode (auto-detected when omitted).
context: Optional suggestion context providing theme/pairing statistics.
Returns:
        ScoreResult with an aggregate score between ``0.0`` and ``1.0`` plus component details.
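
    Example:
        Illustrative only; the dictionaries mirror the commander metadata
        fields this module reads, with made-up names::

            >>> primary = {
            ...     "name": "Sample Doctor",
            ...     "themes": ["Artifacts Matter"],
            ...     "color_identity": ["U"],
            ...     "partner": {"is_doctor": True},
            ... }
            >>> candidate = {
            ...     "name": "Sample Companion",
            ...     "themes": ["Artifacts Matter"],
            ...     "color_identity": ["W"],
            ...     "partner": {"is_doctors_companion": True},
            ... }
            >>> result = score_partner_candidate(primary, candidate)
            >>> result.mode is PartnerMode.DOCTOR_COMPANION
            True
            >>> 0.0 <= result.score <= 1.0
            True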
"""
mode = _resolve_mode(primary, candidate, mode)
weights = MODE_WEIGHTS.get(mode, DEFAULT_WEIGHTS)
ctx = context or PartnerSuggestionContext()
overlap = _theme_overlap(primary, candidate)
synergy = _theme_synergy(primary, candidate, ctx)
color_value = _color_compatibility(primary, candidate)
affinity, affinity_notes, affinity_penalties = _mechanic_affinity(primary, candidate, mode, ctx)
penalty_value, penalty_notes = _collect_penalties(primary, candidate, mode, affinity_penalties)
positive_total = weights.overlap + weights.synergy + weights.color + weights.affinity
positive_total = positive_total or 1.0
blended = (
weights.overlap * overlap
+ weights.synergy * synergy
+ weights.color * color_value
+ weights.affinity * affinity
) / positive_total
adjusted = blended - weights.penalty * penalty_value
final_score = _clamp(adjusted)
notes = tuple(note for note in (*affinity_notes, *penalty_notes) if note)
components = {
"overlap": overlap,
"synergy": synergy,
"color": color_value,
"affinity": affinity,
"penalty": penalty_value,
}
return ScoreResult(
score=final_score,
mode=mode,
components=components,
notes=notes,
weights=weights,
)
def _resolve_mode(
primary: Mapping[str, object],
candidate: Mapping[str, object],
provided: PartnerMode | str | None,
) -> PartnerMode:
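    """Infer the pairing mode when ``provided`` is absent or unrecognized.

    Detection order: explicit value, background candidates, declared Partner With
    links, Doctor/Doctor's Companion pairs, shared Partner keywords, reversed
    background pairings, then a plain partner fallback.
    """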
if isinstance(provided, PartnerMode):
return provided
if isinstance(provided, str) and provided:
normalized = provided.replace("-", "_").strip().casefold()
for mode in PartnerMode:
if mode.value == normalized:
return mode
partner_meta_primary = _partner_meta(primary)
partner_meta_candidate = _partner_meta(candidate)
candidate_name = _commander_name(candidate)
if partner_meta_candidate.get("is_background"):
return PartnerMode.BACKGROUND
partner_with = {
_normalize_token(name)
for name in partner_meta_primary.get("partner_with", [])
}
if partner_with and _normalize_token(candidate_name) in partner_with:
return PartnerMode.PARTNER_WITH
if partner_meta_primary.get("is_doctor") and partner_meta_candidate.get("is_doctors_companion"):
return PartnerMode.DOCTOR_COMPANION
if partner_meta_primary.get("is_doctors_companion") and partner_meta_candidate.get("is_doctor"):
return PartnerMode.DOCTOR_COMPANION
if partner_meta_primary.get("has_partner") and partner_meta_candidate.get("has_partner"):
return PartnerMode.PARTNER
if partner_meta_candidate.get("supports_backgrounds") and partner_meta_primary.get("is_background"):
return PartnerMode.BACKGROUND
if partner_meta_candidate.get("has_partner"):
return PartnerMode.PARTNER
return PartnerMode.PARTNER
def _partner_meta(payload: Mapping[str, object]) -> MutableMapping[str, object]:
meta = payload.get("partner")
if isinstance(meta, Mapping):
return dict(meta)
return {}
def _theme_overlap(primary: Mapping[str, object], candidate: Mapping[str, object]) -> float:
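    """Blend theme-tag Jaccard overlap (70%) with role-tag overlap (30%)."""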
theme_primary = {
_normalize_token(theme)
for theme in _theme_sequence(primary)
}
theme_candidate = {
_normalize_token(theme)
for theme in _theme_sequence(candidate)
}
theme_primary.discard("")
theme_candidate.discard("")
role_primary = {
_normalize_token(tag)
for tag in _sequence(primary, "role_tags")
}
role_candidate = {
_normalize_token(tag)
for tag in _sequence(candidate, "role_tags")
}
role_primary.discard("")
role_candidate.discard("")
# Base Jaccard over theme tags.
union = theme_primary | theme_candidate
if not union:
base = 0.0
else:
base = len(theme_primary & theme_candidate) / len(union)
# Role-aware bonus (weighted at 30% of overlap component).
role_union = role_primary | role_candidate
if not role_union:
role_score = 0.0
else:
role_score = len(role_primary & role_candidate) / len(role_union)
combined = 0.7 * base + 0.3 * role_score
return _clamp(combined)
def _theme_synergy(
primary: Mapping[str, object],
candidate: Mapping[str, object],
context: PartnerSuggestionContext,
) -> float:
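    """Average pairwise theme adjacency, raised to the observed pairing signal when stronger."""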
themes_primary = _theme_sequence(primary)
themes_candidate = _theme_sequence(candidate)
if not themes_primary or not themes_candidate:
return 0.0
total = 0.0
weight = 0
for theme_a in themes_primary:
for theme_b in themes_candidate:
value = context.theme_synergy(theme_a, theme_b)
if value <= 0:
continue
total += value
weight += 1
if weight == 0:
return 0.0
average = total / weight
# Observed pairing signal augments synergy.
primary_name = _commander_name(primary)
candidate_name = _commander_name(candidate)
observed_partner = context.pairing_strength(PartnerMode.PARTNER, primary_name, candidate_name)
observed_background = context.pairing_strength(PartnerMode.BACKGROUND, primary_name, candidate_name)
observed_doctor = context.pairing_strength(PartnerMode.DOCTOR_COMPANION, primary_name, candidate_name)
observed_any = max(observed_partner, observed_background, observed_doctor)
return _clamp(max(average, observed_any))
def _color_compatibility(primary: Mapping[str, object], candidate: Mapping[str, object]) -> float:
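    """Favor candidates whose colors overlap the primary's identity and add few new colors."""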
primary_colors = {
_clean_str(color).upper()
for color in _sequence(primary, "color_identity")
}
candidate_colors = {
_clean_str(color).upper()
for color in _sequence(candidate, "color_identity")
}
if not candidate_colors:
# Colorless partners still provide value when primary is colored.
return 0.6 if primary_colors else 0.0
overlap = primary_colors & candidate_colors
union = primary_colors | candidate_colors
overlap_ratio = len(overlap) / max(len(candidate_colors), 1)
added_colors = len(union) - len(primary_colors)
if added_colors <= 0:
delta = 1.0
elif added_colors == 1:
delta = 0.75
elif added_colors == 2:
delta = 0.45
else:
delta = 0.20
colorless_bonus = 0.1 if candidate_colors == {"C"} else 0.0
blended = 0.6 * overlap_ratio + 0.4 * delta + colorless_bonus
return _clamp(blended)
def _mechanic_affinity(
primary: Mapping[str, object],
candidate: Mapping[str, object],
mode: PartnerMode,
context: PartnerSuggestionContext,
) -> tuple[float, list[str], list[tuple[str, float]]]:
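    """Score mode-specific mechanic fit; return (score, notes, suggested penalties)."""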
primary_meta = _partner_meta(primary)
candidate_meta = _partner_meta(candidate)
primary_name = _commander_name(primary)
candidate_name = _commander_name(candidate)
notes: list[str] = []
penalties: list[tuple[str, float]] = []
score = 0.0
if mode is PartnerMode.PARTNER_WITH:
partner_with = {
_normalize_token(name)
for name in primary_meta.get("partner_with", [])
}
if partner_with and _normalize_token(candidate_name) in partner_with:
score = 1.0
notes.append("partner_with_match")
else:
penalties.append(("missing_partner_with_link", 0.9))
elif mode is PartnerMode.BACKGROUND:
if candidate_meta.get("is_background") and primary_meta.get("supports_backgrounds"):
score = 0.9
notes.append("background_compatible")
else:
if not candidate_meta.get("is_background"):
penalties.append(("candidate_not_background", 1.0))
if not primary_meta.get("supports_backgrounds"):
penalties.append(("primary_cannot_use_background", 1.0))
elif mode is PartnerMode.DOCTOR_COMPANION:
primary_is_doctor = bool(primary_meta.get("is_doctor"))
primary_is_companion = bool(primary_meta.get("is_doctors_companion"))
candidate_is_doctor = bool(candidate_meta.get("is_doctor"))
candidate_is_companion = bool(candidate_meta.get("is_doctors_companion"))
if primary_is_doctor and candidate_is_companion:
score = 1.0
notes.append("doctor_companion_match")
elif primary_is_companion and candidate_is_doctor:
score = 1.0
notes.append("doctor_companion_match")
else:
penalties.append(("doctor_pairing_illegal", 1.0))
else: # Partner-style default
if primary_meta.get("has_partner") and candidate_meta.get("has_partner"):
score = 0.6
notes.append("shared_partner_keyword")
else:
penalties.append(("missing_partner_keyword", 1.0))
primary_labels = {
_normalize_token(label)
for label in _sequence(primary_meta, "restricted_partner_labels")
}
candidate_labels = {
_normalize_token(label)
for label in _sequence(candidate_meta, "restricted_partner_labels")
}
shared_labels = primary_labels & candidate_labels
if primary_labels or candidate_labels:
if shared_labels:
score = max(score, 0.85)
notes.append("restricted_label_match")
else:
penalties.append(("restricted_label_mismatch", 0.7))
observed = context.pairing_strength(mode, primary_name, candidate_name)
if observed > 0:
score = max(score, observed)
notes.append("observed_pairing")
return _clamp(score), notes, penalties
def _collect_penalties(
primary: Mapping[str, object],
candidate: Mapping[str, object],
mode: PartnerMode,
extra: Iterable[tuple[str, float]],
) -> tuple[float, list[str]]:
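    """Sum penalty magnitudes (clamped to 1.0) and return them with their reason codes."""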
penalties: list[tuple[str, float]] = list(extra)
themes_primary_raw = _sequence(primary, "themes")
themes_candidate_raw = _sequence(candidate, "themes")
themes_primary = _theme_sequence(primary)
themes_candidate = _theme_sequence(candidate)
if (not themes_primary or not themes_candidate) and (not themes_primary_raw or not themes_candidate_raw):
penalties.append(("missing_theme_metadata", 0.5))
if mode is PartnerMode.PARTNER_WITH:
partner_with = {
_normalize_token(name)
            for name in _sequence(_partner_meta(primary), "partner_with")
}
if not partner_with:
penalties.append(("primary_missing_partner_with", 0.7))
colors_candidate = set(_sequence(candidate, "color_identity"))
if len(colors_candidate) >= 4:
penalties.append(("candidate_color_spread", 0.25))
total = 0.0
reasons: list[str] = []
for reason, magnitude in penalties:
if magnitude <= 0:
continue
total += magnitude
reasons.append(reason)
return _clamp(total), reasons