# mtg_python_deckbuilder/code/web/services/synergy_builder.py
# (Python source; listing metadata: 607 lines, 25 KiB)

"""
Synergy Builder - Analyzes multiple deck builds and creates optimized "best-of" deck.
Takes multiple builds of the same configuration and identifies cards that appear
frequently across builds, scoring them for synergy based on:
- Frequency of appearance (higher = more consistent with strategy)
- EDHREC rank (lower rank = more popular/powerful; currently a neutral
  placeholder, because no EDHREC data is loaded yet)
- Theme tag matches (more matching tags = better fit)
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from collections import Counter
from code.logging_util import get_logger
from code.deck_builder import builder_utils as bu
import pandas as pd
import os
# Module-level logger obtained from the project's logging helper for this module's name.
logger = get_logger(__name__)
@dataclass
class ScoredCard:
    """One candidate card plus the synergy score and metadata attached to it.

    The field order below is the positional constructor order; only the first
    five fields are required.
    """

    # Card name.
    name: str
    # Fraction of builds containing this card, in the range 0.0-1.0.
    frequency: float
    # Number of builds this card appears in.
    appearance_count: int
    # Calculated synergy score (0-100+).
    synergy_score: float
    # Card type category (Creature, Land, etc.).
    category: str
    # Card role from tagging.
    role: str = ""
    # Theme tags; every instance gets its own fresh list by default.
    tags: List[str] = field(default_factory=list)
    # EDHREC rank if available.
    edhrec_rank: Optional[int] = None
    # Number of copies (usually 1 for Commander).
    count: int = 1
    # Full type line (e.g., "Creature — Rabbit Scout").
    type_line: str = ""
@dataclass
class CardPool:
    """The combined set of scored cards gathered from several builds."""

    # Scored cards keyed by card name.
    cards: Dict[str, ScoredCard]
    # Number of builds that were aggregated.
    total_builds: int
    # Original build configuration.
    config: Dict[str, Any]
    # Theme tags taken from the configuration.
    themes: List[str]

    def get_by_category(self, category: str) -> List[ScoredCard]:
        """Return every pooled card whose category equals ``category``."""
        matches: List[ScoredCard] = []
        for entry in self.cards.values():
            if entry.category == category:
                matches.append(entry)
        return matches

    def get_top_cards(self, limit: int = 100) -> List[ScoredCard]:
        """Return the first ``limit`` cards when ordered by descending synergy score."""
        ranked = list(self.cards.values())
        # Stable sort: cards with equal scores keep their pool order.
        ranked.sort(key=lambda entry: entry.synergy_score, reverse=True)
        return ranked[:limit]

    def get_high_frequency_cards(self, min_frequency: float = 0.8) -> List[ScoredCard]:
        """Return the cards whose frequency is at least ``min_frequency``."""
        return list(
            filter(lambda entry: entry.frequency >= min_frequency, self.cards.values())
        )
class SynergyAnalyzer:
    """Analyzes multiple builds and scores cards for synergy.

    The analyzer counts in how many builds each card appears, attaches the
    card's type line (looked up in the processed card parquet when it is
    available) and computes a weighted synergy score per card.
    """

    # Scoring weights for the frequency, EDHREC and theme components.
    FREQUENCY_WEIGHT = 0.5
    EDHREC_WEIGHT = 0.25
    THEME_WEIGHT = 0.25
    HIGH_FREQUENCY_BONUS = 1.1  # 10% bonus for cards in 80%+ builds

    def __init__(self):
        """Initialize synergy analyzer with an empty type-line cache."""
        # Lowercase card name -> type line; filled lazily by _load_type_lines.
        self._type_line_cache: Dict[str, str] = {}

    def _load_type_lines(self) -> Dict[str, str]:
        """
        Load card type lines from parquet for all cards.

        The mapping is cached on the instance after the first successful
        load. Any failure (missing file, missing columns, read error) is
        logged and yields an empty mapping without touching the cache, so a
        later call can retry.

        Returns:
            Dict mapping card name (lowercase) to type_line
        """
        if self._type_line_cache:
            return self._type_line_cache
        try:
            parquet_path = os.path.join("card_files", "processed", "all_cards.parquet")
            if not os.path.exists(parquet_path):
                logger.warning(f"[Synergy] Card parquet not found at {parquet_path}")
                return {}
            df = pd.read_parquet(parquet_path)
            # Try 'type' first, then 'type_line'
            type_col = next(
                (col for col in ("type", "type_line") if col in df.columns), None
            )
            if not type_col or 'name' not in df.columns:
                logger.warning(f"[Synergy] Card parquet missing required columns. Available: {list(df.columns)}")
                return {}
            # Drop rows with a missing name or type so NaN/None never turn into
            # the literal strings "nan"/"None" in the mapping.
            pairs = df[["name", type_col]].dropna()
            # Build into a local dict first: if anything below raises, the
            # shared cache is left untouched instead of half-filled.
            loaded: Dict[str, str] = {}
            for raw_name, raw_type in zip(pairs["name"], pairs[type_col]):
                name = str(raw_name).strip()
                type_line = str(raw_type).strip()
                if name and type_line:
                    loaded[name.lower()] = type_line
            # update() keeps the cache object identity stable for other holders.
            self._type_line_cache.update(loaded)
            logger.info(f"[Synergy] Loaded type lines for {len(self._type_line_cache)} cards from parquet")
            return self._type_line_cache
        except Exception as e:
            # Best effort: type lines are optional metadata, so never fail the analysis.
            logger.warning(f"[Synergy] Error loading type lines from parquet: {e}")
            return {}

    @staticmethod
    def _collect_build_cards(build: Any) -> List[tuple]:
        """Return the (category, card dict) pairs found in one build result.

        Malformed input is tolerated: a build whose result or summary is not a
        dict is skipped with a warning, and non-list categories, non-dict
        cards and cards without a string name are ignored.
        """
        result = build.get("result", {}) if isinstance(build, dict) else None
        summary = result.get("summary", {}) if isinstance(result, dict) else None
        if not isinstance(summary, dict):
            logger.warning("[Synergy] Build missing summary, skipping")
            return []
        type_breakdown = summary.get("type_breakdown", {})
        if not isinstance(type_breakdown, dict):
            return []
        type_cards = type_breakdown.get("cards", {})
        if not isinstance(type_cards, dict):
            return []
        found: List[tuple] = []
        for category, card_list in type_cards.items():
            if not isinstance(card_list, list):
                continue
            for card in card_list:
                if not isinstance(card, dict):
                    continue
                card_name = card.get("name")
                if card_name and isinstance(card_name, str):
                    found.append((category, card))
        return found

    def analyze_builds(self, builds: List[Dict[str, Any]], config: Dict[str, Any]) -> CardPool:
        """
        Aggregate all cards from builds and calculate appearance frequencies.

        A card counts at most once per build. Frequencies are relative to the
        total number of builds passed in, including builds that had to be
        skipped because they carried no usable summary.

        Args:
            builds: List of build results from BuildCache
            config: Original deck configuration

        Returns:
            CardPool with all unique cards and their frequencies

        Raises:
            ValueError: If ``builds`` is empty.
        """
        logger.info(f"[Synergy] Analyzing {len(builds)} builds for synergy")
        if not builds:
            raise ValueError("Cannot analyze synergy with no builds")
        total_builds = len(builds)
        themes = config.get("tags") or []
        # Load type lines from the processed card parquet
        type_line_map = self._load_type_lines()
        card_appearances: Counter = Counter()  # card_name -> number of builds containing it
        card_metadata: Dict[str, Dict[str, Any]] = {}
        for build in builds:
            # Collect unique cards from this build
            unique_cards_in_build = set()
            for category, card in self._collect_build_cards(build):
                card_name = card["name"]
                unique_cards_in_build.add(card_name)
                # Store metadata (first occurrence only)
                if card_name in card_metadata:
                    continue
                # Get type_line from parquet, fallback to card data (which won't have it from summary)
                type_line = type_line_map.get(card_name.lower(), "")
                if not type_line:
                    type_line = card.get("type") or card.get("type_line") or ""
                # Debug: Log first few cards
                if len(card_metadata) < 3:
                    logger.info(f"[Synergy Debug] Card: {card_name}, Type line: {type_line}, From map: {card_name.lower() in type_line_map}")
                card_metadata[card_name] = {
                    "category": category,
                    "role": card.get("role", ""),
                    # Copy so later edits to a ScoredCard never mutate the build data.
                    "tags": list(card.get("tags") or []),
                    "type_line": type_line,
                }
            # Increment appearance count for each unique card in this build
            card_appearances.update(unique_cards_in_build)
        # Create ScoredCard objects with frequencies
        scored_cards: Dict[str, ScoredCard] = {}
        for card_name, appearance_count in card_appearances.items():
            frequency = appearance_count / total_builds
            metadata = card_metadata.get(card_name, {})
            scored_card = ScoredCard(
                name=card_name,
                frequency=frequency,
                appearance_count=appearance_count,
                synergy_score=0.0,  # Will be calculated next
                category=metadata.get("category", "Unknown"),
                role=metadata.get("role", ""),
                tags=metadata.get("tags", []),
                count=1,  # Default to 1 copy per card in synergy deck (basics override this later)
                type_line=metadata.get("type_line", ""),
            )
            # Debug: Log first few scored cards
            if len(scored_cards) < 3:
                logger.info(f"[Synergy Debug] ScoredCard: {scored_card.name}, type_line='{scored_card.type_line}', count={scored_card.count}, in_map={card_name.lower() in type_line_map}")
            # Calculate synergy score
            scored_card.synergy_score = self.score_card(scored_card, themes)
            scored_cards[card_name] = scored_card
        logger.info(f"[Synergy] Analyzed {len(scored_cards)} unique cards from {total_builds} builds")
        return CardPool(
            cards=scored_cards,
            total_builds=total_builds,
            config=config,
            themes=themes,
        )

    def score_card(self, card: ScoredCard, themes: List[str]) -> float:
        """
        Calculate synergy score for a card.

        Score = FREQUENCY_WEIGHT * frequency * 100 +
                EDHREC_WEIGHT * 50 (neutral placeholder, no EDHREC data yet) +
                THEME_WEIGHT * (matching_themes / distinct_themes) * 100

        The result is multiplied by HIGH_FREQUENCY_BONUS when the card's
        frequency is at least 0.8, then rounded to two decimals.

        Args:
            card: ScoredCard to score
            themes: Theme tags from config

        Returns:
            Synergy score (0-100+)
        """
        # Frequency component (0-100)
        frequency_score = card.frequency * 100
        # EDHREC component (placeholder - would need EDHREC data)
        edhrec_score = 50.0  # Neutral score
        # Theme component (0-100)
        theme_score = 0.0
        if themes and card.tags:
            theme_set = set(themes)
            matching_tags = len(theme_set & set(card.tags))
            # Divide by the number of DISTINCT themes so a repeated theme in the
            # config cannot keep a fully matching card below 100.
            theme_score = (matching_tags / len(theme_set)) * 100
        # Calculate weighted score
        score = (
            self.FREQUENCY_WEIGHT * frequency_score
            + self.EDHREC_WEIGHT * edhrec_score
            + self.THEME_WEIGHT * theme_score
        )
        # Bonus for high-frequency cards (appear in 80%+ builds)
        if card.frequency >= 0.8:
            score *= self.HIGH_FREQUENCY_BONUS
        return round(score, 2)
class SynergyDeckBuilder:
    """Builds an optimized deck from a synergy-scored card pool."""

    # (pool category, key in the ideals dict), in selection order: lands first.
    _CATEGORY_IDEAL_KEYS = (
        ("Land", "lands"),
        ("Creature", "creatures"),
        ("Artifact", "artifacts"),
        ("Enchantment", "enchantments"),
        ("Instant", "instants"),
        ("Sorcery", "sorceries"),
        ("Planeswalker", "planeswalkers"),
        ("Battle", "battles"),
    )
    # Land slots held back for basics when picking nonbasic lands.
    _MIN_BASICS_ESTIMATE = 15
    # Used when builder_constants does not provide BASIC_LAND_MAPPING.
    _DEFAULT_BASIC_LAND_MAPPING = {
        'W': 'Plains', 'U': 'Island', 'B': 'Swamp', 'R': 'Mountain', 'G': 'Forest'
    }

    def __init__(self, analyzer: Optional[SynergyAnalyzer] = None):
        """
        Initialize synergy deck builder.

        Args:
            analyzer: SynergyAnalyzer instance (creates new if None)
        """
        self.analyzer = analyzer or SynergyAnalyzer()

    @staticmethod
    def _distribute_basics(
        color_identity: Any,
        needed_basics: int,
        basic_map: Dict[str, str]
    ) -> Dict[str, int]:
        """Split ``needed_basics`` as evenly as possible over the deck's colors.

        Colors are upper-cased and de-duplicated, then processed in sorted
        order so the result is deterministic; the remainder goes to the first
        colors in that order. Colors without a basic land in ``basic_map`` are
        ignored, and lands that would receive zero copies are left out.

        Returns:
            Dict mapping basic land name to number of copies; empty when no
            color maps to a basic land.
        """
        colors = sorted({
            str(color).upper()
            for color in color_identity
            if basic_map.get(str(color).upper())
        })
        if not colors:
            return {}
        base, rem = divmod(max(0, needed_basics), len(colors))
        allocation: Dict[str, int] = {}
        for idx, color in enumerate(colors):
            copies = base + (1 if idx < rem else 0)
            if copies > 0:
                land_name = basic_map[color]
                allocation[land_name] = allocation.get(land_name, 0) + copies
        return allocation

    @staticmethod
    def _fallback_basic_type_line(land_name: str) -> str:
        """Build a type line for a basic land when no cached type line exists.

        The land name is used as the subtype unchanged: "Plains" has the
        subtype "Plains", so a trailing "s" must not be stripped.
        """
        return f"Basic Land — {land_name}"

    def _make_basic_card(self, land_name: str, count: int, pool: CardPool) -> ScoredCard:
        """Create the ScoredCard for ``count`` copies of one basic land.

        Frequency, appearance count and score are copied from the pool entry
        when the land was seen in the analysed builds; otherwise neutral
        defaults are used.
        """
        # Prefer the analyzer's cached type line (most reliable).
        cache = getattr(self.analyzer, "_type_line_cache", None) or {}
        type_line = cache.get(land_name.lower(), "") or self._fallback_basic_type_line(land_name)
        existing = pool.cards.get(land_name)
        if existing is not None:
            frequency = existing.frequency
            appearance_count = existing.appearance_count
            synergy_score = existing.synergy_score
        else:
            # Not in pool (common for basics), create minimal entry
            frequency = 1.0  # Assume high frequency for basics
            appearance_count = pool.total_builds
            synergy_score = 50.0  # Neutral score
        return ScoredCard(
            name=land_name,
            frequency=frequency,
            appearance_count=appearance_count,
            synergy_score=synergy_score,
            category="Land",
            role="basic",
            tags=[],
            count=count,
            type_line=type_line,
        )

    def _allocate_basic_lands(
        self,
        selected_cards: List[ScoredCard],
        by_category: Dict[str, List[ScoredCard]],
        pool: CardPool,
        ideals: Optional[Dict[str, int]]
    ) -> List[ScoredCard]:
        """
        Allocate basic lands based on color identity and remaining land slots.

        Separates basic lands from nonbasics, then allocates basics based on:
        1. Total lands target from ideals
        2. Color identity from config
        3. Current nonbasic land count

        ``by_category["Land"]`` is updated in place when nonbasic lands are
        trimmed or basics are allocated.

        Args:
            selected_cards: Currently selected cards (may include basics from pool)
            by_category: Cards grouped by category
            pool: Card pool with configuration
            ideals: Ideal card counts

        Returns:
            Updated list of selected cards with properly allocated basics
        """
        if not ideals:
            return selected_cards  # No ideals, keep as-is
        # Get basic land names
        basic_names = bu.basic_land_names()
        # Separate basics from nonbasics
        nonbasic_cards = [c for c in selected_cards if c.name not in basic_names]
        # Calculate how many basics we need
        # Note: For nonbasics, count=1 per card (singleton rule), so count == number of unique cards
        target_lands = ideals.get("lands", 35)
        nonbasic_lands = [c for c in nonbasic_cards if c.category == "Land"]
        current_nonbasic_count = len(nonbasic_lands)
        # If we have too many nonbasics, trim them
        if current_nonbasic_count > target_lands:
            logger.info(f"[Synergy] Too many nonbasics ({current_nonbasic_count}), trimming to {target_lands}")
            # Keep the highest scoring nonbasics
            trimmed_nonbasic_lands = sorted(
                nonbasic_lands, key=lambda c: c.synergy_score, reverse=True
            )[:target_lands]
            other_nonbasics = [c for c in nonbasic_cards if c.category != "Land"]
            # Keep the per-category view in sync with the trimmed selection.
            by_category["Land"] = list(trimmed_nonbasic_lands)
            return other_nonbasics + trimmed_nonbasic_lands  # No room for basics
        needed_basics = max(0, target_lands - current_nonbasic_count)
        if needed_basics == 0:
            logger.info("[Synergy] No basic lands needed (nonbasics exactly fill target)")
            return nonbasic_cards
        logger.info(f"[Synergy] Need {needed_basics} basics to fill {target_lands} land target (have {current_nonbasic_count} nonbasics)")
        # Get color identity from config
        color_identity = pool.config.get("colors", [])
        if not color_identity:
            logger.warning(f"[Synergy] No color identity in config (keys: {list(pool.config.keys())}), skipping basic land allocation")
            return nonbasic_cards
        # Map colors to basic land names
        from code.deck_builder import builder_constants as bc
        basic_map = getattr(bc, 'BASIC_LAND_MAPPING', self._DEFAULT_BASIC_LAND_MAPPING)
        # Distribute basics evenly, with remainder going to first colors
        allocation = self._distribute_basics(color_identity, needed_basics, basic_map)
        if not allocation:
            logger.warning(f"[Synergy] No valid colors found in identity: {color_identity}")
            return nonbasic_cards
        # Create ScoredCard objects for basics
        basic_cards = [
            self._make_basic_card(land_name, count, pool)
            for land_name, count in allocation.items()
        ]
        # Update by_category to replace old basics with new allocation
        land_category = [c for c in by_category.get("Land", []) if c.name not in basic_names]
        land_category.extend(basic_cards)
        by_category["Land"] = land_category
        # Combine and return
        result = nonbasic_cards + basic_cards
        logger.info(f"[Synergy] Allocated {needed_basics} basic lands across {len(allocation)} colors: {allocation}")
        return result

    def build_deck(
        self,
        pool: CardPool,
        ideals: Optional[Dict[str, int]] = None,
        target_size: int = 99  # Commander + 99 cards = 100
    ) -> Dict[str, Any]:
        """
        Build an optimized deck from the card pool, respecting ideal counts.

        With ideals, the highest-scoring cards are taken per category,
        remaining slots are filled with the best non-land cards, and basic
        lands are allocated last. Without ideals, the top ``target_size``
        cards by score are taken and no basics are added.

        Args:
            pool: CardPool with scored cards
            ideals: Target card counts keyed by ideal name (e.g., {"creatures": 25, "lands": 35})
            target_size: Total number of cards to include (default 99, excluding commander)

        Returns:
            Dict with deck list and metadata
        """
        logger.info(f"[Synergy] Building deck from pool of {len(pool.cards)} cards")
        selected_cards: List[ScoredCard] = []
        by_category: Dict[str, List[ScoredCard]] = {}
        if ideals:
            # Build by category to meet ideals
            logger.info(f"[Synergy] Using ideals: {ideals}")
            # Get basic land names for filtering
            basic_names = bu.basic_land_names()
            for category, ideal_key in self._CATEGORY_IDEAL_KEYS:
                target_count = ideals.get(ideal_key, 0)
                if not target_count:
                    continue
                all_category_cards = pool.get_by_category(category)
                if category == "Land":
                    # Only select nonbasics here (basics allocated separately based on color identity)
                    category_cards = sorted(
                        (c for c in all_category_cards if c.name not in basic_names),
                        key=lambda c: c.synergy_score,
                        reverse=True
                    )
                    # Reserve space for basics by selecting fewer nonbasics
                    max_nonbasics = max(0, target_count - self._MIN_BASICS_ESTIMATE)
                    selected = category_cards[:max_nonbasics]
                    logger.info(f"[Synergy] Land: selected {len(selected)} nonbasics (max {max_nonbasics}, leaving room for basics)")
                else:
                    category_cards = sorted(
                        all_category_cards,
                        key=lambda c: c.synergy_score,
                        reverse=True
                    )
                    # Select top cards up to target count
                    selected = category_cards[:target_count]
                selected_cards.extend(selected)
                by_category[category] = selected
                logger.info(
                    f"[Synergy] {category}: selected {len(selected)}/{target_count} "
                    f"(pool had {len(category_cards)} available)"
                )
            # Calculate how many basics we'll need before filling remaining slots
            target_lands = ideals.get("lands", 35)
            current_land_count = len(by_category.get("Land", []))
            estimated_basics = max(0, target_lands - current_land_count)
            # Fill remaining slots with highest-scoring cards from any category (except Land),
            # reserving space for the basic lands that are added afterwards
            remaining_slots = target_size - len(selected_cards) - estimated_basics
            if remaining_slots > 0:
                selected_names = {c.name for c in selected_cards}
                # Exclude Land category from filler to avoid over-selecting lands
                remaining_pool = [
                    c for c in pool.get_top_cards(limit=len(pool.cards))
                    if c.name not in selected_names and c.category != "Land"
                ]
                filler_cards = remaining_pool[:remaining_slots]
                selected_cards.extend(filler_cards)
                # Add filler cards to by_category
                for card in filler_cards:
                    by_category.setdefault(card.category, []).append(card)
                logger.info(f"[Synergy] Filled {len(filler_cards)} remaining slots (reserved {estimated_basics} for basics)")
        else:
            # No ideals provided - fall back to top-scoring cards
            logger.info("[Synergy] No ideals provided, selecting top-scoring cards")
            selected_cards = pool.get_top_cards(limit=len(pool.cards))[:target_size]
            # Group by category for summary
            for card in selected_cards:
                by_category.setdefault(card.category, []).append(card)
        # Add basic lands after nonbasics are selected
        selected_cards = self._allocate_basic_lands(selected_cards, by_category, pool, ideals)
        # Calculate stats (accounting for multi-copy cards)
        unique_cards = len(selected_cards)
        total_cards = sum(c.count for c in selected_cards)  # Actual card count including duplicates
        # Debug: Check for cards with unexpected counts
        cards_with_count = [(c.name, c.count) for c in selected_cards if c.count != 1]
        if cards_with_count:
            logger.info(f"[Synergy Debug] Cards with count != 1: {cards_with_count[:10]}")
        avg_frequency = sum(c.frequency for c in selected_cards) / unique_cards if unique_cards else 0
        avg_score = sum(c.synergy_score for c in selected_cards) / unique_cards if unique_cards else 0
        high_freq_count = sum(1 for c in selected_cards if c.frequency >= 0.8)
        logger.info(
            f"[Synergy] Built deck: {total_cards} cards ({unique_cards} unique), "
            f"avg frequency={avg_frequency:.2f}, avg score={avg_score:.2f}, "
            f"high-frequency cards={high_freq_count}"
        )
        return {
            "cards": selected_cards,
            "by_category": by_category,
            "total_cards": total_cards,  # Actual count including duplicates
            "unique_cards": unique_cards,  # Unique card types
            "avg_frequency": round(avg_frequency, 3),
            "avg_score": round(avg_score, 2),
            "high_frequency_count": high_freq_count,
            "commander": pool.config.get("commander"),
            "themes": pool.themes
        }
# Global analyzer and builder instances, created once at import time and used
# by analyze_and_build_synergy_deck. The builder is handed this same analyzer,
# so it reads the type-line cache that the analyzer has populated.
_analyzer = SynergyAnalyzer()
_builder = SynergyDeckBuilder(_analyzer)
def analyze_and_build_synergy_deck(
    builds: List[Dict[str, Any]],
    config: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Run the analysis and the deck construction back to back.

    The builds are first aggregated into a scored card pool by the shared
    module-level analyzer; the shared builder then assembles the deck, using
    the ``"ideals"`` entry of ``config`` (an empty dict when absent).

    Args:
        builds: List of build results
        config: Original deck configuration (includes ideals)

    Returns:
        Synergy deck result dict
    """
    # Arguments are evaluated left to right: the pool is analysed before the
    # ideals are read from the config.
    return _builder.build_deck(
        _analyzer.analyze_builds(builds, config),
        ideals=config.get("ideals", {}),
    )