mtg_python_deckbuilder/code/deck_builder/include_exclude_utils.py

348 lines
10 KiB
Python

"""
Utilities for include/exclude card functionality.
Provides fuzzy matching, card name normalization, and validation
for must-include and must-exclude card lists.
"""
from __future__ import annotations

import difflib
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Set, Tuple
# --- Fuzzy matching configuration ---
# Default similarity ratio (90%) at or above which fuzzy_match_card_name
# auto-accepts its best candidate.
FUZZY_CONFIDENCE_THRESHOLD: float = 0.90
# Upper bound on the number of suggestions returned for a fuzzy match.
MAX_SUGGESTIONS: int = 3

# --- List size limits (enforced by validate_list_sizes) ---
# Maximum number of include cards allowed.
MAX_INCLUDES: int = 10
# Maximum number of exclude cards allowed.
MAX_EXCLUDES: int = 15
@dataclass
class FuzzyMatchResult:
    """Outcome of fuzzy-matching one user-supplied card name.

    Attributes:
        input_name: The name as supplied by the caller (not normalized).
        matched_name: The accepted card name in its original spelling, or
            None when no candidate was auto-accepted.
        confidence: Similarity score of the best candidate; 1.0 for an exact
            match after normalization, 0.0 when nothing was close enough.
        suggestions: Candidate card names (original spellings), best first.
        auto_accepted: True when the best candidate was accepted without
            needing user confirmation.
    """

    # The decorator is applied exactly once; stacking @dataclass twice only
    # re-processes the same class and adds nothing.
    input_name: str
    matched_name: Optional[str]
    confidence: float
    suggestions: List[str]
    auto_accepted: bool
@dataclass
class IncludeExcludeDiagnostics:
    """Diagnostics record for include/exclude processing.

    All fields are required (no defaults). Nothing in the visible part of
    this module populates the record, so the per-field notes below are
    inferred from the field names and should be confirmed against the code
    that builds this object.
    """

    # NOTE(review): presumably requested includes that could not be added — confirm.
    missing_includes: List[str]
    # NOTE(review): presumably cards skipped for color-identity reasons — confirm.
    ignored_color_identity: List[str]
    # NOTE(review): presumably illegal cards that were dropped vs. allowed — confirm.
    illegal_dropped: List[str]
    illegal_allowed: List[str]
    # NOTE(review): presumably cards removed because they were on the exclude list — confirm.
    excluded_removed: List[str]
    # NOTE(review): looks like card name -> number of occurrences, matching the
    # second value returned by collapse_duplicates — confirm.
    duplicates_collapsed: Dict[str, int]
    # NOTE(review): presumably includes that were successfully added — confirm.
    include_added: List[str]
    # e.g., {"creatures": ["Card A"]} when includes exceed ideal category counts
    include_over_ideal: Dict[str, List[str]]
    # NOTE(review): presumably user input -> corrected card name — confirm.
    fuzzy_corrections: Dict[str, str]
    # Free-form records, hence typing.Any for the values (the builtin
    # function `any` is not a type and is rejected by type checkers).
    confirmation_needed: List[Dict[str, Any]]
    # NOTE(review): presumably warning key -> count; verify against producer.
    list_size_warnings: Dict[str, int]
# Typographic quotes and dashes mapped to their plain ASCII equivalents,
# applied in a single str.translate pass.
_TYPOGRAPHIC_TO_ASCII = str.maketrans({
    '\u2019': "'",  # curly apostrophe / closing single quote
    '\u2018': "'",  # opening single quote
    '\u201C': '"',  # opening double quote
    '\u201D': '"',  # closing double quote
    '\u2013': "-",  # en dash
    '\u2014': "-",  # em dash
})


def normalize_card_name(name: str) -> str:
    """
    Produce a canonical, case-insensitive key for a card name.

    Steps, in order:
    - surrounding whitespace is stripped
    - curly quotes and en/em dashes become their ASCII forms
    - a leading "A-" (Arena/Alchemy prefix, upper-case only) is dropped
      when something follows it
    - internal whitespace runs collapse to a single space
    - the result is case-folded

    Args:
        name: Raw card name input; any falsy value yields "".

    Returns:
        Normalized card name for matching
    """
    if not name:
        return ""

    text = str(name).strip().translate(_TYPOGRAPHIC_TO_ASCII)

    # A bare "A-" is kept as-is; only strip the prefix when a name follows.
    if len(text) > 2 and text.startswith('A-'):
        text = text[2:]

    return " ".join(text.split()).casefold()
# Precompiled patterns used by normalize_punctuation.
_SEPARATOR_PUNCT_RE = re.compile(r'[,:]')
_WHITESPACE_RUN_RE = re.compile(r'\s+')


def normalize_punctuation(name: str) -> str:
    """
    Build a punctuation-insensitive key for fuzzy matching.

    Runs normalize_card_name first, then turns every comma and colon into a
    space and collapses the resulting whitespace, so that
    "Krenko, Mob Boss" and "Krenko Mob Boss" produce the same key.

    Args:
        name: Card name to normalize; any falsy value yields "".

    Returns:
        Name with punctuation variations normalized
    """
    if not name:
        return ""

    spaced = _SEPARATOR_PUNCT_RE.sub(' ', normalize_card_name(name))
    return _WHITESPACE_RUN_RE.sub(' ', spaced).strip()
def _spelling_rank(name: str) -> Tuple[int, str]:
    """Sort key that prefers the shortest spelling, then alphabetical order."""
    text = str(name)
    return len(text), text


def _no_fuzzy_match(input_name: str) -> FuzzyMatchResult:
    """Build the result returned when no candidate is close enough."""
    return FuzzyMatchResult(
        input_name=input_name,
        matched_name=None,
        confidence=0.0,
        suggestions=[],
        auto_accepted=False,
    )


def fuzzy_match_card_name(
    input_name: str,
    card_names: Set[str],
    confidence_threshold: float = FUZZY_CONFIDENCE_THRESHOLD,
) -> FuzzyMatchResult:
    """
    Perform fuzzy matching on a card name against a set of valid names.

    Both the input and every candidate are compared in their
    normalize_punctuation form. An exact match on that form is accepted with
    confidence 1.0; otherwise difflib proposes close candidates and the best
    one is auto-accepted only if its similarity ratio reaches
    ``confidence_threshold``.

    When several candidates share the same normalized form (for example
    "A-Some Card" and "Some Card"), the shortest original spelling wins, with
    alphabetical order as the tie-break. This makes the result independent of
    set iteration order, which can differ from one interpreter run to the
    next.

    Args:
        input_name: User input card name
        card_names: Set of valid card names to match against
        confidence_threshold: Minimum confidence for auto-acceptance

    Returns:
        FuzzyMatchResult with match information. ``matched_name`` is None
        unless the match was auto-accepted; ``suggestions`` holds at most
        MAX_SUGGESTIONS original card names, best first (empty for exact
        matches and for no match).
    """
    if not input_name or not card_names:
        return _no_fuzzy_match(input_name)

    normalized_input = normalize_punctuation(input_name)

    # Map each normalized form back to one original spelling, resolving
    # collisions deterministically instead of "whichever the set yields first".
    normalized_to_original: Dict[str, str] = {}
    for name in card_names:
        normalized = normalize_punctuation(name)
        if normalized in normalized_to_original:
            incumbent = normalized_to_original[normalized]
            if _spelling_rank(name) >= _spelling_rank(incumbent):
                continue
        normalized_to_original[normalized] = name

    # Exact match check (after normalization)
    if normalized_input in normalized_to_original:
        return FuzzyMatchResult(
            input_name=input_name,
            matched_name=normalized_to_original[normalized_input],
            confidence=1.0,
            suggestions=[],
            auto_accepted=True,
        )

    # Fuzzy matching using difflib; results come back ordered best first.
    matches = difflib.get_close_matches(
        normalized_input,
        list(normalized_to_original),
        n=MAX_SUGGESTIONS + 1,  # Get one extra in case best match is below threshold
        cutoff=0.6,  # Lower cutoff to get more candidates
    )
    if not matches:
        return _no_fuzzy_match(input_name)

    # Calculate actual confidence for best match
    best_match = matches[0]
    confidence = difflib.SequenceMatcher(None, normalized_input, best_match).ratio()

    # Convert back to original names
    suggestions = [normalized_to_original[match] for match in matches[:MAX_SUGGESTIONS]]

    # Auto-accept if confidence is high enough
    auto_accepted = confidence >= confidence_threshold
    matched_name = normalized_to_original[best_match] if auto_accepted else None

    return FuzzyMatchResult(
        input_name=input_name,
        matched_name=matched_name,
        confidence=confidence,
        suggestions=suggestions,
        auto_accepted=auto_accepted,
    )
def validate_list_sizes(includes: List[str], excludes: List[str]) -> Dict[str, Any]:
    """
    Validate that include/exclude lists are within acceptable size limits.

    A list longer than its limit (MAX_INCLUDES / MAX_EXCLUDES) produces an
    error; a list that has reached 80% of its limit (rounded down) produces a
    warning instead. A list passed as None is treated as empty.

    Args:
        includes: List of include card names
        excludes: List of exclude card names

    Returns:
        Dictionary with keys:
        - 'valid': True when there are no errors
        - 'errors': list of error messages
        - 'warnings': dict of warning key -> message
          ('includes_approaching_limit' / 'excludes_approaching_limit')
        - 'counts': dict with 'includes', 'excludes', 'includes_limit',
          'excludes_limit'
    """
    include_count = len(includes) if includes else 0
    exclude_count = len(excludes) if excludes else 0

    size_warnings: Dict[str, str] = {}
    errors: List[str] = []

    # Same rule for both lists; only the label, count and limit differ.
    for label, count, limit in (
        ("include", include_count, MAX_INCLUDES),
        ("exclude", exclude_count, MAX_EXCLUDES),
    ):
        if count > limit:
            errors.append(f"Too many {label} cards: {count} (max {limit})")
        elif count >= int(limit * 0.8):  # 80% warning threshold
            size_warnings[f"{label}s_approaching_limit"] = (
                f"Approaching {label} limit: {count}/{limit}"
            )

    return {
        'valid': not errors,
        'errors': errors,
        'warnings': size_warnings,
        'counts': {
            'includes': include_count,
            'excludes': exclude_count,
            'includes_limit': MAX_INCLUDES,
            'excludes_limit': MAX_EXCLUDES,
        },
    }
def collapse_duplicates(card_names: List[str]) -> Tuple[List[str], Dict[str, int]]:
    """
    Collapse repeated card names into single entries and report the repeats.

    Names are compared via normalize_card_name, so entries that differ only
    in case, spacing or typographic quotes count as the same card. The first
    spelling encountered (whitespace-stripped) is the one that is kept. Empty
    and whitespace-only entries are skipped.

    Args:
        card_names: List of card names (may contain duplicates)

    Returns:
        Tuple of (unique_names, duplicate_counts): unique_names preserves
        first-appearance order; duplicate_counts maps the kept spelling to
        its total number of occurrences, only for names seen more than once.
    """
    if not card_names:
        return [], {}

    first_spelling: Dict[str, str] = {}
    occurrences: Dict[str, int] = {}

    for raw in card_names:
        stripped = raw.strip() if raw else ""
        if not stripped:
            continue

        key = normalize_card_name(stripped)
        if key in occurrences:
            occurrences[key] += 1
        else:
            first_spelling[key] = stripped
            occurrences[key] = 1

    # Dicts keep insertion order, so this is first-appearance order.
    unique_names = list(first_spelling.values())
    duplicate_counts = {
        first_spelling[key]: total
        for key, total in occurrences.items()
        if total > 1
    }
    return unique_names, duplicate_counts
def parse_card_list_input(input_text: str) -> List[str]:
    """
    Split raw user text into individual card names.

    The delimiter is chosen once for the whole input: if the text contains
    any newline, it is split on newlines only (so names such as
    "Krenko, Mob Boss" stay intact); otherwise it is split on commas. Each
    piece is whitespace-stripped and empty pieces are dropped.

    Args:
        input_text: Raw user input text; any falsy value yields [].

    Returns:
        List of parsed card names
    """
    if not input_text:
        return []

    separator = '\n' if '\n' in input_text else ','
    return [
        entry
        for entry in map(str.strip, input_text.split(separator))
        if entry
    ]
def get_baseline_performance_metrics() -> Dict[str, Any]:
    """
    Get baseline performance metrics for regression testing.

    Times 300 sample names (three names repeated 100 times) through both
    normalize_card_name and normalize_punctuation.

    Returns:
        Dictionary with:
        - 'normalization_time_ms': elapsed time of the loop in milliseconds
        - 'operations_count': number of normalization calls performed
        - 'timestamp': wall-clock time (seconds since the epoch) at completion
    """
    import time

    # Simulate some basic operations for baseline
    test_names = ['Lightning Bolt', 'Krenko, Mob Boss', 'Sol Ring'] * 100

    # perf_counter is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted, which would corrupt the interval.
    started = time.perf_counter()
    for name in test_names:
        normalize_card_name(name)
        normalize_punctuation(name)
    elapsed_seconds = time.perf_counter() - started

    return {
        'normalization_time_ms': elapsed_seconds * 1000,
        'operations_count': len(test_names) * 2,  # 2 operations per name
        'timestamp': time.time(),
    }