mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-17 08:00:13 +01:00
feat: Add include/exclude card lists feature with web UI, validation, fuzzy matching, and JSON persistence (ALLOW_MUST_HAVES=1)
This commit is contained in:
parent 7ef45252f7
commit 0516260304
39 changed files with 3672 additions and 626 deletions
348
code/deck_builder/include_exclude_utils.py
Normal file
@@ -0,0 +1,348 @@
"""
Utilities for include/exclude card functionality.

Provides fuzzy matching, card name normalization, and validation
for must-include and must-exclude card lists.
"""

from __future__ import annotations

import difflib
import re
from typing import Any, List, Dict, Set, Tuple, Optional
from dataclasses import dataclass


# Fuzzy matching configuration
FUZZY_CONFIDENCE_THRESHOLD = 0.90  # 90% confidence for auto-acceptance
MAX_SUGGESTIONS = 3  # Maximum suggestions to show for fuzzy matches
MAX_INCLUDES = 10  # Maximum include cards allowed
MAX_EXCLUDES = 15  # Maximum exclude cards allowed


@dataclass
class FuzzyMatchResult:
    """Result of a fuzzy card name match."""
    input_name: str
    matched_name: Optional[str]
    confidence: float
    suggestions: List[str]
    auto_accepted: bool


@dataclass
class IncludeExcludeDiagnostics:
    """Diagnostics for include/exclude processing."""
    missing_includes: List[str]
    ignored_color_identity: List[str]
    illegal_dropped: List[str]
    illegal_allowed: List[str]
    excluded_removed: List[str]
    duplicates_collapsed: Dict[str, int]
    include_added: List[str]
    include_over_ideal: Dict[str, List[str]]  # e.g., {"creatures": ["Card A"]} when includes exceed ideal category counts
    fuzzy_corrections: Dict[str, str]
    confirmation_needed: List[Dict[str, Any]]
    list_size_warnings: Dict[str, int]


def normalize_card_name(name: str) -> str:
    """
    Normalize card names for robust matching.

    Handles:
    - Case normalization (casefold)
    - Punctuation normalization (commas, apostrophes)
    - Whitespace cleanup
    - Unicode apostrophe normalization
    - Arena/Alchemy prefix removal

    Args:
        name: Raw card name input

    Returns:
        Normalized card name for matching
    """
    if not name:
        return ""

    # Basic cleanup
    s = str(name).strip()

    # Normalize unicode characters
    s = s.replace('\u2019', "'")  # Curly apostrophe to straight
    s = s.replace('\u2018', "'")  # Opening single quote
    s = s.replace('\u201C', '"')  # Opening double quote
    s = s.replace('\u201D', '"')  # Closing double quote
    s = s.replace('\u2013', "-")  # En dash
    s = s.replace('\u2014', "-")  # Em dash

    # Remove Arena/Alchemy prefix
    if s.startswith('A-') and len(s) > 2:
        s = s[2:]

    # Normalize whitespace
    s = " ".join(s.split())

    # Case normalization
    return s.casefold()

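
# Illustrative examples (hypothetical inputs, assuming only the rules above): these
# calls would all reduce to the same lookup key.
#
#   normalize_card_name("A-Krenko, Mob Boss")   -> "krenko, mob boss"   (Arena prefix stripped)
#   normalize_card_name("KRENKO,  Mob   Boss")  -> "krenko, mob boss"   (case folded, whitespace collapsed)
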
def normalize_punctuation(name: str) -> str:
    """
    Normalize punctuation for fuzzy matching.

    Specifically handles the case where users might omit commas:
    "Krenko, Mob Boss" vs "Krenko Mob Boss"

    Args:
        name: Card name to normalize

    Returns:
        Name with punctuation variations normalized
    """
    if not name:
        return ""

    # Remove common punctuation for comparison
    s = normalize_card_name(name)

    # Remove commas, colons, and extra spaces for fuzzy matching
    s = re.sub(r'[,:]', ' ', s)
    s = re.sub(r'\s+', ' ', s)

    return s.strip()

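
# Illustrative example (hypothetical input): the punctuation pass lets a comma-less
# entry compare equal to the canonical name.
#
#   normalize_punctuation("Krenko Mob Boss") == normalize_punctuation("Krenko, Mob Boss")
#   # both sides evaluate to "krenko mob boss"
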
def fuzzy_match_card_name(
    input_name: str,
    card_names: Set[str],
    confidence_threshold: float = FUZZY_CONFIDENCE_THRESHOLD
) -> FuzzyMatchResult:
    """
    Perform fuzzy matching on a card name against a set of valid names.

    Args:
        input_name: User input card name
        card_names: Set of valid card names to match against
        confidence_threshold: Minimum confidence for auto-acceptance

    Returns:
        FuzzyMatchResult with match information
    """
    if not input_name or not card_names:
        return FuzzyMatchResult(
            input_name=input_name,
            matched_name=None,
            confidence=0.0,
            suggestions=[],
            auto_accepted=False
        )

    # Normalize input for matching
    normalized_input = normalize_punctuation(input_name)

    # Create normalized lookup for card names
    normalized_to_original = {}
    for name in card_names:
        normalized = normalize_punctuation(name)
        if normalized not in normalized_to_original:
            normalized_to_original[normalized] = name

    normalized_names = set(normalized_to_original.keys())

    # Exact match check (after normalization)
    if normalized_input in normalized_names:
        return FuzzyMatchResult(
            input_name=input_name,
            matched_name=normalized_to_original[normalized_input],
            confidence=1.0,
            suggestions=[],
            auto_accepted=True
        )

    # Fuzzy matching using difflib
    matches = difflib.get_close_matches(
        normalized_input,
        normalized_names,
        n=MAX_SUGGESTIONS + 1,  # Get one extra in case best match is below threshold
        cutoff=0.6  # Lower cutoff to get more candidates
    )

    if not matches:
        return FuzzyMatchResult(
            input_name=input_name,
            matched_name=None,
            confidence=0.0,
            suggestions=[],
            auto_accepted=False
        )

    # Calculate actual confidence for best match
    best_match = matches[0]
    confidence = difflib.SequenceMatcher(None, normalized_input, best_match).ratio()

    # Convert back to original names
    suggestions = [normalized_to_original[match] for match in matches[:MAX_SUGGESTIONS]]
    best_original = normalized_to_original[best_match]

    # Auto-accept if confidence is high enough
    auto_accepted = confidence >= confidence_threshold
    matched_name = best_original if auto_accepted else None

    return FuzzyMatchResult(
        input_name=input_name,
        matched_name=matched_name,
        confidence=confidence,
        suggestions=suggestions,
        auto_accepted=auto_accepted
    )

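
# Illustrative usage sketch; the pool below is a hypothetical stand-in for the real
# card-name set the deck builder loads elsewhere, so the numbers are only indicative.
#
#   pool = {"Lightning Bolt", "Krenko, Mob Boss", "Sol Ring"}
#   fuzzy_match_card_name("Krenko Mob Boss", pool)
#   # -> exact match after punctuation normalization: matched_name="Krenko, Mob Boss", confidence=1.0
#   fuzzy_match_card_name("Lightnig Bolt", pool)
#   # -> auto-accepted typo fix: matched_name="Lightning Bolt", confidence ~0.96 (above the 0.90 threshold)
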
def validate_list_sizes(includes: List[str], excludes: List[str]) -> Dict[str, Any]:
    """
    Validate that include/exclude lists are within acceptable size limits.

    Args:
        includes: List of include card names
        excludes: List of exclude card names

    Returns:
        Dictionary with validation results and warnings
    """
    include_count = len(includes)
    exclude_count = len(excludes)

    warnings = {}
    errors = []

    # Size limit checks
    if include_count > MAX_INCLUDES:
        errors.append(f"Too many include cards: {include_count} (max {MAX_INCLUDES})")
    elif include_count >= int(MAX_INCLUDES * 0.8):  # 80% warning threshold
        warnings['includes_approaching_limit'] = f"Approaching include limit: {include_count}/{MAX_INCLUDES}"

    if exclude_count > MAX_EXCLUDES:
        errors.append(f"Too many exclude cards: {exclude_count} (max {MAX_EXCLUDES})")
    elif exclude_count >= int(MAX_EXCLUDES * 0.8):  # 80% warning threshold
        warnings['excludes_approaching_limit'] = f"Approaching exclude limit: {exclude_count}/{MAX_EXCLUDES}"

    return {
        'valid': len(errors) == 0,
        'errors': errors,
        'warnings': warnings,
        'counts': {
            'includes': include_count,
            'excludes': exclude_count,
            'includes_limit': MAX_INCLUDES,
            'excludes_limit': MAX_EXCLUDES
        }
    }

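
# Illustrative behaviour sketch (hypothetical lists):
#
#   validate_list_sizes(["Sol Ring"] * 8, [])
#   # -> valid=True, warnings={'includes_approaching_limit': 'Approaching include limit: 8/10'}
#   validate_list_sizes(["Sol Ring"] * 11, [])
#   # -> valid=False, errors=['Too many include cards: 11 (max 10)']
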
def collapse_duplicates(card_names: List[str]) -> Tuple[List[str], Dict[str, int]]:
    """
    Remove duplicates from card list and track collapsed counts.

    Commander format allows only one copy of each card (aside from basic lands and
    cards that explicitly allow multiple copies), so duplicate entries in user input
    should be collapsed to single copies.

    Args:
        card_names: List of card names (may contain duplicates)

    Returns:
        Tuple of (unique_names, duplicate_counts)
    """
    if not card_names:
        return [], {}

    seen = {}
    unique_names = []

    for name in card_names:
        if not name or not name.strip():
            continue

        name = name.strip()
        normalized = normalize_card_name(name)

        if normalized not in seen:
            seen[normalized] = {'original': name, 'count': 1}
            unique_names.append(name)
        else:
            seen[normalized]['count'] += 1

    # Extract duplicate counts (only for names that appeared more than once)
    duplicates = {
        data['original']: data['count']
        for data in seen.values()
        if data['count'] > 1
    }

    return unique_names, duplicates

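
# Illustrative example (hypothetical input): repeated entries collapse to a single
# copy and the number of occurrences is reported under the first spelling seen.
#
#   collapse_duplicates(["Sol Ring", "sol ring", " Sol Ring "])
#   # -> (["Sol Ring"], {"Sol Ring": 3})
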
def parse_card_list_input(input_text: str) -> List[str]:
    """
    Parse user input text into a list of card names.

    Supports:
    - Newline separated (preferred for cards with commas in names)
    - Comma separated (only when no newlines present)
    - Whitespace cleanup

    Note: If input contains both newlines and commas, newlines take precedence
    to avoid splitting card names that contain commas.

    Args:
        input_text: Raw user input text

    Returns:
        List of parsed card names
    """
    if not input_text:
        return []

    # If input contains newlines, split only on newlines
    # This prevents breaking card names with commas like "Krenko, Mob Boss"
    if '\n' in input_text:
        names = input_text.split('\n')
    else:
        # Only split on commas if no newlines present
        names = input_text.split(',')

    # Clean up each name
    cleaned = []
    for name in names:
        name = name.strip()
        if name:  # Skip empty entries
            cleaned.append(name)

    return cleaned

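
# Illustrative examples (hypothetical input): newlines take precedence over commas,
# so names that themselves contain commas survive intact.
#
#   parse_card_list_input("Krenko, Mob Boss\nSol Ring\n")
#   # -> ["Krenko, Mob Boss", "Sol Ring"]
#   parse_card_list_input("Lightning Bolt, Sol Ring")
#   # -> ["Lightning Bolt", "Sol Ring"]   (comma split, since no newline is present)
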
def get_baseline_performance_metrics() -> Dict[str, Any]:
    """
    Get baseline performance metrics for regression testing.

    Returns:
        Dictionary with timing baselines for the normalization helpers
    """
    import time

    start_time = time.time()

    # Simulate some basic operations for baseline
    test_names = ['Lightning Bolt', 'Krenko, Mob Boss', 'Sol Ring'] * 100
    for name in test_names:
        normalize_card_name(name)
        normalize_punctuation(name)

    end_time = time.time()

    return {
        'normalization_time_ms': (end_time - start_time) * 1000,
        'operations_count': len(test_names) * 2,  # 2 operations per name
        'timestamp': time.time()
    }

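
# Minimal smoke-test sketch, assuming a tiny in-memory card pool in place of the real
# card database; it only exercises the helpers defined above and is safe to run directly.
if __name__ == "__main__":
    pool = {"Lightning Bolt", "Krenko, Mob Boss", "Sol Ring", "Arcane Signet"}

    raw = "Krenko, Mob Boss\nLightnig Bolt\nSol Ring\nSol Ring"
    names = parse_card_list_input(raw)
    names, dupes = collapse_duplicates(names)
    print("parsed:", names, "duplicates collapsed:", dupes)

    for name in names:
        match = fuzzy_match_card_name(name, pool)
        print(f"{name!r} -> {match.matched_name!r} "
              f"(confidence {match.confidence:.2f}, auto_accepted={match.auto_accepted})")

    print(validate_list_sizes(includes=names, excludes=[]))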