"""Theme editorial service for quality scoring and metadata management. Roadmap R12 Milestones 1-2: Editorial Fields + Heuristics Externalization Phase E+ enhancement for theme catalog editorial metadata. Responsibilities: - Calculate editorial quality scores for theme entries - Validate editorial field completeness and consistency - Suggest example commanders and cards for themes - Infer deck archetypes from theme patterns - Calculate popularity buckets from commander/card counts - Load and apply external editorial heuristics - Provide editorial metadata APIs for frontend consumption Follows R9 Backend Unification patterns: - Extends BaseService - Uses structured error handling (ValidationError, NotFoundError) - Integrates with existing theme_catalog_loader infrastructure - Provides telemetry integration points """ from __future__ import annotations from typing import Dict, List, Optional, Any from pathlib import Path import logging import yaml from .base import BaseService, NotFoundError from .theme_catalog_loader import load_index, slugify try: from type_definitions_theme_catalog import ThemeEntry, PopularityBucket, ALLOWED_DECK_ARCHETYPES, DescriptionSource except ImportError: # pragma: no cover from code.type_definitions_theme_catalog import ThemeEntry, PopularityBucket, ALLOWED_DECK_ARCHETYPES, DescriptionSource logger = logging.getLogger(__name__) # Default heuristics path (can be overridden in __init__) # Path calculation: from code/web/services/ → code/web/ → code/ → project root DEFAULT_HEURISTICS_PATH = Path(__file__).resolve().parents[3] / 'config' / 'themes' / 'editorial_heuristics.yml' # Legacy constants (will be loaded from heuristics file in M2, kept for backward compatibility) WEIGHT_HAS_DESCRIPTION = 20 WEIGHT_HAS_EXAMPLE_COMMANDERS = 15 WEIGHT_HAS_EXAMPLE_CARDS = 15 WEIGHT_HAS_DECK_ARCHETYPE = 10 WEIGHT_HAS_POPULARITY_BUCKET = 10 WEIGHT_HAS_SYNERGY_COMMANDERS = 10 WEIGHT_DESCRIPTION_LENGTH_BONUS = 10 WEIGHT_MULTIPLE_EXAMPLE_COMMANDERS = 10 WEIGHT_MULTIPLE_EXAMPLE_CARDS = 10 QUALITY_EXCELLENT = 85 QUALITY_GOOD = 65 QUALITY_FAIR = 40 DEFAULT_POPULARITY_BOUNDARIES = [40, 100, 220, 500] ARCHETYPE_KEYWORDS: Dict[str, List[str]] = { 'Combo': ['combo', 'infinite', 'storm'], 'Stax': ['stax', 'tax', 'lock', 'denial'], 'Voltron': ['voltron', 'aura', 'equipment'], 'Aggro': ['aggro', 'burn', 'fast', 'pressure', 'combat'], 'Control': ['control', 'counter', 'removal', 'wipes'], 'Midrange': ['midrange', 'value', 'flexible'], 'Graveyard': ['graveyard', 'reanimate', 'dredge', 'recursion'], 'Tokens': ['tokens', 'wide', 'go-wide'], 'Counters': ['+1/+1', 'counters', 'proliferate'], 'Ramp': ['ramp', 'big-mana', 'lands'], 'Spells': ['spellslinger', 'spells-matter', 'instants', 'sorceries'], 'Artifacts': ['artifacts', 'artifact-matters'], 'Enchantments': ['enchantments', 'enchantress', 'constellation'], 'Politics': ['group-hug', 'pillowfort', 'politics', 'monarch'], 'Toolbox': ['toolbox', 'tutor', 'silver-bullet'], } class ThemeEditorialService(BaseService): """Service for theme editorial quality scoring and metadata management. Extends BaseService following R9 patterns. M2 enhancement: loads external heuristics. """ def __init__(self, heuristics_path: Optional[Path] = None) -> None: """Initialize editorial service with optional heuristics override. Args: heuristics_path: Optional path to editorial_heuristics.yml (defaults to config/themes/) """ super().__init__() self._heuristics_path = heuristics_path or DEFAULT_HEURISTICS_PATH self._heuristics_cache: Optional[Dict[str, Any]] = None def load_heuristics(self, force_reload: bool = False) -> Dict[str, Any]: """Load editorial heuristics from YAML file (cached). Args: force_reload: If True, bypass cache and reload from disk Returns: Dictionary with heuristics configuration Raises: NotFoundError: If heuristics file doesn't exist ValidationError: If heuristics file is invalid """ if self._heuristics_cache and not force_reload: return self._heuristics_cache if not self._heuristics_path.exists(): # Fallback to legacy behavior if heuristics file not found (cache the fallback) logger.warning(f"Heuristics file not found at {self._heuristics_path}, using legacy constants") self._heuristics_cache = { 'quality_thresholds': { 'excellent_min_score': QUALITY_EXCELLENT, 'good_min_score': QUALITY_GOOD, 'fair_min_score': QUALITY_FAIR, 'manual_description_bonus': 10, 'rule_description_bonus': 5, 'generic_description_bonus': 0, }, 'generic_staple_cards': [], 'archetype_keywords': ARCHETYPE_KEYWORDS, } return self._heuristics_cache try: with open(self._heuristics_path, 'r', encoding='utf-8') as f: self._heuristics_cache = yaml.safe_load(f) # Basic validation if not isinstance(self._heuristics_cache, dict): raise ValueError("Heuristics file must contain a YAML dictionary") required_keys = ['quality_thresholds', 'generic_staple_cards'] for key in required_keys: if key not in self._heuristics_cache: logger.warning(f"Heuristics missing required key: {key}") logger.info(f"Loaded editorial heuristics from {self._heuristics_path}") return self._heuristics_cache except Exception as e: logger.error(f"Failed to load heuristics: {e}") raise NotFoundError(f"Failed to load editorial heuristics: {e}") def get_generic_staple_cards(self) -> List[str]: """Get list of generic staple cards from heuristics. Returns: List of card names considered generic/staples """ heuristics = self.load_heuristics() return heuristics.get('generic_staple_cards', []) def is_generic_card(self, card_name: str) -> bool: """Check if a card is considered a generic staple. Args: card_name: Card name to check Returns: True if card is in generic staples list """ generic_cards = self.get_generic_staple_cards() return card_name in generic_cards def get_theme_metadata(self, theme_name: str) -> Dict[str, Any]: """Retrieve editorial metadata for a theme. Args: theme_name: Theme display name (e.g., "Aristocrats") Returns: Dictionary with editorial metadata including: - theme: Theme display name - description: Theme description - example_commanders: List of example commander names - example_cards: List of example card names - synergy_commanders: List of synergy commander entries - deck_archetype: Deck archetype classification - popularity_bucket: Popularity tier - editorial_quality: Quality lifecycle flag (draft|reviewed|final) - quality_score: Computed quality score (0-100) Raises: NotFoundError: If theme not found in catalog """ slug = slugify(theme_name) index = load_index() if slug not in index.slug_to_entry: raise NotFoundError(f"Theme not found: {theme_name}") entry = index.slug_to_entry[slug] quality_score = self.calculate_quality_score(entry) return { 'theme': entry.theme, 'description': entry.description or '', 'example_commanders': entry.example_commanders or [], 'example_cards': entry.example_cards or [], 'synergy_commanders': entry.synergy_commanders or [], 'deck_archetype': entry.deck_archetype, 'popularity_bucket': entry.popularity_bucket, 'editorial_quality': entry.editorial_quality, 'quality_score': quality_score, 'synergies': entry.synergies or [], 'primary_color': entry.primary_color, 'secondary_color': entry.secondary_color, } def calculate_quality_score(self, theme_entry: ThemeEntry) -> int: """Calculate editorial quality score for a theme entry. M2 Enhancement: Uses external heuristics for thresholds and bonuses. Score is based on presence and quality of editorial fields: - Description (20 points base, +10 if > 50 chars, +bonus for source type) - Example commanders (15 points base, +10 if 3+) - Example cards (15 points base, +10 if 5+) - Deck archetype (10 points) - Popularity bucket (10 points) - Synergy commanders (10 points) Args: theme_entry: ThemeEntry Pydantic model instance Returns: Quality score (0-100) """ heuristics = self.load_heuristics() thresholds = heuristics.get('quality_thresholds', {}) score = 0 # Description (20 base + 10 length bonus + source bonus) if theme_entry.description: score += WEIGHT_HAS_DESCRIPTION if len(theme_entry.description) > 50: score += WEIGHT_DESCRIPTION_LENGTH_BONUS # Bonus based on description source (from heuristics) if theme_entry.description_source: source_bonuses = { 'manual': thresholds.get('manual_description_bonus', 10), 'rule': thresholds.get('rule_description_bonus', 5), 'generic': thresholds.get('generic_description_bonus', 0), } score += source_bonuses.get(theme_entry.description_source, 0) # Example commanders if theme_entry.example_commanders: score += WEIGHT_HAS_EXAMPLE_COMMANDERS if len(theme_entry.example_commanders) >= 3: score += WEIGHT_MULTIPLE_EXAMPLE_COMMANDERS # Example cards (with generic card penalty - M2 enhancement) if theme_entry.example_cards: score += WEIGHT_HAS_EXAMPLE_CARDS if len(theme_entry.example_cards) >= 5: score += WEIGHT_MULTIPLE_EXAMPLE_CARDS # Penalize for too many generic staples (M2) generic_cards = self.get_generic_staple_cards() if generic_cards: generic_count = sum(1 for card in theme_entry.example_cards if card in generic_cards) generic_ratio = generic_count / max(1, len(theme_entry.example_cards)) if generic_ratio > 0.5: # More than 50% generic score -= 5 # Small penalty # Deck archetype if theme_entry.deck_archetype: score += WEIGHT_HAS_DECK_ARCHETYPE # Popularity bucket if theme_entry.popularity_bucket: score += WEIGHT_HAS_POPULARITY_BUCKET # Synergy commanders if theme_entry.synergy_commanders: score += WEIGHT_HAS_SYNERGY_COMMANDERS return min(score, 100) # Cap at 100 def get_quality_tier(self, score: int) -> str: """Convert quality score to tier label. M2 Enhancement: Uses external heuristics for tier thresholds. Args: score: Quality score (0-100) Returns: Quality tier: 'Excellent', 'Good', 'Fair', or 'Poor' """ heuristics = self.load_heuristics() thresholds = heuristics.get('quality_thresholds', {}) excellent_min = thresholds.get('excellent_min_score', QUALITY_EXCELLENT) good_min = thresholds.get('good_min_score', QUALITY_GOOD) fair_min = thresholds.get('fair_min_score', QUALITY_FAIR) if score >= excellent_min: return 'Excellent' elif score >= good_min: return 'Good' elif score >= fair_min: return 'Fair' else: return 'Poor' def validate_editorial_fields(self, theme_entry: ThemeEntry) -> List[str]: """Validate editorial fields and return list of issues. Checks: - Deck archetype is in ALLOWED_DECK_ARCHETYPES - Popularity bucket is valid - Example commanders list is not empty - Example cards list is not empty - Description exists and is not generic fallback Args: theme_entry: ThemeEntry Pydantic model instance Returns: List of validation issue messages (empty if valid) """ issues = [] # Deck archetype validation if theme_entry.deck_archetype: if theme_entry.deck_archetype not in ALLOWED_DECK_ARCHETYPES: issues.append(f"Invalid deck_archetype: {theme_entry.deck_archetype}") else: issues.append("Missing deck_archetype") # Popularity bucket validation if not theme_entry.popularity_bucket: issues.append("Missing popularity_bucket") # Example commanders if not theme_entry.example_commanders: issues.append("Missing example_commanders") elif len(theme_entry.example_commanders) < 2: issues.append("Too few example_commanders (minimum 2 recommended)") # Example cards if not theme_entry.example_cards: issues.append("Missing example_cards") elif len(theme_entry.example_cards) < 3: issues.append("Too few example_cards (minimum 3 recommended)") # Description validation if not theme_entry.description: issues.append("Missing description") else: # Check for generic auto-generated descriptions desc = theme_entry.description if any(desc.startswith(prefix) for prefix in ['Accumulates ', 'Builds around ', 'Leverages ']): if 'Synergies like' not in desc: issues.append("Description appears to be minimal fallback template") # Check description_source if not theme_entry.description_source: issues.append("Missing description_source (should be 'rule', 'generic', or 'manual')") elif theme_entry.description_source == 'generic': issues.append("Description source is 'generic' - consider upgrading to rule-based or manual") # Popularity pinning validation if theme_entry.popularity_pinned and not theme_entry.popularity_bucket: issues.append("popularity_pinned is True but popularity_bucket is missing") return issues def suggest_example_commanders(self, theme_name: str, limit: int = 5) -> List[str]: """Suggest example commanders for a theme based on synergies. This is a placeholder for future ML/analytics-based suggestions. Currently returns existing commanders or empty list. Args: theme_name: Theme display name limit: Maximum number of suggestions Returns: List of commander names (up to limit) Raises: NotFoundError: If theme not found """ slug = slugify(theme_name) index = load_index() if slug not in index.slug_to_entry: raise NotFoundError(f"Theme not found: {theme_name}") entry = index.slug_to_entry[slug] commanders = entry.example_commanders or [] # Future enhancement: Query commander catalog for synergy matches # For now, return existing commanders return commanders[:limit] def infer_deck_archetype(self, theme_name: str, synergies: Optional[List[str]] = None) -> Optional[str]: """Infer deck archetype from theme name and synergies. Uses keyword matching against ARCHETYPE_KEYWORDS. Returns first matching archetype or None. Args: theme_name: Theme display name synergies: Optional list of synergy theme names (defaults to theme's synergies) Returns: Deck archetype name from ALLOWED_DECK_ARCHETYPES or None """ # Get synergies if not provided if synergies is None: slug = slugify(theme_name) index = load_index() if slug in index.slug_to_entry: entry = index.slug_to_entry[slug] synergies = entry.synergies or [] else: synergies = [] # Build search text (lowercase) search_text = f"{theme_name.lower()} {' '.join(s.lower() for s in synergies)}" # Match against archetype keywords (ordered by specificity) for archetype, keywords in ARCHETYPE_KEYWORDS.items(): for keyword in keywords: if keyword in search_text: return archetype return None def calculate_popularity_bucket( self, commander_count: int, card_count: int, boundaries: Optional[List[int]] = None ) -> PopularityBucket: """Calculate popularity bucket from commander/card counts. Uses total frequency (commander_count + card_count) against thresholds. Default boundaries: [40, 100, 220, 500] Args: commander_count: Number of commanders with this theme card_count: Number of cards with this theme boundaries: Custom boundaries (4 values, ascending) Returns: PopularityBucket literal: 'Very Common', 'Common', 'Uncommon', 'Niche', or 'Rare' """ if boundaries is None: boundaries = DEFAULT_POPULARITY_BOUNDARIES total_freq = commander_count + card_count if total_freq <= boundaries[0]: return 'Rare' elif total_freq <= boundaries[1]: return 'Niche' elif total_freq <= boundaries[2]: return 'Uncommon' elif total_freq <= boundaries[3]: return 'Common' else: return 'Very Common' def generate_description( self, theme_name: str, synergies: List[str], template: str = "Builds around {theme} leveraging synergies with {synergies}." ) -> str: """Generate a basic description for a theme. This is a simple template-based fallback. The build_theme_catalog.py script has more sophisticated generation. Args: theme_name: Theme display name synergies: List of synergy theme names template: Description template with {theme} and {synergies} placeholders Returns: Generated description string """ synergy_list = synergies[:3] # Top 3 synergies if len(synergy_list) == 0: synergy_text = "its core mechanics" elif len(synergy_list) == 1: synergy_text = synergy_list[0] elif len(synergy_list) == 2: synergy_text = f"{synergy_list[0]} and {synergy_list[1]}" else: synergy_text = f"{', '.join(synergy_list[:-1])}, and {synergy_list[-1]}" return template.format(theme=theme_name, synergies=synergy_text) def infer_description_source(self, description: str) -> DescriptionSource: """Infer description source from content patterns. Heuristics: - Contains "Synergies like" → likely 'rule' (from heuristic mapping) - Starts with generic patterns → 'generic' (fallback template) - Otherwise → assume 'manual' (human-written) Args: description: Description text to analyze Returns: Inferred DescriptionSource value """ if not description: return 'generic' # Rule-based descriptions typically have synergy mentions if 'Synergies like' in description or 'synergies with' in description.lower(): return 'rule' # Generic fallback patterns generic_patterns = ['Accumulates ', 'Builds around ', 'Leverages '] if any(description.startswith(pattern) for pattern in generic_patterns): return 'generic' # Assume manual otherwise return 'manual' # M3: Card Uniqueness and Duplication Analysis def calculate_global_card_frequency(self) -> Dict[str, int]: """Calculate how many themes each card appears in (M3). Analyzes all themes to build a frequency map of cards. Returns: Dict mapping card name to theme count """ index = load_index() card_frequency: Dict[str, int] = {} for entry in index.slug_to_entry.values(): if entry.example_cards: for card in entry.example_cards: card_frequency[card] = card_frequency.get(card, 0) + 1 return card_frequency def calculate_uniqueness_ratio( self, example_cards: List[str], global_card_freq: Optional[Dict[str, int]] = None, uniqueness_threshold: float = 0.25 ) -> float: """Calculate uniqueness ratio for a theme's example cards (M3). Uniqueness = fraction of cards appearing in float: """Calculate duplication ratio for a theme's example cards (M3). Duplication = fraction of cards appearing in >X% of themes. Args: example_cards: List of card names for this theme global_card_freq: Optional pre-calculated card frequencies (will compute if None) duplication_threshold: Threshold for "duplicated" (default: 0.40 = card in >40% of themes) Returns: Ratio from 0.0 to 1.0 (higher = more generic/duplicated cards) """ if not example_cards: return 0.0 if global_card_freq is None: global_card_freq = self.calculate_global_card_frequency() index = load_index() total_themes = len(index.slug_to_entry) if total_themes == 0: return 0.0 duplicated_count = sum( 1 for card in example_cards if (global_card_freq.get(card, 0) / total_themes) > duplication_threshold ) return duplicated_count / len(example_cards) def calculate_enhanced_quality_score( self, theme_entry: ThemeEntry, global_card_freq: Optional[Dict[str, int]] = None ) -> tuple[str, float]: """Calculate enhanced editorial quality score with uniqueness (M3). Enhanced scoring algorithm: - Card count: 0-30 points (8+ cards = max) - Uniqueness ratio: 0-40 points (card in <25% of themes) - Description quality: 0-20 points (manual=20, rule=10, generic=0) - Manual curation: 0-10 points (has curated_synergies) Tiers: - Excellent: 75+ points (≥0.75) - Good: 60-74 points (0.60-0.74) - Fair: 40-59 points (0.40-0.59) - Poor: <40 points (<0.40) Args: theme_entry: ThemeEntry to score global_card_freq: Optional pre-calculated card frequencies Returns: Tuple of (tier_name, numeric_score) where score is 0.0-1.0 """ heuristics = self.load_heuristics() thresholds = heuristics.get('quality_thresholds', {}) total_points = 0.0 max_points = 100.0 # 1. Example card count (0-30 points) card_count = len(theme_entry.example_cards) if theme_entry.example_cards else 0 excellent_card_min = thresholds.get('excellent_card_min', 8) card_points = min(30.0, (card_count / excellent_card_min) * 30.0) total_points += card_points # 2. Uniqueness ratio (0-40 points) - M3 enhancement if theme_entry.example_cards: uniqueness_ratio = self.calculate_uniqueness_ratio( theme_entry.example_cards, global_card_freq ) uniqueness_points = uniqueness_ratio * 40.0 total_points += uniqueness_points # 3. Description quality (0-20 points) if theme_entry.description_source: desc_bonus = { 'manual': thresholds.get('manual_description_bonus', 10), 'rule': thresholds.get('rule_description_bonus', 5), 'generic': thresholds.get('generic_description_bonus', 0), }.get(theme_entry.description_source, 0) total_points += desc_bonus # 4. Manual curation bonus (0-10 points) - checks for curated_synergies if hasattr(theme_entry, 'curated_synergies') and theme_entry.curated_synergies: total_points += 10.0 # Normalize to 0.0-1.0 normalized_score = total_points / max_points # Determine tier using heuristics thresholds excellent_min = thresholds.get('excellent_min_score', 75) / 100.0 good_min = thresholds.get('good_min_score', 60) / 100.0 fair_min = thresholds.get('fair_min_score', 40) / 100.0 if normalized_score >= excellent_min: tier = 'Excellent' elif normalized_score >= good_min: tier = 'Good' elif normalized_score >= fair_min: tier = 'Fair' else: tier = 'Poor' return (tier, normalized_score) def get_catalog_statistics(self, use_enhanced_scoring: bool = False) -> Dict[str, Any]: """Get editorial quality statistics for entire catalog. M3 Enhancement: Optionally use enhanced quality scoring with uniqueness metrics. Args: use_enhanced_scoring: If True, use M3 enhanced scoring with uniqueness Returns: Dictionary with: - total_themes: Total number of themes - complete_editorials: Themes with all editorial fields - missing_descriptions: Count of missing descriptions - missing_examples: Count of missing example commanders/cards - quality_distribution: Dict of quality tiers and counts - average_quality_score: Mean quality score - description_source_distribution: Breakdown by source type - pinned_popularity_count: Themes with pinned popularity - [M3] average_uniqueness_ratio: Mean card uniqueness (if enhanced) - [M3] average_duplication_ratio: Mean card duplication (if enhanced) """ index = load_index() total = len(index.slug_to_entry) # Pre-calculate global card frequency for M3 enhanced scoring global_card_freq = self.calculate_global_card_frequency() if use_enhanced_scoring else None complete = 0 missing_descriptions = 0 missing_examples = 0 quality_scores = [] quality_tiers = {'Excellent': 0, 'Good': 0, 'Fair': 0, 'Poor': 0} description_sources = {'manual': 0, 'rule': 0, 'generic': 0, 'unknown': 0} pinned_count = 0 uniqueness_ratios = [] # M3 duplication_ratios = [] # M3 for entry in index.slug_to_entry.values(): # Calculate quality score (M1 or M3 version) if use_enhanced_scoring: tier, score = self.calculate_enhanced_quality_score(entry, global_card_freq) quality_scores.append(score * 100) # Convert to 0-100 scale quality_tiers[tier] += 1 # M3: Calculate uniqueness and duplication metrics if entry.example_cards: uniqueness = self.calculate_uniqueness_ratio(entry.example_cards, global_card_freq) duplication = self.calculate_duplication_ratio(entry.example_cards, global_card_freq) uniqueness_ratios.append(uniqueness) duplication_ratios.append(duplication) else: score = self.calculate_quality_score(entry) quality_scores.append(score) tier = self.get_quality_tier(score) quality_tiers[tier] += 1 # Check completeness has_all_fields = bool( entry.description and entry.example_commanders and entry.example_cards and entry.deck_archetype and entry.popularity_bucket ) if has_all_fields: complete += 1 if not entry.description: missing_descriptions += 1 if not entry.example_commanders or not entry.example_cards: missing_examples += 1 # Track description sources if entry.description_source: description_sources[entry.description_source] += 1 else: description_sources['unknown'] += 1 # Track pinned popularity if entry.popularity_pinned: pinned_count += 1 avg_score = sum(quality_scores) / len(quality_scores) if quality_scores else 0 result = { 'total_themes': total, 'complete_editorials': complete, 'missing_descriptions': missing_descriptions, 'missing_examples': missing_examples, 'quality_distribution': quality_tiers, 'average_quality_score': round(avg_score, 2), 'completeness_percentage': round((complete / total) * 100, 2) if total > 0 else 0, 'description_source_distribution': description_sources, 'pinned_popularity_count': pinned_count, } # M3: Add uniqueness metrics if using enhanced scoring if use_enhanced_scoring and uniqueness_ratios: result['average_uniqueness_ratio'] = round(sum(uniqueness_ratios) / len(uniqueness_ratios), 3) result['average_duplication_ratio'] = round(sum(duplication_ratios) / len(duplication_ratios), 3) return result # Singleton instance for module-level access _editorial_service: Optional[ThemeEditorialService] = None def get_editorial_service() -> ThemeEditorialService: """Get singleton ThemeEditorialService instance. Returns: ThemeEditorialService instance """ global _editorial_service if _editorial_service is None: _editorial_service = ThemeEditorialService() return _editorial_service