mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2026-03-24 22:16:31 +01:00
feat: add theme editorial quality system with scoring, linting, and comprehensive documentation (#54)
This commit is contained in:
parent
de8087d940
commit
1ebc2fcb3c
12 changed files with 3169 additions and 157 deletions
201
code/scripts/backfill_editorial_fields.py
Normal file
201
code/scripts/backfill_editorial_fields.py
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
"""Backfill M1 editorial tracking fields (description_source, popularity_pinned) to theme YAML files.
|
||||
|
||||
This script adds tracking metadata to existing theme YAMLs to support editorial workflows:
|
||||
- description_source: Classifies descriptions as 'rule', 'generic', or 'manual'
|
||||
- popularity_pinned: Boolean flag to prevent auto-population_bucket updates
|
||||
|
||||
Usage:
|
||||
python code/scripts/backfill_editorial_fields.py [--dry-run] [--verbose]
|
||||
|
||||
Options:
|
||||
--dry-run: Show changes without writing files
|
||||
--verbose: Print detailed progress
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
import yaml
|
||||
|
||||
# Add project root to path
|
||||
ROOT = Path(__file__).resolve().parent.parent.parent
|
||||
sys.path.insert(0, str(ROOT / 'code'))
|
||||
|
||||
from type_definitions_theme_catalog import ThemeYAMLFile
|
||||
from web.services.theme_editorial_service import ThemeEditorialService
|
||||
|
||||
|
||||
def load_yaml_raw(file_path: Path) -> Dict:
    """Load a theme YAML file into a plain dictionary.

    Note that YAML comments are NOT preserved: ``yaml.safe_load`` discards
    them, so rewriting the file from the returned data drops any comments
    the file contained.

    Args:
        file_path: Path to the YAML file to read (decoded as UTF-8).

    Returns:
        The top-level mapping of the YAML document.

    Raises:
        ValueError: If the document is empty or its top level is not a mapping.
        yaml.YAMLError: If the file is not valid YAML.
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # An empty file loads as None and a sequence/scalar document loads as a
    # list/str. Fail here with a clear message instead of letting the caller
    # hit an obscure TypeError when it unpacks the result with ``**``.
    if not isinstance(data, dict):
        raise ValueError(
            f"Expected a YAML mapping at the top level of {file_path}, "
            f"got {type(data).__name__}"
        )
    return data
|
||||
|
||||
|
||||
def write_yaml_preserving_order(file_path: Path, data: Dict) -> None:
    """Serialize ``data`` to ``file_path`` as YAML.

    The target is opened for writing as UTF-8, replacing any existing
    content. Keys are emitted in the dictionary's insertion order rather
    than being sorted.

    Args:
        file_path: Destination YAML file.
        data: Mapping to serialize.
    """
    dump_options = {
        'default_flow_style': False,
        'allow_unicode': True,
        'sort_keys': False,  # Preserve insertion order
        'indent': 2,
    }
    with open(file_path, 'w', encoding='utf-8') as stream:
        yaml.safe_dump(data, stream, **dump_options)
|
||||
|
||||
|
||||
def backfill_theme_yaml(
    file_path: Path,
    service: ThemeEditorialService,
    dry_run: bool = False,
    verbose: bool = False
) -> Tuple[bool, List[str]]:
    """Backfill M1 editorial fields to a single theme YAML.

    Adds ``description_source`` (inferred from the existing description) when
    it is missing or empty, and ``popularity_pinned=False`` when the key is
    absent. The file is rewritten only if something changed and ``dry_run``
    is False.

    Args:
        file_path: Path to theme YAML file
        service: ThemeEditorialService instance for inference
        dry_run: If True, don't write changes
        verbose: If True, print the per-file list of changes

    Returns:
        Tuple of (modified, changes) where:
        - modified: True if the data was changed (also True in dry-run mode,
          where the change is computed but not written)
        - changes: List of change descriptions. On failure this is a single
          entry starting with ``"Error:"`` and ``modified`` is False.
    """
    try:
        # Load raw YAML
        raw_data = load_yaml_raw(file_path)

        # Validate against ThemeYAMLFile model
        theme = ThemeYAMLFile(**raw_data)

        changes: List[str] = []
        modified = False

        # Check description_source (a present-but-empty value counts as missing)
        if not raw_data.get('description_source'):
            if theme.description:
                inferred = service.infer_description_source(theme.description)
                raw_data['description_source'] = inferred
                changes.append(f"Added description_source='{inferred}'")
                modified = True
            else:
                # Nothing to classify; recorded as a note, not a modification.
                changes.append("Skipped description_source (no description)")

        # Check popularity_pinned
        if 'popularity_pinned' not in raw_data:
            raw_data['popularity_pinned'] = False
            changes.append("Added popularity_pinned=False")
            modified = True

        # Write back if modified and not dry-run
        if modified and not dry_run:
            write_yaml_preserving_order(file_path, raw_data)

        if verbose and modified:
            print(f"{'[DRY-RUN] ' if dry_run else ''}Modified: {file_path.name}")
            for change in changes:
                print(f" - {change}")

        return modified, changes

    except Exception as e:
        # Per-file boundary of a batch run: one bad file must not abort the
        # whole backfill. The failure is always reported on stderr (not only
        # in verbose mode); otherwise a non-zero exit status would come with
        # no indication of which file failed or why.
        print(f"ERROR processing {file_path.name}: {e}", file=sys.stderr)
        return False, [f"Error: {e}"]
|
||||
|
||||
|
||||
def backfill_catalog(
    catalog_dir: Path,
    dry_run: bool = False,
    verbose: bool = False
) -> Dict[str, int]:
    """Run the editorial-field backfill over every ``*.yml`` file in a directory.

    Files are processed in sorted path order, each through
    ``backfill_theme_yaml`` with one shared ``ThemeEditorialService``.

    Args:
        catalog_dir: Directory containing the theme YAML files
        dry_run: If True, don't write changes
        verbose: If True, print detailed progress

    Returns:
        Counts keyed by ``'total'``, ``'modified'``, ``'unchanged'`` and
        ``'errors'``.
    """
    editorial = ThemeEditorialService()
    theme_paths = sorted(catalog_dir.glob('*.yml'))

    stats = {'total': len(theme_paths), 'modified': 0, 'unchanged': 0, 'errors': 0}

    print(f"Processing {stats['total']} theme YAML files...")
    if dry_run:
        print("[DRY-RUN MODE] No files will be modified\n")

    for theme_path in theme_paths:
        was_modified, notes = backfill_theme_yaml(theme_path, editorial, dry_run, verbose)

        # A failure is signalled by a first note that starts with "Error:".
        if notes and notes[0].startswith('Error:'):
            outcome = 'errors'
        elif was_modified:
            outcome = 'modified'
        else:
            outcome = 'unchanged'
        stats[outcome] += 1

    return stats
|
||||
|
||||
|
||||
def main() -> int:
    """Parse command-line options, run the backfill, and print a summary.

    Returns:
        Process exit code: 0 when every file was processed without error;
        1 when the catalog path is missing or not a directory, or when at
        least one file failed.
    """
    parser = argparse.ArgumentParser(
        description="Backfill M1 editorial tracking fields to theme YAML files"
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help="Show changes without writing files"
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help="Print detailed progress"
    )
    parser.add_argument(
        '--catalog-dir',
        type=Path,
        default=ROOT / 'config' / 'themes' / 'catalog',
        help="Path to theme catalog directory (default: config/themes/catalog)"
    )

    args = parser.parse_args()

    # is_dir() rather than exists(): a regular file passed as --catalog-dir
    # would otherwise pass this check, match no '*.yml' entries, and report a
    # successful run over zero files.
    if not args.catalog_dir.is_dir():
        print(f"ERROR: Catalog directory not found: {args.catalog_dir}", file=sys.stderr)
        return 1

    # Run backfill
    stats = backfill_catalog(args.catalog_dir, args.dry_run, args.verbose)

    # Print summary
    print(f"\n{'='*60}")
    print(f"Backfill {'Summary (DRY-RUN)' if args.dry_run else 'Complete'}:")
    print(f" Total files: {stats['total']}")
    print(f" Modified: {stats['modified']}")
    print(f" Unchanged: {stats['unchanged']}")
    print(f" Errors: {stats['errors']}")
    print(f"{'='*60}")

    if args.dry_run:
        print("\nRe-run without --dry-run to apply changes.")

    return 0 if stats['errors'] == 0 else 1
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
|
|
@ -34,6 +34,7 @@ if str(CODE_ROOT) not in sys.path:
|
|||
from type_definitions_theme_catalog import ThemeCatalog, ThemeYAMLFile
|
||||
from scripts.extract_themes import load_whitelist_config
|
||||
from scripts.build_theme_catalog import build_catalog
|
||||
from web.services.theme_editorial_service import ThemeEditorialService
|
||||
|
||||
CATALOG_JSON = ROOT / 'config' / 'themes' / 'theme_list.json'
|
||||
|
||||
|
|
@ -110,13 +111,35 @@ def validate_catalog(data: Dict, *, whitelist: Dict, allow_soft_exceed: bool = T
|
|||
return errors
|
||||
|
||||
|
||||
def validate_yaml_files(*, whitelist: Dict, strict_alias: bool = False) -> List[str]:
|
||||
def validate_yaml_files(
|
||||
*,
|
||||
whitelist: Dict,
|
||||
strict_alias: bool = False,
|
||||
check_editorial_quality: bool = False,
|
||||
lint_enabled: bool = False,
|
||||
lint_duplication_threshold: float = 0.5,
|
||||
lint_quality_threshold: float = 0.3
|
||||
) -> List[str]:
|
||||
"""Validate individual YAML catalog files.
|
||||
|
||||
strict_alias: if True, treat presence of a deprecated alias (normalization key)
|
||||
as a hard error instead of a soft ignored transitional state.
|
||||
check_editorial_quality: if True, check M1 editorial quality fields (description_source, etc.).
|
||||
lint_enabled: if True, run M4 linter checks (duplication, quality scoring).
|
||||
lint_duplication_threshold: flag themes with duplication ratio above this (default 0.5).
|
||||
lint_quality_threshold: flag themes with quality score below this (default 0.3).
|
||||
"""
|
||||
errors: List[str] = []
|
||||
|
||||
# M4: Initialize editorial service for lint checks
|
||||
editorial_service = None
|
||||
global_card_freq = None
|
||||
if lint_enabled:
|
||||
try:
|
||||
editorial_service = ThemeEditorialService()
|
||||
global_card_freq = editorial_service.calculate_global_card_frequency()
|
||||
except Exception as e:
|
||||
errors.append(f"[LINT] Failed to initialize editorial service: {e}")
|
||||
catalog_dir = ROOT / 'config' / 'themes' / 'catalog'
|
||||
if not catalog_dir.exists():
|
||||
return errors
|
||||
|
|
@ -142,6 +165,72 @@ def validate_yaml_files(*, whitelist: Dict, strict_alias: bool = False) -> List[
|
|||
if obj.id in seen_ids:
|
||||
errors.append(f"Duplicate YAML id: {obj.id}")
|
||||
seen_ids.add(obj.id)
|
||||
|
||||
# M1 Editorial Field Validation (opt-in)
|
||||
if check_editorial_quality:
|
||||
if obj.description and not obj.description_source:
|
||||
errors.append(f"Missing description_source in {path.name} (has description but no source metadata)")
|
||||
if obj.description_source == 'generic':
|
||||
# Soft warning: generic descriptions should be upgraded
|
||||
errors.append(f"[QUALITY] {path.name} has generic description_source - consider upgrading to rule-based or manual")
|
||||
if obj.popularity_pinned and not obj.popularity_bucket:
|
||||
errors.append(f"Invalid configuration in {path.name}: popularity_pinned=True but popularity_bucket is missing")
|
||||
|
||||
# M4 Linter Checks (opt-in)
|
||||
if lint_enabled and editorial_service and global_card_freq is not None:
|
||||
# Only lint themes with example cards
|
||||
if obj.example_cards and len(obj.example_cards) > 0:
|
||||
# Check 1: High Duplication Ratio
|
||||
try:
|
||||
dup_ratio = editorial_service.calculate_duplication_ratio(
|
||||
example_cards=obj.example_cards,
|
||||
global_card_freq=global_card_freq,
|
||||
duplication_threshold=0.4 # Cards in >40% of themes
|
||||
)
|
||||
if dup_ratio > lint_duplication_threshold:
|
||||
# Calculate total themes for identifying generic cards
|
||||
index = editorial_service.load_index()
|
||||
total_themes = len(index.slug_to_entry)
|
||||
generic_cards = [
|
||||
card for card in obj.example_cards
|
||||
if global_card_freq.get(card, 0) / max(1, total_themes) > 0.4
|
||||
]
|
||||
errors.append(
|
||||
f"[LINT-WARNING] {path.name} has high duplication ratio ({dup_ratio:.2f} > {lint_duplication_threshold}). "
|
||||
f"Generic cards: {', '.join(generic_cards[:5])}{' ...' if len(generic_cards) > 5 else ''}"
|
||||
)
|
||||
except Exception as e:
|
||||
errors.append(f"[LINT] Failed to check duplication for {path.name}: {e}")
|
||||
|
||||
# Check 2: Low Quality Score
|
||||
try:
|
||||
# Create a minimal ThemeEntry for quality scoring
|
||||
from type_definitions_theme_catalog import ThemeEntry
|
||||
theme_entry = ThemeEntry(
|
||||
theme=obj.display_name,
|
||||
example_cards=obj.example_cards,
|
||||
description_source=obj.description_source
|
||||
)
|
||||
tier, score = editorial_service.calculate_enhanced_quality_score(
|
||||
theme_entry=theme_entry,
|
||||
global_card_freq=global_card_freq
|
||||
)
|
||||
if score < lint_quality_threshold:
|
||||
suggestions = []
|
||||
if len(obj.example_cards) < 5:
|
||||
suggestions.append("Add more example cards (target: 8+)")
|
||||
if obj.description_source == 'generic':
|
||||
suggestions.append("Upgrade to manual or rule-based description")
|
||||
if dup_ratio > 0.4:
|
||||
suggestions.append("Replace generic staples with unique cards")
|
||||
|
||||
errors.append(
|
||||
f"[LINT-WARNING] {path.name} has low quality score ({score:.2f} < {lint_quality_threshold}, tier={tier}). "
|
||||
f"Suggestions: {'; '.join(suggestions) if suggestions else 'Review theme curation'}"
|
||||
)
|
||||
except Exception as e:
|
||||
errors.append(f"[LINT] Failed to check quality for {path.name}: {e}")
|
||||
|
||||
# Normalization alias check: display_name should already be normalized if in map
|
||||
if normalization_map and obj.display_name in normalization_map.keys():
|
||||
if strict_alias:
|
||||
|
|
@ -164,6 +253,10 @@ def main(): # pragma: no cover
|
|||
parser.add_argument('--fail-soft-exceed', action='store_true', help='Treat synergy list length > cap as error even for soft exceed')
|
||||
parser.add_argument('--yaml-schema', action='store_true', help='Print JSON Schema for per-file ThemeYAML and exit')
|
||||
parser.add_argument('--strict-alias', action='store_true', help='Fail if any YAML uses an alias name slated for normalization')
|
||||
parser.add_argument('--check-quality', action='store_true', help='Enable M1 editorial quality checks (description_source, popularity_pinned)')
|
||||
parser.add_argument('--lint', action='store_true', help='Enable M4 linter checks (duplication, quality scoring)')
|
||||
parser.add_argument('--lint-duplication-threshold', type=float, default=0.5, help='Duplication ratio threshold for linter warnings (default: 0.5)')
|
||||
parser.add_argument('--lint-quality-threshold', type=float, default=0.3, help='Quality score threshold for linter warnings (default: 0.3)')
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.schema:
|
||||
|
|
@ -184,7 +277,14 @@ def main(): # pragma: no cover
|
|||
whitelist = load_whitelist_config()
|
||||
data = load_catalog_file()
|
||||
errors = validate_catalog(data, whitelist=whitelist, allow_soft_exceed=not args.fail_soft_exceed)
|
||||
errors.extend(validate_yaml_files(whitelist=whitelist, strict_alias=args.strict_alias))
|
||||
errors.extend(validate_yaml_files(
|
||||
whitelist=whitelist,
|
||||
strict_alias=args.strict_alias,
|
||||
check_editorial_quality=args.check_quality,
|
||||
lint_enabled=args.lint,
|
||||
lint_duplication_threshold=args.lint_duplication_threshold,
|
||||
lint_quality_threshold=args.lint_quality_threshold
|
||||
))
|
||||
|
||||
if args.rebuild_pass:
|
||||
rebuilt = build_catalog(limit=0, verbose=False)
|
||||
|
|
|
|||
976
code/tests/test_theme_editorial_service.py
Normal file
976
code/tests/test_theme_editorial_service.py
Normal file
|
|
@ -0,0 +1,976 @@
|
|||
"""Tests for ThemeEditorialService (R12 M1).
|
||||
|
||||
Tests editorial quality scoring, validation, and metadata management
|
||||
following R9 testing standards.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from code.web.services.theme_editorial_service import (
|
||||
ThemeEditorialService,
|
||||
get_editorial_service,
|
||||
)
|
||||
from code.web.services.base import NotFoundError
|
||||
from code.type_definitions_theme_catalog import ThemeEntry
|
||||
|
||||
|
||||
class TestEditorialService:
    """Construction of ThemeEditorialService and its shared accessor."""

    def test_service_initialization(self):
        """Instantiating the service directly yields an object."""
        assert ThemeEditorialService() is not None

    def test_singleton_getter(self):
        """Two calls to get_editorial_service return the very same object."""
        first = get_editorial_service()
        second = get_editorial_service()
        assert first is second
|
||||
|
||||
|
||||
class TestQualityScoring:
    """Checks on ``calculate_quality_score`` for sparse and rich entries."""

    def test_perfect_score(self):
        """An entry with every editorial field filled in scores 100."""
        svc = ThemeEditorialService()
        fields = {
            'id': 'test-theme',
            'theme': 'Test Theme',
            'synergies': ['Synergy1', 'Synergy2'],
            'description': 'A comprehensive description of the theme strategy that exceeds fifty characters for bonus points.',
            'example_commanders': ['Commander 1', 'Commander 2', 'Commander 3', 'Commander 4'],
            'example_cards': ['Card 1', 'Card 2', 'Card 3', 'Card 4', 'Card 5', 'Card 6'],
            'deck_archetype': 'Combo',
            'popularity_bucket': 'Common',
            'synergy_commanders': ['Synergy Commander 1'],
        }
        score = svc.calculate_quality_score(ThemeEntry(**fields))
        assert score == 100, f"Expected perfect score 100, got {score}"

    def test_minimal_score(self):
        """An entry carrying only id, theme and synergies scores 0."""
        svc = ThemeEditorialService()
        bare = ThemeEntry(id='minimal-theme', theme='Minimal Theme', synergies=['Synergy1'])
        score = svc.calculate_quality_score(bare)
        assert score == 0, f"Expected score 0 for minimal entry, got {score}"

    def test_partial_score_with_description_only(self):
        """A short description as the only editorial field scores 20."""
        svc = ThemeEditorialService()
        described = ThemeEntry(
            id='desc-only',
            theme='Description Only',
            synergies=[],
            description='Short description.',
        )
        score = svc.calculate_quality_score(described)
        assert score == 20, f"Expected score 20 (description only), got {score}"

    def test_description_length_bonus(self):
        """A longer description outscores a short one and reaches 30."""
        svc = ThemeEditorialService()

        # Baseline: very short description
        brief = ThemeEntry(id='short', theme='Short', synergies=[], description='Short.')
        score_short = svc.calculate_quality_score(brief)

        # Same shape of entry, longer description
        detailed = ThemeEntry(
            id='long',
            theme='Long',
            synergies=[],
            description='A much longer and more comprehensive description that exceeds fifty characters.',
        )
        score_long = svc.calculate_quality_score(detailed)

        assert score_long > score_short, "Long description should score higher"
        assert score_long == 30, f"Expected 30 (20 base + 10 bonus), got {score_long}"

    def test_commander_count_bonus(self):
        """Four example commanders score 25 against 15 for two."""
        svc = ThemeEditorialService()

        # Two commanders
        two_commanders = ThemeEntry(
            id='few',
            theme='Few',
            synergies=[],
            example_commanders=['Commander 1', 'Commander 2'],
        )
        score_few = svc.calculate_quality_score(two_commanders)

        # Four commanders
        four_commanders = ThemeEntry(
            id='many',
            theme='Many',
            synergies=[],
            example_commanders=['Commander 1', 'Commander 2', 'Commander 3', 'Commander 4'],
        )
        score_many = svc.calculate_quality_score(four_commanders)

        assert score_many > score_few, "More commanders should score higher"
        assert score_few == 15, f"Expected 15 (base), got {score_few}"
        assert score_many == 25, f"Expected 25 (15 base + 10 bonus), got {score_many}"

    def test_card_count_bonus(self):
        """Six example cards outscore two and reach 25."""
        svc = ThemeEditorialService()

        # Two cards
        two_cards = ThemeEntry(
            id='few',
            theme='Few',
            synergies=[],
            example_cards=['Card 1', 'Card 2'],
        )
        score_few = svc.calculate_quality_score(two_cards)

        # Six cards
        six_cards = ThemeEntry(
            id='many',
            theme='Many',
            synergies=[],
            example_cards=['Card 1', 'Card 2', 'Card 3', 'Card 4', 'Card 5', 'Card 6'],
        )
        score_many = svc.calculate_quality_score(six_cards)

        assert score_many > score_few, "More cards should score higher"
        assert score_many == 25, f"Expected 25 (15 base + 10 bonus), got {score_many}"
|
||||
|
||||
|
||||
class TestQualityTiers:
    """Tier labels returned by ``get_quality_tier`` (M2 heuristics thresholds)."""

    def test_excellent_tier(self):
        """Scores 100 and 75 are labelled 'Excellent'."""
        svc = ThemeEditorialService()
        for value in (100, 75):
            assert svc.get_quality_tier(value) == 'Excellent'

    def test_good_tier(self):
        """Scores 74 and 60 are labelled 'Good'."""
        svc = ThemeEditorialService()
        for value in (74, 60):
            assert svc.get_quality_tier(value) == 'Good'

    def test_fair_tier(self):
        """Scores 59 and 40 are labelled 'Fair'."""
        svc = ThemeEditorialService()
        for value in (59, 40):
            assert svc.get_quality_tier(value) == 'Fair'

    def test_poor_tier(self):
        """Scores 39 and 0 are labelled 'Poor'."""
        svc = ThemeEditorialService()
        for value in (39, 0):
            assert svc.get_quality_tier(value) == 'Poor'
|
||||
|
||||
|
||||
class TestValidation:
    """Issue messages produced by ``validate_editorial_fields``."""

    def test_valid_entry_no_issues(self):
        """A fully populated, manually described entry yields no issues."""
        svc = ThemeEditorialService()
        complete = ThemeEntry(
            id='valid',
            theme='Valid Theme',
            synergies=['Synergy1', 'Synergy2'],
            description='A proper description of the theme strategy with sufficient detail.',
            description_source='manual',
            example_commanders=['Commander 1', 'Commander 2', 'Commander 3'],
            example_cards=['Card 1', 'Card 2', 'Card 3', 'Card 4'],
            deck_archetype='Combo',
            popularity_bucket='Common',
        )
        issues = svc.validate_editorial_fields(complete)
        assert len(issues) == 0, f"Expected no issues, got {issues}"

    def test_missing_deck_archetype(self):
        """Leaving out deck_archetype produces an issue that names it."""
        svc = ThemeEditorialService()
        candidate = ThemeEntry(
            id='missing-arch',
            theme='Missing Archetype',
            synergies=[],
            description='Description',
            example_commanders=['Commander 1', 'Commander 2'],
            example_cards=['Card 1', 'Card 2', 'Card 3'],
            popularity_bucket='Common',
        )
        issues = svc.validate_editorial_fields(candidate)
        flagged = [msg for msg in issues if 'deck_archetype' in msg.lower()]
        assert flagged

    def test_invalid_deck_archetype(self):
        """An unrecognised deck_archetype value is reported as invalid."""
        svc = ThemeEditorialService()
        candidate = ThemeEntry(
            id='invalid-arch',
            theme='Invalid Archetype',
            synergies=[],
            description='Description',
            example_commanders=['Commander 1', 'Commander 2'],
            example_cards=['Card 1', 'Card 2', 'Card 3'],
            deck_archetype='InvalidArchetype',  # Not in ALLOWED_DECK_ARCHETYPES
            popularity_bucket='Common',
        )
        issues = svc.validate_editorial_fields(candidate)
        flagged = [msg for msg in issues if 'invalid deck_archetype' in msg.lower()]
        assert flagged

    def test_missing_popularity_bucket(self):
        """Leaving out popularity_bucket produces an issue that names it."""
        svc = ThemeEditorialService()
        candidate = ThemeEntry(
            id='missing-pop',
            theme='Missing Popularity',
            synergies=[],
            description='Description',
            example_commanders=['Commander 1', 'Commander 2'],
            example_cards=['Card 1', 'Card 2', 'Card 3'],
            deck_archetype='Combo',
        )
        issues = svc.validate_editorial_fields(candidate)
        flagged = [msg for msg in issues if 'popularity_bucket' in msg.lower()]
        assert flagged

    def test_insufficient_commanders(self):
        """A single example commander is reported as too few."""
        svc = ThemeEditorialService()
        candidate = ThemeEntry(
            id='few-cmdr',
            theme='Few Commanders',
            synergies=[],
            description='Description',
            example_commanders=['Commander 1'],  # Only 1
            example_cards=['Card 1', 'Card 2', 'Card 3'],
            deck_archetype='Combo',
            popularity_bucket='Common',
        )
        issues = svc.validate_editorial_fields(candidate)
        flagged = [msg for msg in issues if 'too few example_commanders' in msg.lower()]
        assert flagged

    def test_insufficient_cards(self):
        """Two example cards are reported as too few."""
        svc = ThemeEditorialService()
        candidate = ThemeEntry(
            id='few-cards',
            theme='Few Cards',
            synergies=[],
            description='Description',
            example_commanders=['Commander 1', 'Commander 2'],
            example_cards=['Card 1', 'Card 2'],  # Only 2
            deck_archetype='Combo',
            popularity_bucket='Common',
        )
        issues = svc.validate_editorial_fields(candidate)
        flagged = [msg for msg in issues if 'too few example_cards' in msg.lower()]
        assert flagged

    def test_missing_description(self):
        """An entry without a description produces an issue mentioning it."""
        svc = ThemeEditorialService()
        candidate = ThemeEntry(
            id='no-desc',
            theme='No Description',
            synergies=[],
            example_commanders=['Commander 1', 'Commander 2'],
            example_cards=['Card 1', 'Card 2', 'Card 3'],
            deck_archetype='Combo',
            popularity_bucket='Common',
        )
        issues = svc.validate_editorial_fields(candidate)
        flagged = [msg for msg in issues if 'description' in msg.lower()]
        assert flagged

    def test_generic_description_warning(self):
        """A template-like description is flagged as a fallback template."""
        svc = ThemeEditorialService()
        candidate = ThemeEntry(
            id='generic',
            theme='Generic',
            synergies=[],
            description='Leverages something somehow.',  # Generic template without synergies
            example_commanders=['Commander 1', 'Commander 2'],
            example_cards=['Card 1', 'Card 2', 'Card 3'],
            deck_archetype='Combo',
            popularity_bucket='Common',
        )
        issues = svc.validate_editorial_fields(candidate)
        flagged = [msg for msg in issues if 'fallback template' in msg.lower()]
        assert flagged
|
||||
|
||||
|
||||
class TestDescriptionSource:
    """Validation, inference and scoring around ``description_source``."""

    def test_missing_description_source(self):
        """A description without description_source produces an issue naming it."""
        svc = ThemeEditorialService()
        candidate = ThemeEntry(
            id='no-source',
            theme='No Source',
            synergies=[],
            description='Has description but no source',
            example_commanders=['Commander 1', 'Commander 2'],
            example_cards=['Card 1', 'Card 2', 'Card 3'],
            deck_archetype='Combo',
            popularity_bucket='Common',
        )
        issues = svc.validate_editorial_fields(candidate)
        flagged = [msg for msg in issues if 'description_source' in msg.lower()]
        assert flagged

    def test_generic_source_warning(self):
        """description_source='generic' yields a warning that suggests upgrading."""
        svc = ThemeEditorialService()
        candidate = ThemeEntry(
            id='generic-source',
            theme='Generic Source',
            synergies=[],
            description='Some description',
            description_source='generic',
            example_commanders=['Commander 1', 'Commander 2'],
            example_cards=['Card 1', 'Card 2', 'Card 3'],
            deck_archetype='Combo',
            popularity_bucket='Common',
        )
        issues = svc.validate_editorial_fields(candidate)

        # At least one issue must mention "generic", and one of those must
        # contain "upgrad" (matches "upgrade" / "upgrading").
        generic_warnings = [msg for msg in issues if 'generic' in msg.lower()]
        assert len(generic_warnings) > 0, f"Expected generic warning, got issues: {issues}"
        assert any('upgrad' in msg.lower() for msg in generic_warnings), f"Expected 'upgrad' in warning, got: {generic_warnings}"

    def test_infer_rule_based_description(self):
        """A 'Synergies like ... reinforce the plan' sentence is inferred as 'rule'."""
        svc = ThemeEditorialService()
        text = "Chains spells together. Synergies like Storm and Magecraft reinforce the plan."
        assert svc.infer_description_source(text) == 'rule'

    def test_infer_generic_description(self):
        """A 'Builds around this theme ...' sentence is inferred as 'generic'."""
        svc = ThemeEditorialService()
        text = "Builds around this theme with various synergies."
        assert svc.infer_description_source(text) == 'generic'

    def test_infer_manual_description(self):
        """Free-form prose is inferred as 'manual'."""
        svc = ThemeEditorialService()
        text = "This unique strategy leverages multiple vectors of advantage."
        assert svc.infer_description_source(text) == 'manual'

    def test_manual_description_bonus(self):
        """With identical text, a 'manual' source outscores a 'rule' source."""
        svc = ThemeEditorialService()

        # Rule-based source
        rule_entry = ThemeEntry(
            id='rule',
            theme='Rule',
            synergies=[],
            description='A good description',
            description_source='rule',
        )
        score_rule = svc.calculate_quality_score(rule_entry)

        # Manual source
        manual_entry = ThemeEntry(
            id='manual',
            theme='Manual',
            synergies=[],
            description='A good description',
            description_source='manual',
        )
        score_manual = svc.calculate_quality_score(manual_entry)

        assert score_manual > score_rule, "Manual descriptions should score higher"
|
||||
|
||||
|
||||
class TestPopularityPinning:
    """Validation of ``popularity_pinned`` in combination with ``popularity_bucket``."""

    def test_pinned_without_bucket_error(self):
        """Pinning without a bucket yields an issue naming popularity_pinned and 'missing'."""
        svc = ThemeEditorialService()
        candidate = ThemeEntry(
            id='pinned-no-bucket',
            theme='Pinned No Bucket',
            synergies=[],
            description='Description',
            description_source='manual',
            example_commanders=['Commander 1', 'Commander 2'],
            example_cards=['Card 1', 'Card 2', 'Card 3'],
            deck_archetype='Combo',
            popularity_pinned=True,  # Pinned but no bucket
        )
        issues = svc.validate_editorial_fields(candidate)
        lowered = (msg.lower() for msg in issues)
        assert any('popularity_pinned' in text and 'missing' in text for text in lowered)

    def test_pinned_with_bucket_valid(self):
        """Pinning together with a bucket yields no popularity_pinned issue."""
        svc = ThemeEditorialService()
        candidate = ThemeEntry(
            id='pinned-valid',
            theme='Pinned Valid',
            synergies=[],
            description='Description',
            description_source='manual',
            example_commanders=['Commander 1', 'Commander 2'],
            example_cards=['Card 1', 'Card 2', 'Card 3'],
            deck_archetype='Combo',
            popularity_bucket='Rare',
            popularity_pinned=True,
        )
        issues = svc.validate_editorial_fields(candidate)
        # Other issues are allowed; none may refer to pinning.
        assert all('popularity_pinned' not in msg.lower() for msg in issues)
|
||||
|
||||
|
||||
class TestPopularityCalculation:
    """Bucket labels returned by ``calculate_popularity_bucket``."""

    def test_rare_bucket(self):
        """Counts 15 and 20 map to 'Rare'."""
        svc = ThemeEditorialService()
        # total 35, below 40
        assert svc.calculate_popularity_bucket(15, 20) == 'Rare'

    def test_niche_bucket(self):
        """Counts 30 and 40 map to 'Niche'."""
        svc = ThemeEditorialService()
        # total 70, between 40-100
        assert svc.calculate_popularity_bucket(30, 40) == 'Niche'

    def test_uncommon_bucket(self):
        """Counts 80 and 80 map to 'Uncommon'."""
        svc = ThemeEditorialService()
        # total 160, between 100-220
        assert svc.calculate_popularity_bucket(80, 80) == 'Uncommon'

    def test_common_bucket(self):
        """Counts 150 and 150 map to 'Common'."""
        svc = ThemeEditorialService()
        # total 300, between 220-500
        assert svc.calculate_popularity_bucket(150, 150) == 'Common'

    def test_very_common_bucket(self):
        """Counts 300 and 300 map to 'Very Common'."""
        svc = ThemeEditorialService()
        # total 600, above 500
        assert svc.calculate_popularity_bucket(300, 300) == 'Very Common'

    def test_custom_boundaries(self):
        """Caller-supplied boundaries replace the defaults."""
        svc = ThemeEditorialService()
        custom = [10, 20, 30, 40]
        # total 25 -> between 20 and 30 (third bucket)
        assert svc.calculate_popularity_bucket(15, 10, boundaries=custom) == 'Uncommon'
|
||||
|
||||
|
||||
class TestArchetypeInference:
    """Archetype inference from a theme name plus its synergies."""

    def test_combo_inference(self):
        """'Infinite Combo' with a Storm synergy is inferred as Combo."""
        inferred = ThemeEditorialService().infer_deck_archetype(
            'Infinite Combo', ['Storm']
        )
        assert inferred == 'Combo'

    def test_stax_inference(self):
        """Stax/Tax synergies are inferred as Stax."""
        inferred = ThemeEditorialService().infer_deck_archetype(
            'Resource Denial', ['Stax', 'Tax']
        )
        assert inferred == 'Stax'

    def test_voltron_inference(self):
        """Equipment/Voltron synergies are inferred as Voltron."""
        inferred = ThemeEditorialService().infer_deck_archetype(
            'Auras Matter', ['Equipment', 'Voltron']
        )
        assert inferred == 'Voltron'

    def test_no_match_returns_none(self):
        """Inputs matching no archetype yield None."""
        inferred = ThemeEditorialService().infer_deck_archetype(
            'Generic Theme', ['Synergy1', 'Synergy2']
        )
        assert inferred is None
|
||||
|
||||
|
||||
class TestDescriptionGeneration:
    """Checks on the text produced by generate_description."""

    def test_basic_generation(self):
        """The theme name and both synergies appear in the output."""
        text = ThemeEditorialService().generate_description(
            'Test Theme', ['Synergy1', 'Synergy2']
        )
        for expected_fragment in ('Test Theme', 'Synergy1', 'Synergy2'):
            assert expected_fragment in text

    def test_single_synergy(self):
        """A lone synergy is still mentioned in the output."""
        text = ThemeEditorialService().generate_description('Test', ['OnlySynergy'])
        assert 'OnlySynergy' in text

    def test_no_synergies(self):
        """With no synergies the output mentions 'core mechanics'."""
        text = ThemeEditorialService().generate_description('Test', [])
        assert 'core mechanics' in text.lower()

    def test_custom_template(self):
        """A caller-supplied template is filled with theme and synergies."""
        pattern = 'Theme {theme} works with {synergies}.'
        text = ThemeEditorialService().generate_description(
            'TestTheme', ['Syn1', 'Syn2'], template=pattern
        )
        for expected_fragment in ('TestTheme', 'Syn1'):
            assert expected_fragment in text
|
||||
|
||||
|
||||
class TestCatalogStatistics:
    """Catalog-wide statistics checks (integration test with real catalog)."""

    def test_statistics_structure(self):
        """The statistics dict exposes the expected keys and sane values."""
        stats = ThemeEditorialService().get_catalog_statistics()

        # Top-level keys that must always be reported.
        required_keys = (
            'total_themes',
            'complete_editorials',
            'missing_descriptions',
            'missing_examples',
            'quality_distribution',
            'average_quality_score',
            'completeness_percentage',
            'description_source_distribution',
            'pinned_popularity_count',
        )
        for key in required_keys:
            assert key in stats

        # Every quality tier must have an entry.
        tiers = stats['quality_distribution']
        for tier_name in ('Excellent', 'Good', 'Fair', 'Poor'):
            assert tier_name in tiers

        # Every description source type must have an entry.
        sources = stats['description_source_distribution']
        for source_name in ('rule', 'generic', 'manual'):
            assert source_name in sources

        # Range checks on the numeric values.
        assert stats['total_themes'] > 0, "Should have at least some themes"
        assert 0 <= stats['completeness_percentage'] <= 100
        assert 0 <= stats['average_quality_score'] <= 100
        assert stats['pinned_popularity_count'] >= 0, "Pinned count cannot be negative"

    def test_statistics_consistency(self):
        """The quality-tier counts add up to the total theme count."""
        stats = ThemeEditorialService().get_catalog_statistics()
        quality_sum = sum(stats['quality_distribution'].values())
        assert quality_sum == stats['total_themes'], (
            f"Quality distribution sum ({quality_sum}) should equal total ({stats['total_themes']})"
        )
|
||||
|
||||
|
||||
# Integration tests requiring actual theme catalog
|
||||
class TestThemeMetadataRetrieval:
    """Lookup failures against the real catalog (integration tests)."""

    def test_get_metadata_not_found(self):
        """get_theme_metadata raises NotFoundError for an unknown theme."""
        lookup = ThemeEditorialService()
        with pytest.raises(NotFoundError):
            lookup.get_theme_metadata('NonExistentTheme99999')

    def test_suggest_commanders_not_found(self):
        """suggest_example_commanders raises NotFoundError for an unknown theme."""
        lookup = ThemeEditorialService()
        with pytest.raises(NotFoundError):
            lookup.suggest_example_commanders('NonExistentTheme99999')
|
||||
|
||||
|
||||
# M2: Heuristics Loading Tests
|
||||
class TestHeuristicsLoading:
    """Test M2 heuristics externalization functionality."""

    def test_load_heuristics_success(self):
        """Test heuristics file loads successfully."""
        service = ThemeEditorialService()
        heuristics = service.load_heuristics()
        assert isinstance(heuristics, dict)
        assert 'quality_thresholds' in heuristics
        assert 'generic_staple_cards' in heuristics

    def test_heuristics_cached(self):
        """Test heuristics are cached after first load."""
        service = ThemeEditorialService()
        h1 = service.load_heuristics()
        h2 = service.load_heuristics()
        assert h1 is h2  # Same object reference (cached)

    def test_force_reload_bypasses_cache(self):
        """Test a forced reload still returns a dict once the cache is primed.

        Only the return type is checked; a change in object identity cannot be
        asserted here without modifying the heuristics file.
        """
        service = ThemeEditorialService()
        # Prime the cache first; the result itself is not needed.
        service.load_heuristics()
        reloaded = service.load_heuristics(force_reload=True)
        assert isinstance(reloaded, dict)

    def test_heuristics_structure(self):
        """Test heuristics contain expected keys."""
        service = ThemeEditorialService()
        heuristics = service.load_heuristics()

        # Required top-level keys
        for key in ('version', 'quality_thresholds', 'generic_staple_cards'):
            assert key in heuristics

        # Quality thresholds structure
        thresholds = heuristics['quality_thresholds']
        threshold_keys = (
            'excellent_min_score',
            'good_min_score',
            'fair_min_score',
            'manual_description_bonus',
            'rule_description_bonus',
            'generic_description_bonus',
        )
        for key in threshold_keys:
            assert key in thresholds
|
||||
|
||||
|
||||
class TestGenericCardDetection:
    """M2 checks on generic staple card identification."""

    def test_get_generic_staple_cards(self):
        """The staple list is a list; when non-empty it includes Sol Ring."""
        staples = ThemeEditorialService().get_generic_staple_cards()
        assert isinstance(staples, list)
        # An empty list is tolerated for testing.
        assert 'Sol Ring' in staples or not staples

    def test_is_generic_card_sol_ring(self):
        """Sol Ring is reported generic whenever it is in the staple list."""
        service = ThemeEditorialService()
        if 'Sol Ring' not in service.get_generic_staple_cards():
            # Nothing to verify when the heuristics do not list it.
            return
        assert service.is_generic_card('Sol Ring')

    def test_is_generic_card_nongeneric(self):
        """A made-up card name is not reported as generic."""
        made_up_name = 'Obscure Legendary Creature From 1995'
        assert not ThemeEditorialService().is_generic_card(made_up_name)

    def test_quality_score_generic_penalty(self):
        """An entry of staple cards never outscores one of unique cards."""
        service = ThemeEditorialService()

        # Fields shared by both entries; only id, theme and example_cards differ.
        common_fields = {
            'synergies': ['Synergy1'],
            'description': 'A description.',
            'description_source': 'manual',
            'example_commanders': ['Commander 1', 'Commander 2'],
            'deck_archetype': 'Combo',
            'popularity_bucket': 'Common',
        }

        # Six cards, many of them likely generic staples.
        generic_entry = ThemeEntry(
            id='generic-test',
            theme='Generic Test',
            example_cards=[
                'Sol Ring', 'Arcane Signet', 'Command Tower',
                'Lightning Greaves', 'Swiftfoot Boots', 'Counterspell',
            ],
            **common_fields,
        )

        # Six placeholder names.
        unique_entry = ThemeEntry(
            id='unique-test',
            theme='Unique Test',
            example_cards=[f'Unique Card {number}' for number in range(1, 7)],
            **common_fields,
        )

        generic_score = service.calculate_quality_score(generic_entry)
        unique_score = service.calculate_quality_score(unique_entry)

        # The comparison only applies when a staple list is available.
        if not service.get_generic_staple_cards():
            return
        assert unique_score >= generic_score
|
||||
|
||||
|
||||
class TestQualityTiersWithHeuristics:
    """M2 check that quality tiers follow the externally loaded thresholds."""

    def test_get_quality_tier_uses_heuristics(self):
        """Each loaded threshold is the inclusive lower bound of its tier."""
        service = ThemeEditorialService()
        thresholds = service.load_heuristics().get('quality_thresholds', {})

        excellent_min = thresholds.get('excellent_min_score', 75)
        good_min = thresholds.get('good_min_score', 60)
        fair_min = thresholds.get('fair_min_score', 40)

        # Boundary scores paired with the tier each must map to.
        boundary_cases = (
            (excellent_min, 'Excellent'),
            (good_min, 'Good'),
            (fair_min, 'Fair'),
            (fair_min - 1, 'Poor'),
        )
        for boundary_score, expected_tier in boundary_cases:
            assert service.get_quality_tier(boundary_score) == expected_tier
|
||||
|
||||
|
||||
# M3: Card Uniqueness and Duplication Tests
|
||||
class TestGlobalCardFrequency:
    """M3 checks on the global card frequency mapping."""

    def test_calculate_global_card_frequency(self):
        """The result is a dict whose values are positive integers."""
        freq = ThemeEditorialService().calculate_global_card_frequency()
        assert isinstance(freq, dict)
        if not freq:
            # Nothing further to check on an empty mapping.
            return
        assert all(isinstance(count, int) for count in freq.values())
        assert all(count > 0 for count in freq.values())

    def test_frequency_counts_themes(self):
        """Every listed card is counted in at least one theme."""
        freq = ThemeEditorialService().calculate_global_card_frequency()
        if not freq:
            return
        for card, count in freq.items():
            assert count >= 1, f"{card} has invalid count {count}"
|
||||
|
||||
|
||||
class TestUniquenessRatio:
    """M3 checks on calculate_uniqueness_ratio."""

    def test_uniqueness_ratio_empty_cards(self):
        """An empty card list yields a ratio of 0.0."""
        assert ThemeEditorialService().calculate_uniqueness_ratio([]) == 0.0

    def test_uniqueness_ratio_all_unique(self):
        """Card names absent from the catalog all count as unique."""
        unknown_cards = ['Nonexistent Card A', 'Nonexistent Card B']
        result = ThemeEditorialService().calculate_uniqueness_ratio(unknown_cards)
        assert result == 1.0

    def test_uniqueness_ratio_custom_frequency(self):
        """A caller-supplied frequency map yields a ratio within [0, 1]."""
        service = ThemeEditorialService()
        # Counts chosen as if the catalog held 100 themes.
        supplied_freq = {
            'Common Card': 80,  # 80% of themes: not unique
            'Rare Card': 10,    # 10% of themes: unique
        }
        result = service.calculate_uniqueness_ratio(
            ['Common Card', 'Rare Card'],
            global_card_freq=supplied_freq,
            uniqueness_threshold=0.25,  # under 25% counts as unique
        )
        # The total theme count is not controlled here, so only the range
        # of the result is checked rather than an exact value.
        assert 0.0 <= result <= 1.0

    def test_uniqueness_ratio_threshold(self):
        """Both a strict and a lenient threshold give ratios within [0, 1]."""
        service = ThemeEditorialService()
        # 0.10 is the strict cut-off, 0.50 the lenient one.
        results = [
            service.calculate_uniqueness_ratio(['Test Card'], uniqueness_threshold=cutoff)
            for cutoff in (0.10, 0.50)
        ]
        for result in results:
            assert 0.0 <= result <= 1.0
|
||||
|
||||
|
||||
class TestDuplicationRatio:
    """M3 checks on calculate_duplication_ratio."""

    def test_duplication_ratio_empty_cards(self):
        """An empty card list yields a ratio of 0.0."""
        assert ThemeEditorialService().calculate_duplication_ratio([]) == 0.0

    def test_duplication_ratio_all_unique(self):
        """Card names absent from the catalog are never counted as duplicated."""
        unknown_cards = ['Nonexistent Card A', 'Nonexistent Card B']
        result = ThemeEditorialService().calculate_duplication_ratio(unknown_cards)
        assert result == 0.0

    def test_duplication_ratio_custom_frequency(self):
        """A single-card call yields a ratio within [0, 1]."""
        # An exact value would need a mocked index; only the range is checked.
        result = ThemeEditorialService().calculate_duplication_ratio(['Test Card'])
        assert 0.0 <= result <= 1.0

    def test_duplication_ratio_threshold(self):
        """Two different thresholds both give ratios within [0, 1]."""
        service = ThemeEditorialService()
        # Cards above 50%, then above 30%, count as duplicated.
        results = [
            service.calculate_duplication_ratio(['Test Card'], duplication_threshold=cutoff)
            for cutoff in (0.50, 0.30)
        ]
        for result in results:
            assert 0.0 <= result <= 1.0
|
||||
|
||||
|
||||
class TestEnhancedQualityScoring:
    """M3 checks on calculate_enhanced_quality_score."""

    def test_enhanced_score_structure(self):
        """The result unpacks into a known tier label and a score in [0, 1]."""
        scorer = ThemeEditorialService()
        sample = ThemeEntry(
            id='test',
            theme='Test',
            synergies=[],
            example_cards=['Card 1', 'Card 2', 'Card 3'],
            example_commanders=['Cmdr 1'],
            description='Test description.',
            description_source='manual',
            deck_archetype='Combo',
            popularity_bucket='Common',
        )
        tier, score = scorer.calculate_enhanced_quality_score(sample)
        assert tier in ('Excellent', 'Good', 'Fair', 'Poor')
        assert 0.0 <= score <= 1.0

    def test_enhanced_score_many_cards(self):
        """Ten example cards score strictly higher than two."""
        scorer = ThemeEditorialService()
        # Everything except id, theme and example_cards is shared.
        shared = {
            'synergies': [],
            'example_commanders': ['Cmdr 1'],
            'description': 'Description.',
            'description_source': 'manual',
        }
        entry_many = ThemeEntry(
            id='many-cards',
            theme='Many Cards',
            example_cards=[f'Card {i}' for i in range(10)],
            **shared,
        )
        entry_few = ThemeEntry(
            id='few-cards',
            theme='Few Cards',
            example_cards=['Card 1', 'Card 2'],
            **shared,
        )
        _, score_many = scorer.calculate_enhanced_quality_score(entry_many)
        _, score_few = scorer.calculate_enhanced_quality_score(entry_few)
        assert score_many > score_few

    def test_enhanced_score_manual_bonus(self):
        """A manual description scores strictly higher than a generic one."""
        scorer = ThemeEditorialService()
        # Everything except id, theme and description_source is shared.
        shared = {
            'synergies': [],
            'example_cards': ['Card 1'],
            'description': 'Description.',
        }
        entry_manual = ThemeEntry(
            id='manual', theme='Manual', description_source='manual', **shared
        )
        entry_generic = ThemeEntry(
            id='generic', theme='Generic', description_source='generic', **shared
        )
        _, score_manual = scorer.calculate_enhanced_quality_score(entry_manual)
        _, score_generic = scorer.calculate_enhanced_quality_score(entry_generic)
        assert score_manual > score_generic

    def test_enhanced_score_no_cards(self):
        """An entry without example cards is rated Poor with a score under 0.40."""
        scorer = ThemeEditorialService()
        bare_entry = ThemeEntry(
            id='no-cards',
            theme='No Cards',
            synergies=[],
            description='Description.',
            description_source='manual',
        )
        tier, score = scorer.calculate_enhanced_quality_score(bare_entry)
        assert tier == 'Poor'
        assert score < 0.40
|
||||
|
||||
|
||||
class TestCatalogStatisticsEnhanced:
    """M3 checks on catalog statistics with and without enhanced scoring."""

    def test_statistics_with_enhanced_scoring(self):
        """Enhanced scoring adds the two average-ratio keys, each in [0, 1]."""
        stats = ThemeEditorialService().get_catalog_statistics(use_enhanced_scoring=True)

        # Basic keys are still reported.
        for key in ('total_themes', 'quality_distribution'):
            assert key in stats

        # M3 keys are reported as well.
        ratio_keys = ('average_uniqueness_ratio', 'average_duplication_ratio')
        for key in ratio_keys:
            assert key in stats

        # Both averages must be valid ratios.
        for key in ratio_keys:
            assert 0.0 <= stats[key] <= 1.0

    def test_statistics_without_enhanced_scoring(self):
        """Without enhanced scoring the two average-ratio keys are absent."""
        stats = ThemeEditorialService().get_catalog_statistics(use_enhanced_scoring=False)

        # Basic keys are reported.
        for key in ('total_themes', 'quality_distribution'):
            assert key in stats

        # M3 keys must not appear.
        for key in ('average_uniqueness_ratio', 'average_duplication_ratio'):
            assert key not in stats
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Running this module directly hands it to pytest in verbose mode.
    pytest_args = [__file__, '-v']
    pytest.main(pytest_args)
|
||||
287
code/tests/test_theme_linter.py
Normal file
287
code/tests/test_theme_linter.py
Normal file
|
|
@ -0,0 +1,287 @@
|
|||
"""Tests for M4 linter functionality in validate_theme_catalog.py"""
|
||||
import pytest
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
|
||||
from type_definitions_theme_catalog import ThemeYAMLFile, DescriptionSource
|
||||
from web.services.theme_editorial_service import ThemeEditorialService
|
||||
from web.services.theme_catalog_loader import load_index
|
||||
|
||||
|
||||
class TestLinterDuplicationChecks:
    """Duplication-ratio checks relied on by the M4 linter."""

    def test_high_duplication_flagged(self):
        """Two of four cards above the 40% cut-off give a ratio of 0.5."""
        service = ThemeEditorialService()

        # Mocked counts are scaled to the real catalog size so the
        # frequencies come out at roughly 60% and 50%.
        catalog_size = len(load_index().slug_to_entry)
        mocked_freq = {
            "Sol Ring": int(catalog_size * 0.6),
            "Lightning Greaves": int(catalog_size * 0.5),
            "Unique Card A": 5,
            "Unique Card B": 3,
        }

        # Two staples and two rarely used cards.
        cards = ["Sol Ring", "Lightning Greaves", "Unique Card A", "Unique Card B"]

        ratio = service.calculate_duplication_ratio(
            example_cards=cards,
            global_card_freq=mocked_freq,
            duplication_threshold=0.4,  # more than 40% counts as duplicated
        )

        # 2 of the 4 cards appear in more than 40% of themes.
        assert ratio == 0.5

    def test_low_duplication_not_flagged(self):
        """Four rarely used cards give a ratio of 0.0."""
        service = ThemeEditorialService()

        mocked_freq = {
            "Unique Card A": 5,
            "Unique Card B": 3,
            "Unique Card C": 8,
            "Unique Card D": 2,
        }
        cards = ["Unique Card A", "Unique Card B", "Unique Card C", "Unique Card D"]

        ratio = service.calculate_duplication_ratio(
            example_cards=cards,
            global_card_freq=mocked_freq,
            duplication_threshold=0.4,
        )

        assert ratio == 0.0

    def test_empty_cards_no_duplication(self):
        """An empty card list gives 0.0 regardless of the frequency map."""
        service = ThemeEditorialService()

        ratio = service.calculate_duplication_ratio(
            example_cards=[],
            global_card_freq={"Sol Ring": 60},
            duplication_threshold=0.4,
        )

        assert ratio == 0.0
|
||||
|
||||
|
||||
class TestLinterQualityScoring:
    """Quality-score checks relied on by the M4 linter."""

    def test_low_quality_score_flagged(self):
        """Two very common cards plus a generic description score under 0.5."""
        from type_definitions_theme_catalog import ThemeEntry

        scorer = ThemeEditorialService()

        # Only two cards, both given very high catalog counts below.
        weak_entry = ThemeEntry(
            theme="Test Theme",
            example_cards=["Sol Ring", "Command Tower"],
            description_source="generic",
        )
        very_common = {"Sol Ring": 80, "Command Tower": 75}

        tier, score = scorer.calculate_enhanced_quality_score(
            theme_entry=weak_entry,
            global_card_freq=very_common,
        )

        assert tier in ("Poor", "Fair")
        assert score < 0.5

    def test_high_quality_score_not_flagged(self):
        """Ten rarely used cards plus a manual description score at least 0.6."""
        from type_definitions_theme_catalog import ThemeEntry

        scorer = ThemeEditorialService()

        card_names = [f"Unique Card {i}" for i in range(10)]
        strong_entry = ThemeEntry(
            theme="Test Theme",
            example_cards=card_names,
            description_source="manual",
        )
        # Every card is given the same low catalog count.
        all_rare = dict.fromkeys(card_names, 2)

        tier, score = scorer.calculate_enhanced_quality_score(
            theme_entry=strong_entry,
            global_card_freq=all_rare,
        )

        assert tier in ("Good", "Excellent")
        assert score >= 0.6
|
||||
|
||||
|
||||
class TestLinterSuggestions:
    """Test M4 linter suggestion generation.

    NOTE(review): these tests evaluate a local copy of the suggestion rules and
    never call the linter itself, so they pin the intended rules but cannot
    catch a regression in the real implementation -- confirm whether they
    should invoke the linter instead.
    """

    @staticmethod
    def _build_suggestions(example_cards=None, description_source=None, dup_ratio=None):
        """Return the suggestions triggered by the supplied inputs.

        Each rule is evaluated only when its input is provided, so a test can
        exercise one rule in isolation.

        Args:
            example_cards: Example card names; fewer than 5 triggers the
                "add more cards" suggestion.
            description_source: Description source label; "generic" triggers
                the "upgrade description" suggestion.
            dup_ratio: Duplication ratio; a value above 0.4 triggers the
                "replace staples" suggestion.

        Returns:
            Suggestion strings in rule order (cards, description, duplication).
        """
        suggestions = []
        if example_cards is not None and len(example_cards) < 5:
            suggestions.append("Add more example cards (target: 8+)")
        if description_source == "generic":
            suggestions.append("Upgrade to manual or rule-based description")
        if dup_ratio is not None and dup_ratio > 0.4:
            suggestions.append("Replace generic staples with unique cards")
        return suggestions

    def test_suggestions_for_few_cards(self):
        """Should suggest adding more cards when count is low"""
        suggestions = self._build_suggestions(
            example_cards=["Card A", "Card B", "Card C"]  # Only 3 cards
        )

        assert len(suggestions) == 1
        assert "Add more example cards" in suggestions[0]

    def test_suggestions_for_generic_description(self):
        """Should suggest upgrading description when generic"""
        suggestions = self._build_suggestions(description_source="generic")

        assert len(suggestions) == 1
        assert "Upgrade to manual or rule-based" in suggestions[0]

    def test_suggestions_for_generic_cards(self):
        """Should suggest replacing generic cards when duplication high"""
        suggestions = self._build_suggestions(dup_ratio=0.6)  # 60% duplication

        assert len(suggestions) == 1
        assert "Replace generic staples" in suggestions[0]

    def test_multiple_suggestions_combined(self):
        """Should provide multiple suggestions when multiple issues exist"""
        suggestions = self._build_suggestions(
            example_cards=["Card A", "Card B"],  # Few cards
            description_source="generic",
            dup_ratio=0.5,  # High duplication
        )

        assert len(suggestions) == 3
        assert "Add more example cards" in suggestions[0]
        assert "Upgrade to manual or rule-based" in suggestions[1]
        assert "Replace generic staples" in suggestions[2]
|
||||
|
||||
|
||||
class TestLinterThresholds:
    """Checks that the M4 linter thresholds can be varied."""

    def test_duplication_threshold_configurable(self):
        """A card at about 45% frequency is flagged at 0.4 but not at 0.5."""
        service = ThemeEditorialService()

        # Scale the mocked count to the real catalog size (about 45%).
        catalog_size = len(load_index().slug_to_entry)
        mocked_freq = {
            "Sol Ring": int(catalog_size * 0.45),
            "Unique Card": 5,
        }
        cards = ["Sol Ring", "Unique Card"]

        # (threshold, expected ratio): at 0.5 nothing is flagged because
        # 45% < 50%; at 0.4 Sol Ring is flagged, i.e. 1 of 2 cards.
        for cutoff, expected_ratio in ((0.5, 0.0), (0.4, 0.5)):
            ratio = service.calculate_duplication_ratio(
                example_cards=cards,
                global_card_freq=mocked_freq,
                duplication_threshold=cutoff,
            )
            assert ratio == expected_ratio

    def test_quality_threshold_configurable(self):
        """A score of 0.45 falls below a 0.5 threshold but not below 0.3.

        NOTE(review): this compares literals only and does not call the
        linter -- confirm whether it should.
        """
        sample_score = 0.45

        assert sample_score < 0.5  # flagged when the threshold is 0.5
        assert sample_score >= 0.3  # not flagged when the threshold is 0.3
|
||||
|
||||
|
||||
class TestLinterIntegration:
    """Integration checks between ThemeYAMLFile, ThemeEntry and the scorer."""

    def test_yaml_file_to_theme_entry_conversion(self):
        """Fields copied from a ThemeYAMLFile survive in the ThemeEntry."""
        from type_definitions_theme_catalog import ThemeEntry

        # Build the YAML model directly from keyword arguments.
        source_file = ThemeYAMLFile(
            id="test-theme",
            display_name="Test Theme",
            synergies=["Synergy A", "Synergy B"],
            example_cards=["Card A", "Card B", "Card C"],
            description_source="manual",
            description="A test theme for linting",
        )

        # Carry over only the fields used for linting.
        converted = ThemeEntry(
            theme=source_file.display_name,
            example_cards=source_file.example_cards,
            description_source=source_file.description_source,
        )

        assert converted.theme == "Test Theme"
        assert len(converted.example_cards) == 3
        assert converted.description_source == "manual"

    def test_linter_handles_missing_optional_fields(self):
        """Scoring an entry with no description_source still returns a result."""
        from type_definitions_theme_catalog import ThemeEntry

        # Only a theme name and one card; description_source is left unset.
        sparse_entry = ThemeEntry(
            theme="Minimal Theme",
            example_cards=["Card A"],
            description_source=None,
        )

        # The call must complete without raising.
        tier, score = ThemeEditorialService().calculate_enhanced_quality_score(
            theme_entry=sparse_entry,
            global_card_freq={"Card A": 1},
        )

        assert isinstance(tier, str)
        assert 0.0 <= score <= 1.0
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Running this module directly hands it to pytest in verbose mode.
    pytest_args = [__file__, '-v']
    pytest.main(pytest_args)
|
||||
|
|
@ -18,6 +18,7 @@ ALLOWED_DECK_ARCHETYPES: List[str] = [
|
|||
]
|
||||
|
||||
PopularityBucket = Literal['Very Common', 'Common', 'Uncommon', 'Niche', 'Rare']
|
||||
DescriptionSource = Literal['rule', 'generic', 'manual']
|
||||
|
||||
|
||||
class ThemeEntry(BaseModel):
|
||||
|
|
@ -46,6 +47,14 @@ class ThemeEntry(BaseModel):
|
|||
None,
|
||||
description="Auto-generated or curated short sentence/paragraph describing the deck plan / strategic intent of the theme",
|
||||
)
|
||||
description_source: Optional[DescriptionSource] = Field(
|
||||
None,
|
||||
description="Source of description: 'rule' (external heuristic), 'generic' (fallback template), 'manual' (human-written)",
|
||||
)
|
||||
popularity_pinned: bool = Field(
|
||||
False,
|
||||
description="If True, prevents automatic updates to popularity_bucket during catalog rebuilds (preserves manual curation)",
|
||||
)
|
||||
editorial_quality: Optional[str] = Field(
|
||||
None,
|
||||
description="Lifecycle quality flag (draft|reviewed|final); optional and not yet enforced strictly",
|
||||
|
|
@ -124,6 +133,8 @@ class ThemeYAMLFile(BaseModel):
|
|||
popularity_hint: Optional[str] = None # Free-form editorial note; bucket computed during merge
|
||||
popularity_bucket: Optional[PopularityBucket] = None # Authors may pin; else derived
|
||||
description: Optional[str] = None # Curated short description (auto-generated if absent)
|
||||
description_source: Optional[DescriptionSource] = None # Source tracking (rule|generic|manual)
|
||||
popularity_pinned: bool = False # Protects popularity_bucket from auto-updates
|
||||
# Editorial quality lifecycle flag (draft|reviewed|final); optional and not yet enforced via governance.
|
||||
editorial_quality: Optional[str] = None
|
||||
# Per-file metadata (recently renamed from provenance). We intentionally keep this
|
||||
|
|
|
|||
|
|
@ -890,3 +890,96 @@ async def ingest_structured_log(request: Request, payload: dict[str, Any] = Body
|
|||
return JSONResponse({"ok": True, "count": LOG_COUNTS[event]})
|
||||
except Exception as e: # pragma: no cover
|
||||
return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
|
||||
|
||||
|
||||
# --- Editorial API: Roadmap R12 Milestone 1 ---
|
||||
# Editorial quality scoring and metadata management endpoints
|
||||
|
||||
@router.get("/api/theme/{theme_id}/editorial")
async def get_theme_editorial_metadata(theme_id: str):
    """Return editorial metadata for one theme, augmented with its quality tier.

    The payload is whatever the editorial service's ``get_theme_metadata``
    produces for ``theme_id``, plus one extra key:
    - quality_tier: tier label derived from the payload's ``quality_score``
      via the service's ``get_quality_tier``

    Responds with HTTP 404 when the service raises ``NotFoundError`` and with
    HTTP 500 for any other failure.
    """
    from ..services.theme_editorial_service import get_editorial_service, NotFoundError

    editorial = get_editorial_service()
    try:
        payload = editorial.get_theme_metadata(theme_id)
        # Derive the tier label from the score already present in the payload.
        payload['quality_tier'] = editorial.get_quality_tier(payload['quality_score'])
        return JSONResponse(payload)
    except NotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to retrieve editorial metadata: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/api/editorial/statistics")
async def get_editorial_statistics():
    """Return catalog-wide editorial quality statistics as JSON.

    The response body is the dictionary produced by the editorial service's
    ``get_catalog_statistics`` (called with its default arguments). Any
    failure while computing it is reported as HTTP 500.
    """
    from ..services.theme_editorial_service import get_editorial_service

    editorial = get_editorial_service()
    try:
        return JSONResponse(editorial.get_catalog_statistics())
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to retrieve statistics: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/api/theme/{theme_id}/validate")
async def validate_theme_editorial(theme_id: str):
    """Run editorial validation for one theme and report the findings.

    The theme is looked up in the catalog index by the slugified ``theme_id``.

    Response keys:
    - theme: Theme display name taken from the catalog entry
    - valid: True when the validation produced no issues
    - issues: List of validation issue messages
    - quality_score: Score from the service's ``calculate_quality_score``

    Responds with HTTP 404 when the slug is not in the index and with
    HTTP 500 for any other failure.
    """
    from ..services.theme_editorial_service import get_editorial_service, NotFoundError
    from ..services.theme_catalog_loader import load_index, slugify

    editorial = get_editorial_service()
    try:
        slug = slugify(theme_id)
        catalog = load_index().slug_to_entry
        if slug not in catalog:
            raise NotFoundError(f"Theme not found: {theme_id}")

        entry = catalog[slug]
        problems = editorial.validate_editorial_fields(entry)
        score = editorial.calculate_quality_score(entry)

        payload = {
            'theme': entry.theme,
            'valid': not problems,
            'issues': problems,
            'quality_score': score,
        }
        return JSONResponse(payload)
    except NotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Validation failed: {str(e)}")
|
||||
|
|
|
|||
824
code/web/services/theme_editorial_service.py
Normal file
824
code/web/services/theme_editorial_service.py
Normal file
|
|
@ -0,0 +1,824 @@
|
|||
"""Theme editorial service for quality scoring and metadata management.
|
||||
|
||||
Roadmap R12 Milestones 1-2: Editorial Fields + Heuristics Externalization
|
||||
Phase E+ enhancement for theme catalog editorial metadata.
|
||||
|
||||
Responsibilities:
|
||||
- Calculate editorial quality scores for theme entries
|
||||
- Validate editorial field completeness and consistency
|
||||
- Suggest example commanders and cards for themes
|
||||
- Infer deck archetypes from theme patterns
|
||||
- Calculate popularity buckets from commander/card counts
|
||||
- Load and apply external editorial heuristics
|
||||
- Provide editorial metadata APIs for frontend consumption
|
||||
|
||||
Follows R9 Backend Unification patterns:
|
||||
- Extends BaseService
|
||||
- Uses structured error handling (ValidationError, NotFoundError)
|
||||
- Integrates with existing theme_catalog_loader infrastructure
|
||||
- Provides telemetry integration points
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List, Optional, Any
|
||||
from pathlib import Path
|
||||
import logging
|
||||
import yaml
|
||||
|
||||
from .base import BaseService, NotFoundError
|
||||
from .theme_catalog_loader import load_index, slugify
|
||||
|
||||
try:
|
||||
from type_definitions_theme_catalog import ThemeEntry, PopularityBucket, ALLOWED_DECK_ARCHETYPES, DescriptionSource
|
||||
except ImportError: # pragma: no cover
|
||||
from code.type_definitions_theme_catalog import ThemeEntry, PopularityBucket, ALLOWED_DECK_ARCHETYPES, DescriptionSource
|
||||
|
||||
# Module-level logger used by the service for load/fallback diagnostics.
logger = logging.getLogger(__name__)

# Default heuristics path (can be overridden in __init__)
# Path calculation: from code/web/services/ → code/web/ → code/ → project root
DEFAULT_HEURISTICS_PATH = Path(__file__).resolve().parents[3] / 'config' / 'themes' / 'editorial_heuristics.yml'

# Legacy constants (will be loaded from heuristics file in M2, kept for backward compatibility)
# Per-field point weights consumed by calculate_quality_score. They sum to 110
# before any description-source bonus; the final score is capped at 100 there.
WEIGHT_HAS_DESCRIPTION = 20
WEIGHT_HAS_EXAMPLE_COMMANDERS = 15
WEIGHT_HAS_EXAMPLE_CARDS = 15
WEIGHT_HAS_DECK_ARCHETYPE = 10
WEIGHT_HAS_POPULARITY_BUCKET = 10
WEIGHT_HAS_SYNERGY_COMMANDERS = 10
WEIGHT_DESCRIPTION_LENGTH_BONUS = 10
WEIGHT_MULTIPLE_EXAMPLE_COMMANDERS = 10
WEIGHT_MULTIPLE_EXAMPLE_CARDS = 10

# Minimum scores (0-100 scale) for each tier; used as defaults by get_quality_tier
# and as the fallback thresholds when the heuristics file is missing.
QUALITY_EXCELLENT = 85
QUALITY_GOOD = 65
QUALITY_FAIR = 40

# Inclusive upper bounds on (commander_count + card_count) for the buckets
# 'Rare', 'Niche', 'Uncommon' and 'Common'; anything above the last value is
# 'Very Common' (see calculate_popularity_bucket).
DEFAULT_POPULARITY_BOUNDARIES = [40, 100, 220, 500]

# Archetype -> lowercase keywords. infer_deck_archetype scans this mapping in
# insertion order and matches keywords as plain substrings, so earlier entries
# win when several archetypes match.
ARCHETYPE_KEYWORDS: Dict[str, List[str]] = {
    'Combo': ['combo', 'infinite', 'storm'],
    'Stax': ['stax', 'tax', 'lock', 'denial'],
    'Voltron': ['voltron', 'aura', 'equipment'],
    'Aggro': ['aggro', 'burn', 'fast', 'pressure', 'combat'],
    'Control': ['control', 'counter', 'removal', 'wipes'],
    'Midrange': ['midrange', 'value', 'flexible'],
    'Graveyard': ['graveyard', 'reanimate', 'dredge', 'recursion'],
    'Tokens': ['tokens', 'wide', 'go-wide'],
    'Counters': ['+1/+1', 'counters', 'proliferate'],
    'Ramp': ['ramp', 'big-mana', 'lands'],
    'Spells': ['spellslinger', 'spells-matter', 'instants', 'sorceries'],
    'Artifacts': ['artifacts', 'artifact-matters'],
    'Enchantments': ['enchantments', 'enchantress', 'constellation'],
    'Politics': ['group-hug', 'pillowfort', 'politics', 'monarch'],
    'Toolbox': ['toolbox', 'tutor', 'silver-bullet'],
}
|
||||
|
||||
|
||||
class ThemeEditorialService(BaseService):
|
||||
"""Service for theme editorial quality scoring and metadata management.
|
||||
|
||||
Extends BaseService following R9 patterns. M2 enhancement: loads external heuristics.
|
||||
"""
|
||||
|
||||
def __init__(self, heuristics_path: Optional[Path] = None) -> None:
    """Set up the service and remember where the heuristics file lives.

    Args:
        heuristics_path: Location of editorial_heuristics.yml; when omitted
            (or falsy) DEFAULT_HEURISTICS_PATH is used instead.
    """
    super().__init__()
    # Populated lazily by load_heuristics().
    self._heuristics_cache: Optional[Dict[str, Any]] = None
    if heuristics_path:
        self._heuristics_path = heuristics_path
    else:
        self._heuristics_path = DEFAULT_HEURISTICS_PATH
|
||||
|
||||
def load_heuristics(self, force_reload: bool = False) -> Dict[str, Any]:
    """Load editorial heuristics from YAML file (cached).

    When the heuristics file does not exist, a fallback configuration built
    from the legacy module constants is cached and returned instead.

    Args:
        force_reload: If True, bypass cache and reload from disk

    Returns:
        Dictionary with heuristics configuration

    Raises:
        NotFoundError: If the heuristics file cannot be read or parsed, or
            does not contain a YAML mapping. The underlying error is
            attached as ``__cause__``.
    """
    # Compare against None so that a legitimately empty mapping is still
    # treated as a cache hit instead of triggering a reload on every call.
    if self._heuristics_cache is not None and not force_reload:
        return self._heuristics_cache

    if not self._heuristics_path.exists():
        # Fallback to legacy behavior if heuristics file not found (cache the fallback)
        logger.warning(f"Heuristics file not found at {self._heuristics_path}, using legacy constants")
        self._heuristics_cache = {
            'quality_thresholds': {
                'excellent_min_score': QUALITY_EXCELLENT,
                'good_min_score': QUALITY_GOOD,
                'fair_min_score': QUALITY_FAIR,
                'manual_description_bonus': 10,
                'rule_description_bonus': 5,
                'generic_description_bonus': 0,
            },
            'generic_staple_cards': [],
            'archetype_keywords': ARCHETYPE_KEYWORDS,
        }
        return self._heuristics_cache

    try:
        with open(self._heuristics_path, 'r', encoding='utf-8') as f:
            loaded = yaml.safe_load(f)

        # Validate BEFORE touching the cache: an invalid payload must never be
        # cached, otherwise later calls would hand it out as if it were valid.
        if not isinstance(loaded, dict):
            raise ValueError("Heuristics file must contain a YAML dictionary")
    except Exception as e:
        logger.error(f"Failed to load heuristics: {e}")
        raise NotFoundError(f"Failed to load editorial heuristics: {e}") from e

    # Missing sections are tolerated (consumers fall back to defaults) but logged.
    for key in ('quality_thresholds', 'generic_staple_cards'):
        if key not in loaded:
            logger.warning(f"Heuristics missing required key: {key}")

    self._heuristics_cache = loaded
    logger.info(f"Loaded editorial heuristics from {self._heuristics_path}")
    return loaded
|
||||
|
||||
def get_generic_staple_cards(self) -> List[str]:
    """Return the generic staple card names configured in the heuristics.

    Returns:
        The 'generic_staple_cards' entry of the loaded heuristics, or an
        empty list when that key is absent
    """
    return self.load_heuristics().get('generic_staple_cards', [])
|
||||
|
||||
def is_generic_card(self, card_name: str) -> bool:
    """Tell whether a card appears in the configured generic staples list.

    Args:
        card_name: Exact card name to look up

    Returns:
        True when card_name is one of the generic staple cards
    """
    return card_name in self.get_generic_staple_cards()
|
||||
|
||||
def get_theme_metadata(self, theme_name: str) -> Dict[str, Any]:
    """Collect the editorial metadata of a single theme.

    The theme is located in the catalog index via its slugified name.

    Args:
        theme_name: Theme display name (e.g., "Aristocrats")

    Returns:
        Dictionary with the keys theme, description, example_commanders,
        example_cards, synergy_commanders, deck_archetype,
        popularity_bucket, editorial_quality, quality_score, synergies,
        primary_color and secondary_color. A missing description becomes ''
        and missing list fields become empty lists; quality_score comes from
        calculate_quality_score.

    Raises:
        NotFoundError: If theme not found in catalog
    """
    slug = slugify(theme_name)
    catalog = load_index().slug_to_entry
    if slug not in catalog:
        raise NotFoundError(f"Theme not found: {theme_name}")

    entry = catalog[slug]
    score = self.calculate_quality_score(entry)

    metadata: Dict[str, Any] = {
        'theme': entry.theme,
        'description': entry.description or '',
    }
    # List-valued fields default to an empty list when unset.
    for list_field in ('example_commanders', 'example_cards', 'synergy_commanders'):
        metadata[list_field] = getattr(entry, list_field) or []
    metadata['deck_archetype'] = entry.deck_archetype
    metadata['popularity_bucket'] = entry.popularity_bucket
    metadata['editorial_quality'] = entry.editorial_quality
    metadata['quality_score'] = score
    metadata['synergies'] = entry.synergies or []
    metadata['primary_color'] = entry.primary_color
    metadata['secondary_color'] = entry.secondary_color
    return metadata
|
||||
|
||||
def calculate_quality_score(self, theme_entry: ThemeEntry) -> int:
    """Score how complete a theme's editorial fields are.

    Points are awarded per field using the module WEIGHT_* constants:
    - Description: base weight, a length bonus when longer than 50
      characters, and a source bonus read from the heuristics
      'quality_thresholds' (defaults: manual 10, rule 5, generic 0)
    - Example commanders: base weight, extra when there are 3 or more
    - Example cards: base weight, extra when there are 5 or more, minus 5
      when more than half of them are generic staples
    - Deck archetype, popularity bucket, synergy commanders: one weight each

    Args:
        theme_entry: ThemeEntry Pydantic model instance

    Returns:
        Quality score, capped at 100
    """
    thresholds = self.load_heuristics().get('quality_thresholds', {})
    points = 0

    # --- Description: presence, length, and provenance ---
    description = theme_entry.description
    if description:
        points += WEIGHT_HAS_DESCRIPTION
        if len(description) > 50:
            points += WEIGHT_DESCRIPTION_LENGTH_BONUS
        source = theme_entry.description_source
        if source:
            bonus_by_source = {
                'manual': thresholds.get('manual_description_bonus', 10),
                'rule': thresholds.get('rule_description_bonus', 5),
                'generic': thresholds.get('generic_description_bonus', 0),
            }
            points += bonus_by_source.get(source, 0)

    # --- Example commanders ---
    commanders = theme_entry.example_commanders
    if commanders:
        points += WEIGHT_HAS_EXAMPLE_COMMANDERS
        if len(commanders) >= 3:
            points += WEIGHT_MULTIPLE_EXAMPLE_COMMANDERS

    # --- Example cards, with a small penalty for staple-heavy lists ---
    cards = theme_entry.example_cards
    if cards:
        points += WEIGHT_HAS_EXAMPLE_CARDS
        if len(cards) >= 5:
            points += WEIGHT_MULTIPLE_EXAMPLE_CARDS

        staples = self.get_generic_staple_cards()
        if staples:
            staple_hits = len([card for card in cards if card in staples])
            if staple_hits / max(1, len(cards)) > 0.5:  # More than 50% generic
                points -= 5

    # --- Simple presence checks ---
    presence_weights = (
        (theme_entry.deck_archetype, WEIGHT_HAS_DECK_ARCHETYPE),
        (theme_entry.popularity_bucket, WEIGHT_HAS_POPULARITY_BUCKET),
        (theme_entry.synergy_commanders, WEIGHT_HAS_SYNERGY_COMMANDERS),
    )
    for value, weight in presence_weights:
        if value:
            points += weight

    return min(points, 100)
|
||||
|
||||
def get_quality_tier(self, score: int) -> str:
    """Map a quality score onto its tier label.

    Tier floors are read from the heuristics 'quality_thresholds' section
    and default to the QUALITY_EXCELLENT / QUALITY_GOOD / QUALITY_FAIR
    constants when a key is absent.

    Args:
        score: Quality score (0-100)

    Returns:
        Quality tier: 'Excellent', 'Good', 'Fair', or 'Poor'
    """
    thresholds = self.load_heuristics().get('quality_thresholds', {})

    # Highest tier first; the first floor the score reaches wins.
    tier_floors = (
        ('Excellent', thresholds.get('excellent_min_score', QUALITY_EXCELLENT)),
        ('Good', thresholds.get('good_min_score', QUALITY_GOOD)),
        ('Fair', thresholds.get('fair_min_score', QUALITY_FAIR)),
    )
    for label, floor in tier_floors:
        if score >= floor:
            return label
    return 'Poor'
|
||||
|
||||
def validate_editorial_fields(self, theme_entry: ThemeEntry) -> List[str]:
    """Inspect a theme's editorial fields and describe every problem found.

    Checks, in order:
    - deck_archetype is present and listed in ALLOWED_DECK_ARCHETYPES
    - popularity_bucket is present
    - example_commanders is present with at least 2 entries
    - example_cards is present with at least 3 entries
    - description is present and is not a bare fallback template
    - description_source is present and not 'generic'
    - popularity_pinned is not set without a popularity_bucket

    Args:
        theme_entry: ThemeEntry Pydantic model instance

    Returns:
        List of validation issue messages (empty if valid)
    """
    problems: List[str] = []
    report = problems.append

    # Deck archetype
    if not theme_entry.deck_archetype:
        report("Missing deck_archetype")
    elif theme_entry.deck_archetype not in ALLOWED_DECK_ARCHETYPES:
        report(f"Invalid deck_archetype: {theme_entry.deck_archetype}")

    # Popularity bucket
    if not theme_entry.popularity_bucket:
        report("Missing popularity_bucket")

    # Example commanders
    commanders = theme_entry.example_commanders
    if not commanders:
        report("Missing example_commanders")
    elif len(commanders) < 2:
        report("Too few example_commanders (minimum 2 recommended)")

    # Example cards
    cards = theme_entry.example_cards
    if not cards:
        report("Missing example_cards")
    elif len(cards) < 3:
        report("Too few example_cards (minimum 3 recommended)")

    # Description: a template opener without the "Synergies like" clause is
    # treated as the minimal auto-generated fallback.
    desc = theme_entry.description
    if not desc:
        report("Missing description")
    elif desc.startswith(('Accumulates ', 'Builds around ', 'Leverages ')) and 'Synergies like' not in desc:
        report("Description appears to be minimal fallback template")

    # Description provenance
    source = theme_entry.description_source
    if not source:
        report("Missing description_source (should be 'rule', 'generic', or 'manual')")
    elif source == 'generic':
        report("Description source is 'generic' - consider upgrading to rule-based or manual")

    # A pin only makes sense when there is a bucket to protect.
    if theme_entry.popularity_pinned and not theme_entry.popularity_bucket:
        report("popularity_pinned is True but popularity_bucket is missing")

    return problems
|
||||
|
||||
def suggest_example_commanders(self, theme_name: str, limit: int = 5) -> List[str]:
    """Return up to ``limit`` example commanders already recorded for a theme.

    Placeholder for future analytics-based suggestions: no new commanders
    are discovered, the theme's existing example_commanders are returned.

    Args:
        theme_name: Theme display name
        limit: Maximum number of suggestions

    Returns:
        List of commander names (up to limit)

    Raises:
        NotFoundError: If theme not found
    """
    slug = slugify(theme_name)
    catalog = load_index().slug_to_entry
    if slug not in catalog:
        raise NotFoundError(f"Theme not found: {theme_name}")

    # Future enhancement: query the commander catalog for synergy matches.
    existing = catalog[slug].example_commanders or []
    return existing[:limit]
|
||||
|
||||
def infer_deck_archetype(self, theme_name: str, synergies: Optional[List[str]] = None) -> Optional[str]:
    """Guess a deck archetype from a theme's name and synergies.

    The lowercase theme name and synergy names are joined into one search
    string; ARCHETYPE_KEYWORDS is scanned in its definition order and the
    first archetype with a keyword occurring as a substring is returned.

    Args:
        theme_name: Theme display name
        synergies: Optional list of synergy theme names; when None they are
            looked up from the catalog entry for theme_name (empty if the
            theme is unknown)

    Returns:
        Matching archetype name from ARCHETYPE_KEYWORDS, or None
    """
    if synergies is None:
        slug = slugify(theme_name)
        catalog = load_index().slug_to_entry
        synergies = (catalog[slug].synergies or []) if slug in catalog else []

    # Build search text (lowercase)
    search_text = f"{theme_name.lower()} {' '.join(s.lower() for s in synergies)}"

    # NOTE(review): plain substring matching means e.g. 'counter' also hits
    # "counters"; earlier archetypes in the mapping take precedence.
    for archetype, keywords in ARCHETYPE_KEYWORDS.items():
        if any(keyword in search_text for keyword in keywords):
            return archetype
    return None
|
||||
|
||||
def calculate_popularity_bucket(
    self,
    commander_count: int,
    card_count: int,
    boundaries: Optional[List[int]] = None
) -> PopularityBucket:
    """Classify a theme's popularity from its commander and card counts.

    The two counts are summed and compared against four ascending, inclusive
    upper bounds (DEFAULT_POPULARITY_BOUNDARIES when none are given).

    Args:
        commander_count: Number of commanders with this theme
        card_count: Number of cards with this theme
        boundaries: Custom boundaries (4 values, ascending)

    Returns:
        PopularityBucket literal: 'Very Common', 'Common', 'Uncommon', 'Niche', or 'Rare'
    """
    limits = DEFAULT_POPULARITY_BOUNDARIES if boundaries is None else boundaries
    combined = commander_count + card_count

    # Labels are ordered to line up with limits[0..3]; first bound reached wins.
    for position, label in enumerate(('Rare', 'Niche', 'Uncommon', 'Common')):
        if combined <= limits[position]:
            return label
    return 'Very Common'
|
||||
|
||||
def generate_description(
    self,
    theme_name: str,
    synergies: List[str],
    template: str = "Builds around {theme} leveraging synergies with {synergies}."
) -> str:
    """Produce a simple template-based description for a theme.

    At most the first three synergies are used and joined into an English
    list ("A", "A and B", "A, B, and C"); with no synergies the phrase
    "its core mechanics" is substituted.

    Args:
        theme_name: Theme display name
        synergies: List of synergy theme names
        template: Description template with {theme} and {synergies} placeholders

    Returns:
        Generated description string
    """
    synergy_list = synergies[:3]  # Top 3 synergies
    count = len(synergy_list)

    if count >= 3:
        synergy_text = f"{', '.join(synergy_list[:-1])}, and {synergy_list[-1]}"
    elif count == 2:
        synergy_text = f"{synergy_list[0]} and {synergy_list[1]}"
    elif count == 1:
        synergy_text = synergy_list[0]
    else:
        synergy_text = "its core mechanics"

    return template.format(theme=theme_name, synergies=synergy_text)
|
||||
|
||||
def infer_description_source(self, description: str) -> DescriptionSource:
    """Classify a description's origin from its wording.

    Rules, applied in order:
    - Empty description → 'generic'
    - Contains "Synergies like", or "synergies with" in any letter case → 'rule'
    - Starts with 'Accumulates ', 'Builds around ' or 'Leverages ' → 'generic'
    - Anything else → 'manual'

    Args:
        description: Description text to analyze

    Returns:
        Inferred DescriptionSource value
    """
    if not description:
        return 'generic'

    # Synergy mentions are checked first, so a template opener that also
    # names synergies is classified as 'rule'.
    mentions_synergies = (
        'Synergies like' in description
        or 'synergies with' in description.lower()
    )
    if mentions_synergies:
        return 'rule'

    if description.startswith(('Accumulates ', 'Builds around ', 'Leverages ')):
        return 'generic'

    return 'manual'
|
||||
|
||||
# M3: Card Uniqueness and Duplication Analysis
|
||||
|
||||
def calculate_global_card_frequency(self) -> Dict[str, int]:
    """Count, for every example card, how many times it is listed across themes (M3).

    Walks every entry of the catalog index and tallies each name found in
    its example_cards.

    Returns:
        Dict mapping card name to the number of listings across all themes
    """
    tally: Dict[str, int] = {}
    for entry in load_index().slug_to_entry.values():
        # Entries without example cards contribute nothing.
        for card in entry.example_cards or ():
            tally[card] = tally.get(card, 0) + 1
    return tally
|
||||
|
||||
def calculate_uniqueness_ratio(
    self,
    example_cards: List[str],
    global_card_freq: Optional[Dict[str, int]] = None,
    uniqueness_threshold: float = 0.25
) -> float:
    """Measure what share of a theme's example cards are rarely reused (M3).

    A card counts as unique when its frequency divided by the total number
    of themes in the catalog index is strictly below the threshold.

    Args:
        example_cards: List of card names for this theme
        global_card_freq: Optional pre-calculated card frequencies (will compute if None)
        uniqueness_threshold: Threshold for "unique" (default: 0.25 = card in <25% of themes)

    Returns:
        Ratio from 0.0 to 1.0 (higher = more unique cards); 0.0 when there
        are no example cards or the catalog is empty
    """
    if not example_cards:
        return 0.0

    frequencies = (
        self.calculate_global_card_frequency()
        if global_card_freq is None
        else global_card_freq
    )

    theme_total = len(load_index().slug_to_entry)
    if theme_total == 0:
        return 0.0

    # Cards missing from the frequency map are treated as appearing in 0 themes.
    rare_cards = [
        card for card in example_cards
        if (frequencies.get(card, 0) / theme_total) < uniqueness_threshold
    ]
    return len(rare_cards) / len(example_cards)
|
||||
|
||||
def calculate_duplication_ratio(
    self,
    example_cards: List[str],
    global_card_freq: Optional[Dict[str, int]] = None,
    duplication_threshold: float = 0.40
) -> float:
    """Measure what share of a theme's example cards are widely reused (M3).

    A card counts as duplicated when its frequency divided by the total
    number of themes in the catalog index is strictly above the threshold.

    Args:
        example_cards: List of card names for this theme
        global_card_freq: Optional pre-calculated card frequencies (will compute if None)
        duplication_threshold: Threshold for "duplicated" (default: 0.40 = card in >40% of themes)

    Returns:
        Ratio from 0.0 to 1.0 (higher = more generic/duplicated cards); 0.0
        when there are no example cards or the catalog is empty
    """
    if not example_cards:
        return 0.0

    frequencies = (
        self.calculate_global_card_frequency()
        if global_card_freq is None
        else global_card_freq
    )

    theme_total = len(load_index().slug_to_entry)
    if theme_total == 0:
        return 0.0

    # Cards missing from the frequency map are treated as appearing in 0 themes.
    common_cards = [
        card for card in example_cards
        if (frequencies.get(card, 0) / theme_total) > duplication_threshold
    ]
    return len(common_cards) / len(example_cards)
|
||||
|
||||
def calculate_enhanced_quality_score(
    self,
    theme_entry: ThemeEntry,
    global_card_freq: Optional[Dict[str, int]] = None
) -> tuple[str, float]:
    """Calculate enhanced editorial quality score with uniqueness (M3).

    Enhanced scoring algorithm (points out of 100):
    - Card count: 0-30 points, scaled linearly up to 'excellent_card_min'
      cards (heuristics value, default 8)
    - Uniqueness ratio: 0-40 points (see calculate_uniqueness_ratio)
    - Description source bonus: taken from the heuristics
      'quality_thresholds' (defaults: manual=10, rule=5, generic=0)
    - Manual curation: 10 points when the entry has curated_synergies

    Tier floors come from the heuristics ('excellent_min_score',
    'good_min_score', 'fair_min_score'; defaults 75 / 60 / 40) and are
    compared against the normalized score.

    Args:
        theme_entry: ThemeEntry to score
        global_card_freq: Optional pre-calculated card frequencies

    Returns:
        Tuple of (tier_name, numeric_score) where score is clamped to 0.0-1.0
    """
    heuristics = self.load_heuristics()
    thresholds = heuristics.get('quality_thresholds', {})

    total_points = 0.0
    max_points = 100.0

    # 1. Example card count (0-30 points)
    card_count = len(theme_entry.example_cards) if theme_entry.example_cards else 0
    excellent_card_min = thresholds.get('excellent_card_min', 8)
    if not isinstance(excellent_card_min, (int, float)) or excellent_card_min <= 0:
        # A zero/negative/non-numeric divisor from the config would crash or
        # yield negative points; fall back to the built-in default instead.
        logging.getLogger(__name__).warning(
            "Invalid excellent_card_min %r in heuristics; using default 8",
            excellent_card_min,
        )
        excellent_card_min = 8
    total_points += min(30.0, (card_count / excellent_card_min) * 30.0)

    # 2. Uniqueness ratio (0-40 points) - M3 enhancement
    if theme_entry.example_cards:
        uniqueness_ratio = self.calculate_uniqueness_ratio(
            theme_entry.example_cards,
            global_card_freq
        )
        total_points += uniqueness_ratio * 40.0

    # 3. Description source bonus (configured in heuristics)
    if theme_entry.description_source:
        desc_bonus = {
            'manual': thresholds.get('manual_description_bonus', 10),
            'rule': thresholds.get('rule_description_bonus', 5),
            'generic': thresholds.get('generic_description_bonus', 0),
        }.get(theme_entry.description_source, 0)
        total_points += desc_bonus

    # 4. Manual curation bonus (0-10 points) - checks for curated_synergies
    if hasattr(theme_entry, 'curated_synergies') and theme_entry.curated_synergies:
        total_points += 10.0

    # Normalize and clamp: configured bonuses may push the raw total outside
    # 0-100, but callers are promised a score within 0.0-1.0.
    normalized_score = max(0.0, min(1.0, total_points / max_points))

    # Determine tier using heuristics thresholds
    excellent_min = thresholds.get('excellent_min_score', 75) / 100.0
    good_min = thresholds.get('good_min_score', 60) / 100.0
    fair_min = thresholds.get('fair_min_score', 40) / 100.0

    if normalized_score >= excellent_min:
        tier = 'Excellent'
    elif normalized_score >= good_min:
        tier = 'Good'
    elif normalized_score >= fair_min:
        tier = 'Fair'
    else:
        tier = 'Poor'

    return (tier, normalized_score)
|
||||
|
||||
def get_catalog_statistics(self, use_enhanced_scoring: bool = False) -> Dict[str, Any]:
    """Get editorial quality statistics for the entire catalog.

    M3 Enhancement: optionally use enhanced quality scoring with uniqueness metrics.

    Args:
        use_enhanced_scoring: If True, score every theme with
            ``calculate_enhanced_quality_score`` (M3) and additionally collect
            per-theme uniqueness/duplication ratios. If False, use
            ``calculate_quality_score`` together with ``get_quality_tier`` (M1).

    Returns:
        Dictionary with:
        - total_themes: Total number of themes in the index
        - complete_editorials: Themes with description, example commanders,
          example cards, deck archetype and popularity bucket all populated
        - missing_descriptions: Count of themes without a description
        - missing_examples: Count of themes missing example commanders or
          example cards
        - quality_distribution: Dict of quality tiers and counts
        - average_quality_score: Mean quality score rounded to 2 decimals
          (0 when the catalog is empty)
        - completeness_percentage: complete_editorials as a percentage of
          total_themes, rounded to 2 decimals (0 when the catalog is empty)
        - description_source_distribution: Breakdown by source type; themes
          without a source are counted under 'unknown'
        - pinned_popularity_count: Themes with pinned popularity
        - [M3] average_uniqueness_ratio: Mean card uniqueness (only when
          enhanced scoring is on and at least one theme has example cards)
        - [M3] average_duplication_ratio: Mean card duplication (same condition)
    """
    index = load_index()
    entries = index.slug_to_entry
    total = len(entries)

    # Pre-calculate global card frequency once; only the M3 path needs it.
    global_card_freq = self.calculate_global_card_frequency() if use_enhanced_scoring else None

    complete = 0
    missing_descriptions = 0
    missing_examples = 0
    pinned_count = 0
    quality_scores = []
    quality_tiers = {'Excellent': 0, 'Good': 0, 'Fair': 0, 'Poor': 0}
    description_sources = {'manual': 0, 'rule': 0, 'generic': 0, 'unknown': 0}
    uniqueness_ratios = []  # M3
    duplication_ratios = []  # M3

    for entry in entries.values():
        # Calculate quality score (M1 or M3 version)
        if use_enhanced_scoring:
            tier, score = self.calculate_enhanced_quality_score(entry, global_card_freq)
            # The enhanced scorer's result is multiplied by 100 to put it on a 0-100 scale.
            score = score * 100

            # M3: uniqueness and duplication are only tracked for themes with example cards.
            if entry.example_cards:
                uniqueness_ratios.append(
                    self.calculate_uniqueness_ratio(entry.example_cards, global_card_freq)
                )
                duplication_ratios.append(
                    self.calculate_duplication_ratio(entry.example_cards, global_card_freq)
                )
        else:
            # NOTE(review): the M1 score is used as-is, so it is presumably
            # already on a 0-100 scale - confirm against calculate_quality_score.
            score = self.calculate_quality_score(entry)
            tier = self.get_quality_tier(score)

        quality_scores.append(score)
        quality_tiers[tier] += 1

        # Check completeness
        has_examples = bool(entry.example_commanders and entry.example_cards)
        if entry.description and has_examples and entry.deck_archetype and entry.popularity_bucket:
            complete += 1

        if not entry.description:
            missing_descriptions += 1
        if not has_examples:
            missing_examples += 1

        # Track description sources. A value outside the pre-seeded keys gets
        # its own bucket instead of raising KeyError, so a single unexpected
        # theme cannot abort the whole report.
        source = entry.description_source or 'unknown'
        description_sources[source] = description_sources.get(source, 0) + 1

        # Track pinned popularity
        if entry.popularity_pinned:
            pinned_count += 1

    avg_score = sum(quality_scores) / len(quality_scores) if quality_scores else 0

    result = {
        'total_themes': total,
        'complete_editorials': complete,
        'missing_descriptions': missing_descriptions,
        'missing_examples': missing_examples,
        'quality_distribution': quality_tiers,
        'average_quality_score': round(avg_score, 2),
        'completeness_percentage': round((complete / total) * 100, 2) if total > 0 else 0,
        'description_source_distribution': description_sources,
        'pinned_popularity_count': pinned_count,
    }

    # M3: Add uniqueness metrics if using enhanced scoring
    if use_enhanced_scoring and uniqueness_ratios:
        result['average_uniqueness_ratio'] = round(sum(uniqueness_ratios) / len(uniqueness_ratios), 3)
        result['average_duplication_ratio'] = round(sum(duplication_ratios) / len(duplication_ratios), 3)

    return result
|
||||
|
||||
|
||||
# Module-level slot for the shared ThemeEditorialService; it starts as None
# and is filled in by get_editorial_service().
_editorial_service: Optional[ThemeEditorialService] = None


def get_editorial_service() -> ThemeEditorialService:
    """Return the shared ThemeEditorialService, creating it on first use.

    Returns:
        The module-wide ThemeEditorialService instance.
    """
    global _editorial_service
    # Fast path: an instance was already built by an earlier call.
    if _editorial_service is not None:
        return _editorial_service
    _editorial_service = ThemeEditorialService()
    return _editorial_service
|
||||
Loading…
Add table
Add a link
Reference in a new issue