mtg_python_deckbuilder/code/scripts/validate_description_mapping.py

154 lines
6.7 KiB
Python

"""Validate external description mapping file for auto-description system.
Checks:
- YAML parses
- Each item has triggers (list[str]) and description (str)
- Duplicate trigger substrings across entries (compared case-insensitively) are reported as warnings (first wins; duplicates may cause confusion)
- Optional mapping_version entry allowed (dict with key mapping_version)
- Warn if {SYNERGIES} placeholder unused in entries where synergy phrase seems beneficial (heuristic: description contains a hint word such as token / counter / treasure / artifact / spell / grave / landfall; see SYNERGY_HINT_WORDS)
Exit code 0 on success, >0 on validation failure.
"""
from __future__ import annotations
import sys
from pathlib import Path
from typing import List, Dict
# PyYAML is the only third-party dependency of this script. Fail fast with a
# clear message and a distinct exit code (2) when it is missing.
try:
    import yaml  # type: ignore
except ImportError:
    # Catch only ImportError (which includes ModuleNotFoundError): any other
    # failure raised while importing yaml is not a "not installed" condition
    # and should surface with its own traceback instead of this message.
    print("PyYAML not installed; cannot validate mapping.", file=sys.stderr)
    sys.exit(2)
# Root directory used to locate the config files: two levels above the
# directory that contains this script.
ROOT = Path(__file__).resolve().parents[2]

# Files examined by the validator; all of them live under config/themes.
MAPPING_PATH = ROOT.joinpath('config', 'themes', 'description_mapping.yml')
PAIRS_PATH = ROOT.joinpath('config', 'themes', 'synergy_pairs.yml')
CLUSTERS_PATH = ROOT.joinpath('config', 'themes', 'theme_clusters.yml')
CATALOG_JSON = ROOT.joinpath('config', 'themes', 'theme_list.json')

# Lower-case substrings; a description containing any of them (and lacking the
# {SYNERGIES} placeholder) triggers a "may benefit from placeholder" warning.
SYNERGY_HINT_WORDS = [
    'token',
    'treasure',
    'clue',
    'food',
    'blood',
    'map',
    'incubat',
    'powerstone',
    'counter',
    'proliferate',
    '+1/+1',
    '-1/-1',
    'grave',
    'reanimate',
    'spell',
    'landfall',
    'artifact',
    'enchant',
    'equipment',
    'sacrifice',
]
def _load_theme_names():
    """Return the set of theme names listed in the catalog JSON file.

    Reads ``CATALOG_JSON`` and collects the truthy ``theme`` value of every
    dict found under its top-level ``themes`` key. This is a best-effort
    lookup: a missing file, or any failure while reading or interpreting it,
    produces an empty set rather than an exception.
    """
    import json

    if not CATALOG_JSON.exists():
        return set()

    collected = set()
    try:
        catalog = json.loads(CATALOG_JSON.read_text(encoding='utf-8'))
        for entry in catalog.get('themes', []):
            if not isinstance(entry, dict):
                continue
            name = entry.get('theme')
            if name:
                collected.add(name)
    except Exception:
        # Discard any partial result so that a failure always yields an
        # empty set.
        return set()
    return collected
def _validate_mapping_items(raw: list) -> tuple[set[str], List[str], List[str]]:
    """Validate each entry of the parsed description mapping list.

    Dict entries containing a ``mapping_version`` key are skipped. Every other
    entry must be a dict with ``triggers`` (a list of non-empty strings) and a
    non-blank ``description`` string.

    Args:
        raw: The top-level list parsed from the mapping YAML file.

    Returns:
        A ``(seen_triggers, errors, warnings)`` tuple, where ``seen_triggers``
        is the set of lower-cased triggers collected from valid entries.
    """
    seen_triggers: set[str] = set()
    errors: List[str] = []
    warnings: List[str] = []
    for idx, item in enumerate(raw):
        if not isinstance(item, dict):
            errors.append(f"Item {idx} not a dict")
            continue
        if 'mapping_version' in item:
            # Version marker entry; it carries no triggers/description.
            continue
        triggers = item.get('triggers')
        desc = item.get('description')
        if not isinstance(triggers, list) or not all(isinstance(t, str) and t for t in triggers):
            errors.append(f"Item {idx} has invalid triggers: {triggers}")
            continue
        if not isinstance(desc, str) or not desc.strip():
            errors.append(f"Item {idx} missing/empty description")
            continue
        if not triggers:
            # all() is vacuously true for an empty list, so it gets this far.
            warnings.append(f"Item {idx} has an empty triggers list.")
        for t in triggers:
            t_lower = t.lower()
            if t_lower in seen_triggers:
                warnings.append(f"Duplicate trigger '{t_lower}' (first declared earlier); consider pruning.")
            else:
                seen_triggers.add(t_lower)
        # Heuristic synergy placeholder suggestion
        if '{SYNERGIES}' not in desc:
            lower_desc = desc.lower()
            if any(w in lower_desc for w in SYNERGY_HINT_WORDS):
                # Guard the label: indexing an empty triggers list would raise
                # IndexError and abort the whole validation run.
                label = triggers[0] if triggers else '<no triggers>'
                warnings.append(f"Item {idx} ('{label}') may benefit from {{SYNERGIES}} placeholder.")
    return seen_triggers, errors, warnings


def _validate_synergy_pairs(theme_names: set) -> tuple[List[str], List[str]]:
    """Validate the curated synergy pairs file, if it exists.

    Checks that ``synergy_pairs`` is a mapping of theme -> list, that no theme
    lists itself or repeats a synergy, and warns about lists longer than 12
    and about themes absent from ``theme_names`` (only when that set is
    non-empty).

    Args:
        theme_names: Known catalog theme names; may be empty.

    Returns:
        An ``(errors, warnings)`` tuple of message lists.
    """
    errors: List[str] = []
    warnings: List[str] = []
    if not PAIRS_PATH.exists():
        return errors, warnings
    try:
        pairs_raw = yaml.safe_load(PAIRS_PATH.read_text(encoding='utf-8')) or {}
        pairs = pairs_raw.get('synergy_pairs', {}) if isinstance(pairs_raw, dict) else {}
        if not isinstance(pairs, dict):
            errors.append('synergy_pairs.yml: root.synergy_pairs must be a mapping')
            return errors, warnings
        for theme, lst in pairs.items():
            if not isinstance(lst, list):
                errors.append(f'synergy_pairs.{theme} not list')
                continue
            seen_local = set()
            for s in lst:
                if s == theme:
                    errors.append(f'{theme} lists itself as synergy')
                if s in seen_local:
                    errors.append(f'{theme} duplicate curated synergy {s}')
                seen_local.add(s)
            if len(lst) > 12:
                warnings.append(f'{theme} curated synergies >12 ({len(lst)})')
            if theme_names and theme not in theme_names:
                warnings.append(f'{theme} not yet in catalog (pending addition)')
    except Exception as e:  # pragma: no cover
        # Report any read/parse/shape failure as a validation error instead of
        # crashing; messages collected before the failure are kept.
        errors.append(f'Failed parsing synergy_pairs.yml: {e}')
    return errors, warnings


def _validate_clusters(theme_names: set) -> tuple[List[str], List[str]]:
    """Validate the theme clusters file, if it exists.

    Checks that ``clusters`` is a list of dicts, each with a unique non-empty
    ``id`` and a non-empty ``themes`` list without duplicates, and warns about
    themes absent from ``theme_names`` (only when that set is non-empty).

    Args:
        theme_names: Known catalog theme names; may be empty.

    Returns:
        An ``(errors, warnings)`` tuple of message lists.
    """
    errors: List[str] = []
    warnings: List[str] = []
    if not CLUSTERS_PATH.exists():
        return errors, warnings
    try:
        clusters_raw = yaml.safe_load(CLUSTERS_PATH.read_text(encoding='utf-8')) or {}
        clusters = clusters_raw.get('clusters', []) if isinstance(clusters_raw, dict) else []
        if not isinstance(clusters, list):
            errors.append('theme_clusters.yml: clusters must be a list')
            return errors, warnings
        seen_ids = set()
        for c in clusters:
            if not isinstance(c, dict):
                errors.append('cluster entry not dict')
                continue
            cid = c.get('id')
            if not cid or cid in seen_ids:
                errors.append(f'cluster id missing/duplicate: {cid}')
            seen_ids.add(cid)
            themes = c.get('themes') or []
            if not isinstance(themes, list) or not themes:
                errors.append(f'cluster {cid} missing themes list')
                continue
            seen_local = set()
            for t in themes:
                if t in seen_local:
                    errors.append(f'cluster {cid} duplicate theme {t}')
                seen_local.add(t)
                if theme_names and t not in theme_names:
                    warnings.append(f'cluster {cid} theme {t} not in catalog (maybe naming variant)')
    except Exception as e:  # pragma: no cover
        # Report any read/parse/shape failure as a validation error instead of
        # crashing; messages collected before the failure are kept.
        errors.append(f'Failed parsing theme_clusters.yml: {e}')
    return errors, warnings


def main() -> int:
    """Validate the description mapping and related theme config files.

    Validates the mapping file, then the optional synergy pairs and cluster
    files. Errors are printed to stderr; warnings and the success summary are
    printed to stdout only when there are no errors.

    Returns:
        0 when no validation errors were found, 1 otherwise (including a
        missing, unreadable, or malformed mapping file).
    """
    if not MAPPING_PATH.exists():
        print(f"Mapping file missing: {MAPPING_PATH}", file=sys.stderr)
        return 1
    try:
        raw = yaml.safe_load(MAPPING_PATH.read_text(encoding='utf-8'))
    except (OSError, ValueError, yaml.YAMLError) as exc:
        # Malformed YAML or an undecodable/unreadable file is a validation
        # failure, reported like the sibling files rather than as a traceback.
        # ValueError covers UnicodeDecodeError.
        print(f"Failed parsing {MAPPING_PATH.name}: {exc}", file=sys.stderr)
        return 1
    if not isinstance(raw, list):
        print("Top-level YAML structure must be a list (items + optional mapping_version dict).", file=sys.stderr)
        return 1

    seen_triggers, errors, warnings = _validate_mapping_items(raw)

    theme_names = _load_theme_names()
    # Keep message order stable: mapping, then synergy pairs, then clusters.
    for extra_errors, extra_warnings in (
        _validate_synergy_pairs(theme_names),
        _validate_clusters(theme_names),
    ):
        errors.extend(extra_errors)
        warnings.extend(extra_warnings)

    if errors:
        print("VALIDATION FAILURES:", file=sys.stderr)
        for e in errors:
            print(f" - {e}", file=sys.stderr)
        return 1
    if warnings:
        print("Validation warnings:")
        for w in warnings:
            print(f" - {w}")
    print(f"Mapping OK. {len(seen_triggers)} unique trigger substrings.")
    return 0
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    sys.exit(main())