mtg_python_deckbuilder/code/scripts/generate_theme_editorial_suggestions.py

447 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Generate editorial metadata suggestions for theme YAML files (Phase D helper).
Features:
- Scans color CSV files (skips monolithic cards.csv unless --include-master)
- Collects top-N (lowest EDHREC rank) cards per theme based on themeTags column
- Optionally derives commander suggestions from commander_cards.csv (if present)
- Provides dry-run output (default) or can patch YAML files that lack example_cards / example_commanders
- Prints streaming progress so the user sees real-time status
Usage (dry run):
python code/scripts/generate_theme_editorial_suggestions.py --themes "Landfall,Reanimate" --top 8
Write back missing fields (only if not already present):
python code/scripts/generate_theme_editorial_suggestions.py --apply --limit-yaml 500
Safety:
- Existing example_cards / example_commanders are never overwritten unless --force is passed
- Writes are limited by --limit-yaml (default 0 means unlimited) to avoid massive churn accidentally
Heuristics:
- Deduplicate card names per theme
- Filter out names with extremely poor rank (> 60000) by default (configurable)
- For commander suggestions, prefer legendary creatures/planeswalkers in commander_cards.csv whose themeTags includes the theme
- Fallback commander suggestions: take top legendary cards from color CSVs tagged with the theme
- synergy_commanders: derive from top 3 synergies of each theme (3 from top, 2 from second, 1 from third)
- Promotion: if fewer than --min-examples example_commanders exist after normal suggestion, promote synergy_commanders (in order) into example_commanders, annotating with " - Synergy (<synergy name>)"
"""
from __future__ import annotations
import argparse
import ast
import csv
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Tuple, Set
import sys
try: # optional dependency safety
import yaml # type: ignore
except Exception:
yaml = None
# Third ancestor of this file's resolved path (parents[0] is the directory
# containing the script, parents[2] is two levels above that).
ROOT = Path(__file__).resolve().parents[2]
# Directory that is globbed for card CSVs and that holds the commander CSV.
CSV_DIR = ROOT / 'csv_files'
# Directory whose *.yml theme files are read and (with --apply) rewritten.
CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog'
# Glob pattern used to discover card CSVs inside CSV_DIR.
COLOR_CSV_GLOB = '*_cards.csv'
# File name of the monolithic card list (scanned only on request).
MASTER_FILE = 'cards.csv'
# File name of the commander-only list; it is read by scan_commander_csv.
COMMANDER_FILE = 'commander_cards.csv'
@dataclass
class ThemeSuggestion:
    """Suggested editorial entries for a single theme."""
    # Deduplicated card names picked from the theme's card hits.
    cards: List[str]
    # Deduplicated commander names picked from the theme's commander hits.
    commanders: List[str]
    # Created empty by build_suggestions; synergy commanders are derived
    # separately while applying suggestions to the YAML files.
    synergy_commanders: List[str]
def _parse_theme_tags(raw: str) -> List[str]:
if not raw:
return []
raw = raw.strip()
if not raw or raw == '[]':
return []
try:
# themeTags stored like "['Landfall', 'Ramp']" use literal_eval safely
val = ast.literal_eval(raw)
if isinstance(val, list):
return [str(x) for x in val if isinstance(x, str)]
except Exception:
pass
# Fallback naive parse
return [t.strip().strip("'\"") for t in raw.strip('[]').split(',') if t.strip()]
def scan_color_csvs(include_master: bool, max_rank: float, progress_every: int) -> Tuple[Dict[str, List[Tuple[float, str]]], Dict[str, List[Tuple[float, str]]]]:
    """Scan the card CSVs in ``CSV_DIR`` and bucket cards by theme tag.

    Files matching ``COLOR_CSV_GLOB`` are read, except the commander file and
    any path containing ``testdata``. The master file is read only when
    *include_master* is true; it is added explicitly because its name does
    not match ``COLOR_CSV_GLOB`` and would otherwise never be picked up.

    Args:
        include_master: Also scan ``MASTER_FILE`` when it exists.
        max_rank: Rows whose ``edhrecRank`` is greater than this are skipped.
            A missing or unparsable rank counts as 999999.
        progress_every: Emit a progress line to stderr every N rows per file;
            0 disables row-level progress.

    Returns:
        ``(theme_hits, legendary_hits)``. Both map a theme tag to a list of
        ``(rank, card name)`` tuples sorted by ascending rank. Buckets are
        capped at 120 and 80 entries respectively. ``legendary_hits`` holds
        only rows whose ``type`` column contains the word ``Legendary``.
    """
    theme_hits: Dict[str, List[Tuple[float, str]]] = {}
    legendary_hits: Dict[str, List[Tuple[float, str]]] = {}

    files: List[Path] = []
    for fp in sorted(CSV_DIR.glob(COLOR_CSV_GLOB)):
        # The master file is handled separately below; the commander file is
        # read by scan_commander_csv.
        if fp.name in (MASTER_FILE, COMMANDER_FILE):
            continue
        # skip testdata
        if 'testdata' in str(fp):
            continue
        files.append(fp)
    if include_master:
        master_path = CSV_DIR / MASTER_FILE
        if master_path.is_file() and 'testdata' not in str(master_path):
            files.append(master_path)

    total_files = len(files)
    for processed, fp in enumerate(files, start=1):
        try:
            with fp.open(encoding='utf-8', newline='') as f:
                for line_idx, row in enumerate(csv.DictReader(f), start=1):
                    if progress_every and line_idx % progress_every == 0:
                        print(f"[scan] {fp.name} line {line_idx}", file=sys.stderr, flush=True)
                    tags_raw = row.get('themeTags') or ''
                    if not tags_raw:
                        continue
                    try:
                        rank = float(row.get('edhrecRank') or 999999)
                    except (TypeError, ValueError):
                        rank = 999999.0
                    if rank > max_rank:
                        continue
                    card_name = row.get('name') or ''
                    if not card_name:
                        continue
                    card_type = row.get('type') or ''
                    # Whole-word match on the type line.
                    is_legendary = isinstance(card_type, str) and 'Legendary' in card_type.split()
                    for tag in _parse_theme_tags(tags_raw):
                        if not tag:
                            continue
                        theme_hits.setdefault(tag, []).append((rank, card_name))
                        if is_legendary:
                            legendary_hits.setdefault(tag, []).append((rank, card_name))
        except Exception as e:  # pragma: no cover
            # Best effort: one unreadable file must not abort the whole scan.
            print(f"[warn] failed reading {fp.name}: {e}", file=sys.stderr)
        print(f"[scan] completed {fp.name} ({processed}/{total_files})", file=sys.stderr, flush=True)

    # Trim each bucket to reasonable size (keep best ranks)
    for mapping, cap in ((theme_hits, 120), (legendary_hits, 80)):
        for bucket in mapping.values():
            bucket.sort(key=lambda hit: hit[0])
            del bucket[cap:]
    return theme_hits, legendary_hits
def scan_commander_csv(max_rank: float) -> Dict[str, List[Tuple[float, str]]]:
    """Bucket the rows of the commander CSV by theme tag.

    Args:
        max_rank: Rows whose ``edhrecRank`` is greater than this are skipped.
            A missing or unparsable rank counts as 999999.

    Returns:
        Mapping of theme tag to ``(rank, name)`` tuples sorted by ascending
        rank and capped at 60 entries per tag. Empty when the commander file
        does not exist.
    """
    commander_path = CSV_DIR / COMMANDER_FILE
    hits: Dict[str, List[Tuple[float, str]]] = {}
    if not commander_path.exists():
        return hits

    try:
        with commander_path.open(encoding='utf-8', newline='') as handle:
            for record in csv.DictReader(handle):
                raw_tags = record.get('themeTags') or ''
                if not raw_tags:
                    continue
                parsed_tags = _parse_theme_tags(raw_tags)
                try:
                    rank = float(record.get('edhrecRank') or 999999)
                except Exception:
                    rank = 999999
                if rank > max_rank:
                    continue
                commander = record.get('name') or ''
                if not commander:
                    continue
                for tag in parsed_tags:
                    if tag:
                        hits.setdefault(tag, []).append((rank, commander))
    except Exception as e:  # pragma: no cover
        # Reading is best effort; whatever was collected so far is kept.
        print(f"[warn] failed reading {COMMANDER_FILE}: {e}", file=sys.stderr)

    # Keep only the 60 best-ranked commanders per tag.
    for bucket in hits.values():
        bucket.sort(key=lambda hit: hit[0])
        if len(bucket) > 60:
            del bucket[60:]
    return hits
def load_yaml_theme(path: Path) -> dict:
    """Read and parse one theme YAML file.

    Returns the value produced by ``yaml.safe_load``. Returns ``{}`` when the
    ``yaml`` module is unavailable or when reading/parsing raises. The parsed
    value is returned as-is, so callers should verify it is a dict.
    """
    try:
        if not yaml:
            return {}
        content = path.read_text(encoding='utf-8')
        return yaml.safe_load(content)
    except Exception:
        return {}
def write_yaml_theme(path: Path, data: dict):
    """Serialize *data* as YAML (key order preserved, unicode unescaped) into *path*."""
    path.write_text(
        yaml.safe_dump(data, sort_keys=False, allow_unicode=True),
        encoding='utf-8',
    )
def build_suggestions(theme_hits: Dict[str, List[Tuple[float, str]]], commander_hits: Dict[str, List[Tuple[float, str]]], top: int, top_commanders: int, *, synergy_top=(3,2,1), min_examples: int = 5) -> Dict[str, ThemeSuggestion]:
    """Build per-theme card and commander suggestions from the scanned hits.

    For every theme present in either mapping, up to *top* distinct card
    names are taken from the first ``top * 3`` card hits and up to
    *top_commanders* distinct names from the first ``top_commanders * 2``
    commander hits. ``synergy_commanders`` is left empty here.

    *synergy_top* and *min_examples* are accepted but not used by this
    function.
    """

    def _first_unique(hits: List[Tuple[float, str]], limit: int, window: int) -> List[str]:
        # Oversample a window of hits, then keep distinct names in order.
        picked: List[str] = []
        for _rank, candidate in hits[:window]:
            if candidate in picked:
                continue
            picked.append(candidate)
            if len(picked) >= limit:
                break
        return picked

    result: Dict[str, ThemeSuggestion] = {}
    theme_names: Set[str] = set(theme_hits.keys()) | set(commander_hits.keys())
    for theme in sorted(theme_names):
        cards = _first_unique(theme_hits[theme], top, top * 3) if theme in theme_hits else []
        commanders = (
            _first_unique(commander_hits[theme], top_commanders, top_commanders * 2)
            if theme in commander_hits
            else []
        )
        # synergy_commanders stays empty; it is filled in when the YAML synergies are known.
        result[theme] = ThemeSuggestion(cards=cards, commanders=commanders, synergy_commanders=[])
    return result
def _derive_synergy_commanders(base_theme: str, data: dict, all_yaml: Dict[str, dict], commander_hits: Dict[str, List[Tuple[float, str]]], legendary_hits: Dict[str, List[Tuple[float, str]]], synergy_top=(3,2,1)) -> List[Tuple[str, str]]:
"""Pick synergy commanders with their originating synergy label.
Returns list of (commander_name, synergy_theme) preserving order of (top synergy, second, third) and internal ranking.
"""
synergies = data.get('synergies') or []
if not isinstance(synergies, list):
return []
pattern = list(synergy_top)
out: List[Tuple[str, str]] = []
for idx, count in enumerate(pattern):
if idx >= len(synergies):
break
s_name = synergies[idx]
bucket = commander_hits.get(s_name) or []
taken = 0
for _, cname in bucket:
if all(cname != existing for existing, _ in out):
out.append((cname, s_name))
taken += 1
if taken >= count:
break
if taken < count:
# fallback to legendary card hits tagged with that synergy
fallback_bucket = legendary_hits.get(s_name) or []
for _, cname in fallback_bucket:
if all(cname != existing for existing, _ in out):
out.append((cname, s_name))
taken += 1
if taken >= count:
break
return out
def _augment_synergies(data: dict, base_theme: str) -> bool:
"""Heuristically augment the 'synergies' list when it's sparse.
Rules:
- If synergies length >= 3, leave as-is.
- Start with existing synergies then append curated/enforced/inferred (in that order) if missing.
- For any theme whose display_name contains 'Counter' add 'Counters Matter' and 'Proliferate'.
Returns True if modified.
"""
synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else []
if not isinstance(synergies, list):
return False
original = list(synergies)
if len(synergies) < 3:
for key in ('curated_synergies', 'enforced_synergies', 'inferred_synergies'):
lst = data.get(key)
if isinstance(lst, list):
for s in lst:
if isinstance(s, str) and s and s not in synergies:
synergies.append(s)
name = data.get('display_name') or base_theme
if isinstance(name, str) and 'counter' in name.lower():
for extra in ('Counters Matter', 'Proliferate'):
if extra not in synergies:
synergies.append(extra)
# Deduplicate preserving order
seen = set()
deduped = []
for s in synergies:
if s not in seen:
deduped.append(s)
seen.add(s)
if deduped != synergies:
synergies = deduped
if synergies != original:
data['synergies'] = synergies
return True
return False
def _synergy_base_name(entry):
    """Return *entry* without a trailing ' - Synergy (...)' annotation (non-strings pass through)."""
    return entry.split(' - Synergy ')[0] if isinstance(entry, str) else entry


def apply_to_yaml(suggestions: Dict[str, ThemeSuggestion], *, limit_yaml: int, force: bool, themes_filter: Set[str], commander_hits: Dict[str, List[Tuple[float, str]]], legendary_hits: Dict[str, List[Tuple[float, str]]], synergy_top=(3,2,1), min_examples: int = 5, augment_synergies: bool = False, treat_placeholders_missing: bool = False):
    """Write suggestions into the theme YAML files under ``CATALOG_DIR``.

    For every ``*.yml`` file (sorted) whose ``display_name`` has a suggestion
    and passes *themes_filter*:
      1. optionally augment ``synergies``;
      2. fill ``example_cards`` / ``example_commanders`` when missing (or when
         *force* is set);
      3. re-annotate example commanders that still carry the old
         ``" - Synergy (<this theme>)"`` suffix;
      4. promote synergy commanders until ``example_commanders`` has
         *min_examples* entries;
      5. store the remaining synergy commanders in ``synergy_commanders``.
    A file is rewritten only when something changed.

    Args:
        suggestions: Theme display name -> suggestion.
        limit_yaml: Stop after this many files were written (0 = unlimited).
        force: Overwrite existing example lists.
        themes_filter: When non-empty, only these display names are processed.
        commander_hits: Tag -> ``(rank, name)`` list, used for synergy commanders.
        legendary_hits: Tag -> ``(rank, name)`` list, fallback for synergy commanders.
        synergy_top: Quota per synergy position.
        min_examples: Desired minimum length of ``example_commanders``.
        augment_synergies: Run ``_augment_synergies`` before deriving commanders.
        treat_placeholders_missing: Treat an ``example_commanders`` list made
            only of entries ending in ``" Anchor"`` as empty.

    Returns:
        Number of YAML files written.
    """
    updated = 0
    # Parse every YAML exactly once; the same parsed objects serve as the
    # synergy lookup cache and as the documents being edited.
    paths = sorted(CATALOG_DIR.glob('*.yml'))
    all_yaml_cache: Dict[str, dict] = {p.name: load_yaml_theme(p) for p in paths}

    for path in paths:
        data = all_yaml_cache[path.name]
        if not isinstance(data, dict):
            continue
        display = data.get('display_name')
        if not isinstance(display, str) or not display:
            continue
        if themes_filter and display not in themes_filter:
            continue
        sug = suggestions.get(display)
        if not sug:
            continue
        changed = False

        # Optional synergy augmentation prior to commander derivation
        if augment_synergies and _augment_synergies(data, display):
            changed = True

        # Derive synergy_commanders before promotion logic
        synergy_cmds = _derive_synergy_commanders(display, data, all_yaml_cache, commander_hits, legendary_hits, synergy_top=synergy_top)
        # Annotate synergy_commanders with their synergy source for transparency
        synergy_cmd_names = [f"{c} - Synergy ({src})" for c, src in synergy_cmds]

        if (force or not data.get('example_cards')) and sug.cards:
            data['example_cards'] = list(sug.cards)
            changed = True

        raw_examples = data.get('example_commanders')
        existing_examples: List[str] = list(raw_examples) if isinstance(raw_examples, list) else []
        # Treat an all-placeholder (" Anchor" suffix) list as effectively empty when flag enabled
        if treat_placeholders_missing and existing_examples and all(isinstance(e, str) and e.endswith(' Anchor') for e in existing_examples):
            existing_examples = []
        if (force or not existing_examples) and sug.commanders:
            data['example_commanders'] = list(sug.commanders)
            existing_examples = data['example_commanders']
            changed = True

        # Re-annotate existing example_commanders if they use old base-theme annotation pattern
        if existing_examples and synergy_cmds:
            # Old pattern: entry ends with the base theme name inside parentheses
            old_suffix = f" - Synergy ({display})"
            needs_reannotate = any(isinstance(ex, str) and ex.endswith(old_suffix) for ex in existing_examples)
            if needs_reannotate:
                # Mapping from commander name to synergy source
                source_map = {name: src for name, src in synergy_cmds}
                new_examples: List[str] = []
                for ex in existing_examples:
                    if isinstance(ex, str) and ' - Synergy (' in ex:
                        base_name = _synergy_base_name(ex)
                        if base_name in source_map:
                            new_examples.append(f"{base_name} - Synergy ({source_map[base_name]})")
                            continue
                    new_examples.append(ex)
                if new_examples != existing_examples:
                    data['example_commanders'] = new_examples
                    existing_examples = new_examples
                    changed = True

        # Promotion: ensure at least min_examples in example_commanders by moving from synergy list (without duplicates)
        if (len(existing_examples) < min_examples) and synergy_cmd_names:
            needed = min_examples - len(existing_examples)
            promoted = []
            for cname, source_synergy in synergy_cmds:
                # Avoid duplicate even with annotation
                if any(cname == _synergy_base_name(ex) for ex in existing_examples):
                    continue
                existing_examples.append(f"{cname} - Synergy ({source_synergy})")
                promoted.append(cname)
                needed -= 1
                if needed <= 0:
                    break
            if promoted:
                data['example_commanders'] = existing_examples
                changed = True

        # After any potential promotions / re-annotations, attach synergy_commanders
        # excluding any commanders already present in example_commanders
        existing_base_names = {_synergy_base_name(ex) for ex in (data.get('example_commanders') or []) if isinstance(ex, str)}
        filtered_synergy_cmd_names = [
            entry for entry in synergy_cmd_names
            if _synergy_base_name(entry) not in existing_base_names
        ]
        raw_prior = data.get('synergy_commanders')
        prior_synergy_cmds = raw_prior if isinstance(raw_prior, list) else []
        if prior_synergy_cmds != filtered_synergy_cmd_names:
            if filtered_synergy_cmd_names or force or prior_synergy_cmds:
                data['synergy_commanders'] = filtered_synergy_cmd_names
                changed = True

        if changed:
            write_yaml_theme(path, data)
            updated += 1
            print(f"[apply] updated {path.name}")
            if limit_yaml and updated >= limit_yaml:
                print(f"[apply] reached limit {limit_yaml}; stopping")
                break
    return updated
def main(): # pragma: no cover
    """Command-line entry point.

    Parses arguments, scans the CSVs, builds suggestions and then either
    prints a dry-run summary (default) or, with ``--apply``, writes the
    suggestions into the theme YAML files. With ``--apply`` and no PyYAML
    installed the process exits with status 1 before any CSV is scanned.
    """
    parser = argparse.ArgumentParser(description='Generate example_cards / example_commanders suggestions for theme YAML')
    parser.add_argument('--themes', type=str, help='Comma-separated subset of display names to restrict')
    parser.add_argument('--top', type=int, default=8, help='Target number of example_cards suggestions')
    parser.add_argument('--top-commanders', type=int, default=5, help='Target number of example_commanders suggestions')
    parser.add_argument('--max-rank', type=float, default=60000, help='Skip cards with EDHREC rank above this threshold')
    parser.add_argument('--include-master', action='store_true', help='Include large cards.csv in scan (slower)')
    parser.add_argument('--progress-every', type=int, default=0, help='Emit a progress line every N rows per file')
    parser.add_argument('--apply', action='store_true', help='Write missing fields into YAML files')
    parser.add_argument('--limit-yaml', type=int, default=0, help='Limit number of YAML files modified (0 = unlimited)')
    parser.add_argument('--force', action='store_true', help='Overwrite existing example lists')
    parser.add_argument('--min-examples', type=int, default=5, help='Minimum desired example_commanders; promote from synergy_commanders if short')
    parser.add_argument('--augment-synergies', action='store_true', help='Heuristically augment sparse synergies list before deriving synergy_commanders')
    parser.add_argument('--treat-placeholders', action='store_true', help='Consider Anchor-only example_commanders lists as missing so they can be replaced')
    args = parser.parse_args()

    # Fail fast: applying needs PyYAML, so check before the (slow) CSV scan.
    if args.apply and yaml is None:
        print('ERROR: PyYAML not installed; cannot apply changes.', file=sys.stderr)
        sys.exit(1)

    themes_filter: Set[str] = set()
    if args.themes:
        themes_filter = {t.strip() for t in args.themes.split(',') if t.strip()}

    print('[info] scanning CSVs...', file=sys.stderr)
    theme_hits, legendary_hits = scan_color_csvs(args.include_master, args.max_rank, args.progress_every)
    print('[info] scanning commander CSV...', file=sys.stderr)
    commander_hits = scan_commander_csv(args.max_rank)
    print('[info] building suggestions...', file=sys.stderr)
    suggestions = build_suggestions(theme_hits, commander_hits, args.top, args.top_commanders, min_examples=args.min_examples)

    if not args.apply:
        # Dry run: print JSON-like summary for filtered subset (or first 25 themes)
        to_show = sorted(themes_filter) if themes_filter else list(sorted(suggestions.keys())[:25])
        for t in to_show:
            s = suggestions.get(t)
            if not s:
                continue
            print(f"\n=== {t} ===")
            print('example_cards:', ', '.join(s.cards) or '(none)')
            print('example_commanders:', ', '.join(s.commanders) or '(none)')
            print('synergy_commanders: (computed at apply time)')
        print('\n[info] dry-run complete (use --apply to write)')
        return

    updated = apply_to_yaml(
        suggestions,
        limit_yaml=args.limit_yaml,
        force=args.force,
        themes_filter=themes_filter,
        commander_hits=commander_hits,
        legendary_hits=legendary_hits,
        synergy_top=(3,2,1),
        min_examples=args.min_examples,
        augment_synergies=args.augment_synergies,
        treat_placeholders_missing=args.treat_placeholders,
    )
    print(f'[info] updated {updated} YAML files')
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__': # pragma: no cover
    main()