"""Generate editorial metadata suggestions for theme YAML files (Phase D helper). Features: - Scans color CSV files (skips monolithic cards.csv unless --include-master) - Collects top-N (lowest EDHREC rank) cards per theme based on themeTags column - Optionally derives commander suggestions from commander_cards.csv (if present) - Provides dry-run output (default) or can patch YAML files that lack example_cards / example_commanders - Prints streaming progress so the user sees real-time status Usage (dry run): python code/scripts/generate_theme_editorial_suggestions.py --themes "Landfall,Reanimate" --top 8 Write back missing fields (only if not already present): python code/scripts/generate_theme_editorial_suggestions.py --apply --limit-yaml 500 Safety: - Existing example_cards / example_commanders are never overwritten unless --force is passed - Writes are limited by --limit-yaml (default 0 means unlimited) to avoid massive churn accidentally Heuristics: - Deduplicate card names per theme - Filter out names with extremely poor rank (> 60000) by default (configurable) - For commander suggestions, prefer legendary creatures/planeswalkers in commander_cards.csv whose themeTags includes the theme - Fallback commander suggestions: take top legendary cards from color CSVs tagged with the theme - synergy_commanders: derive from top 3 synergies of each theme (3 from top, 2 from second, 1 from third) - Promotion: if fewer than --min-examples example_commanders exist after normal suggestion, promote synergy_commanders (in order) into example_commanders, annotating with " - Synergy ()" """ from __future__ import annotations import argparse import ast import csv from dataclasses import dataclass from pathlib import Path from typing import Dict, List, Tuple, Set import sys try: # optional dependency safety import yaml # type: ignore except Exception: yaml = None ROOT = Path(__file__).resolve().parents[2] CSV_DIR = ROOT / 'csv_files' CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog' COLOR_CSV_GLOB = '*_cards.csv' MASTER_FILE = 'cards.csv' COMMANDER_FILE = 'commander_cards.csv' @dataclass class ThemeSuggestion: cards: List[str] commanders: List[str] synergy_commanders: List[str] def _parse_theme_tags(raw: str) -> List[str]: if not raw: return [] raw = raw.strip() if not raw or raw == '[]': return [] try: # themeTags stored like "['Landfall', 'Ramp']" – use literal_eval safely val = ast.literal_eval(raw) if isinstance(val, list): return [str(x) for x in val if isinstance(x, str)] except Exception: pass # Fallback naive parse return [t.strip().strip("'\"") for t in raw.strip('[]').split(',') if t.strip()] def scan_color_csvs(include_master: bool, max_rank: float, progress_every: int) -> Tuple[Dict[str, List[Tuple[float, str]]], Dict[str, List[Tuple[float, str]]]]: theme_hits: Dict[str, List[Tuple[float, str]]] = {} legendary_hits: Dict[str, List[Tuple[float, str]]] = {} files: List[Path] = [] for fp in sorted(CSV_DIR.glob(COLOR_CSV_GLOB)): name = fp.name if name == MASTER_FILE and not include_master: continue if name == COMMANDER_FILE: continue # skip testdata if 'testdata' in str(fp): continue files.append(fp) total_files = len(files) processed = 0 for fp in files: processed += 1 try: with fp.open(encoding='utf-8', newline='') as f: reader = csv.DictReader(f) line_idx = 0 for row in reader: line_idx += 1 if progress_every and line_idx % progress_every == 0: print(f"[scan] {fp.name} line {line_idx}", file=sys.stderr, flush=True) tags_raw = row.get('themeTags') or '' if not tags_raw: continue try: rank = float(row.get('edhrecRank') or 999999) except Exception: rank = 999999 if rank > max_rank: continue tags = _parse_theme_tags(tags_raw) name = row.get('name') or '' if not name: continue is_legendary = False try: typ = row.get('type') or '' if isinstance(typ, str) and 'Legendary' in typ.split(): is_legendary = True except Exception: pass for t in tags: if not t: continue theme_hits.setdefault(t, []).append((rank, name)) if is_legendary: legendary_hits.setdefault(t, []).append((rank, name)) except Exception as e: # pragma: no cover print(f"[warn] failed reading {fp.name}: {e}", file=sys.stderr) print(f"[scan] completed {fp.name} ({processed}/{total_files})", file=sys.stderr, flush=True) # Trim each bucket to reasonable size (keep best ranks) for mapping, cap in ((theme_hits, 120), (legendary_hits, 80)): for t, lst in mapping.items(): lst.sort(key=lambda x: x[0]) if len(lst) > cap: del lst[cap:] return theme_hits, legendary_hits def scan_commander_csv(max_rank: float) -> Dict[str, List[Tuple[float, str]]]: path = CSV_DIR / COMMANDER_FILE out: Dict[str, List[Tuple[float, str]]] = {} if not path.exists(): return out try: with path.open(encoding='utf-8', newline='') as f: reader = csv.DictReader(f) for row in reader: tags_raw = row.get('themeTags') or '' if not tags_raw: continue tags = _parse_theme_tags(tags_raw) try: rank = float(row.get('edhrecRank') or 999999) except Exception: rank = 999999 if rank > max_rank: continue name = row.get('name') or '' if not name: continue for t in tags: if not t: continue out.setdefault(t, []).append((rank, name)) except Exception as e: # pragma: no cover print(f"[warn] failed reading {COMMANDER_FILE}: {e}", file=sys.stderr) for t, lst in out.items(): lst.sort(key=lambda x: x[0]) if len(lst) > 60: del lst[60:] return out def load_yaml_theme(path: Path) -> dict: try: return yaml.safe_load(path.read_text(encoding='utf-8')) if yaml else {} except Exception: return {} def write_yaml_theme(path: Path, data: dict): txt = yaml.safe_dump(data, sort_keys=False, allow_unicode=True) path.write_text(txt, encoding='utf-8') def build_suggestions(theme_hits: Dict[str, List[Tuple[float, str]]], commander_hits: Dict[str, List[Tuple[float, str]]], top: int, top_commanders: int, *, synergy_top=(3,2,1), min_examples: int = 5) -> Dict[str, ThemeSuggestion]: suggestions: Dict[str, ThemeSuggestion] = {} all_themes: Set[str] = set(theme_hits.keys()) | set(commander_hits.keys()) for t in sorted(all_themes): card_names: List[str] = [] if t in theme_hits: for rank, name in theme_hits[t][: top * 3]: # oversample then dedup if name not in card_names: card_names.append(name) if len(card_names) >= top: break commander_names: List[str] = [] if t in commander_hits: for rank, name in commander_hits[t][: top_commanders * 2]: if name not in commander_names: commander_names.append(name) if len(commander_names) >= top_commanders: break # Placeholder synergy_commanders; will be filled later after we know synergies per theme from YAML suggestions[t] = ThemeSuggestion(cards=card_names, commanders=commander_names, synergy_commanders=[]) return suggestions def _derive_synergy_commanders(base_theme: str, data: dict, all_yaml: Dict[str, dict], commander_hits: Dict[str, List[Tuple[float, str]]], legendary_hits: Dict[str, List[Tuple[float, str]]], synergy_top=(3,2,1)) -> List[Tuple[str, str]]: """Pick synergy commanders with their originating synergy label. Returns list of (commander_name, synergy_theme) preserving order of (top synergy, second, third) and internal ranking. """ synergies = data.get('synergies') or [] if not isinstance(synergies, list): return [] pattern = list(synergy_top) out: List[Tuple[str, str]] = [] for idx, count in enumerate(pattern): if idx >= len(synergies): break s_name = synergies[idx] bucket = commander_hits.get(s_name) or [] taken = 0 for _, cname in bucket: if all(cname != existing for existing, _ in out): out.append((cname, s_name)) taken += 1 if taken >= count: break if taken < count: # fallback to legendary card hits tagged with that synergy fallback_bucket = legendary_hits.get(s_name) or [] for _, cname in fallback_bucket: if all(cname != existing for existing, _ in out): out.append((cname, s_name)) taken += 1 if taken >= count: break return out def _augment_synergies(data: dict, base_theme: str) -> bool: """Heuristically augment the 'synergies' list when it's sparse. Rules: - If synergies length >= 3, leave as-is. - Start with existing synergies then append curated/enforced/inferred (in that order) if missing. - For any theme whose display_name contains 'Counter' add 'Counters Matter' and 'Proliferate'. Returns True if modified. """ synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else [] if not isinstance(synergies, list): return False original = list(synergies) if len(synergies) < 3: for key in ('curated_synergies', 'enforced_synergies', 'inferred_synergies'): lst = data.get(key) if isinstance(lst, list): for s in lst: if isinstance(s, str) and s and s not in synergies: synergies.append(s) name = data.get('display_name') or base_theme if isinstance(name, str) and 'counter' in name.lower(): for extra in ('Counters Matter', 'Proliferate'): if extra not in synergies: synergies.append(extra) # Deduplicate preserving order seen = set() deduped = [] for s in synergies: if s not in seen: deduped.append(s) seen.add(s) if deduped != synergies: synergies = deduped if synergies != original: data['synergies'] = synergies return True return False def apply_to_yaml(suggestions: Dict[str, ThemeSuggestion], *, limit_yaml: int, force: bool, themes_filter: Set[str], commander_hits: Dict[str, List[Tuple[float, str]]], legendary_hits: Dict[str, List[Tuple[float, str]]], synergy_top=(3,2,1), min_examples: int = 5, augment_synergies: bool = False): updated = 0 # Preload all YAML for synergy lookups (avoid repeated disk IO inside loop) all_yaml_cache: Dict[str, dict] = {} for p in CATALOG_DIR.glob('*.yml'): try: all_yaml_cache[p.name] = load_yaml_theme(p) except Exception: pass for path in sorted(CATALOG_DIR.glob('*.yml')): data = load_yaml_theme(path) if not isinstance(data, dict): continue display = data.get('display_name') if not isinstance(display, str) or not display: continue if themes_filter and display not in themes_filter: continue sug = suggestions.get(display) if not sug: continue changed = False # Optional synergy augmentation prior to commander derivation if augment_synergies and _augment_synergies(data, display): changed = True # Derive synergy_commanders before promotion logic synergy_cmds = _derive_synergy_commanders(display, data, all_yaml_cache, commander_hits, legendary_hits, synergy_top=synergy_top) # Annotate synergy_commanders with their synergy source for transparency synergy_cmd_names = [f"{c} - Synergy ({src})" for c, src in synergy_cmds] if (force or not data.get('example_cards')) and sug.cards: data['example_cards'] = sug.cards changed = True existing_examples: List[str] = list(data.get('example_commanders') or []) if isinstance(data.get('example_commanders'), list) else [] if force or not existing_examples: if sug.commanders: data['example_commanders'] = list(sug.commanders) existing_examples = data['example_commanders'] changed = True # (Attachment of synergy_commanders moved to after promotion so we can filter duplicates with example_commanders) # Re-annotate existing example_commanders if they use old base-theme annotation pattern if existing_examples and synergy_cmds: # Detect old pattern: ends with base theme name inside parentheses needs_reannotate = False old_suffix = f" - Synergy ({display})" for ex in existing_examples: if ex.endswith(old_suffix): needs_reannotate = True break if needs_reannotate: # Build mapping from commander name to synergy source source_map = {name: src for name, src in synergy_cmds} new_examples: List[str] = [] for ex in existing_examples: if ' - Synergy (' in ex: base_name = ex.split(' - Synergy ')[0] if base_name in source_map: new_examples.append(f"{base_name} - Synergy ({source_map[base_name]})") continue new_examples.append(ex) if new_examples != existing_examples: data['example_commanders'] = new_examples existing_examples = new_examples changed = True # Promotion: ensure at least min_examples in example_commanders by moving from synergy list (without duplicates) if (len(existing_examples) < min_examples) and synergy_cmd_names: needed = min_examples - len(existing_examples) promoted = [] for cname, source_synergy in synergy_cmds: # Avoid duplicate even with annotation if not any(cname == base.split(' - Synergy ')[0] for base in existing_examples): annotated = f"{cname} - Synergy ({source_synergy})" existing_examples.append(annotated) promoted.append(cname) needed -= 1 if needed <= 0: break if promoted: data['example_commanders'] = existing_examples changed = True # After any potential promotions / re-annotations, attach synergy_commanders excluding any commanders already present in example_commanders existing_base_names = {ex.split(' - Synergy ')[0] for ex in (data.get('example_commanders') or []) if isinstance(ex, str)} filtered_synergy_cmd_names = [] for entry in synergy_cmd_names: base = entry.split(' - Synergy ')[0] if base not in existing_base_names: filtered_synergy_cmd_names.append(entry) prior_synergy_cmds = data.get('synergy_commanders') if isinstance(data.get('synergy_commanders'), list) else [] if prior_synergy_cmds != filtered_synergy_cmd_names: if filtered_synergy_cmd_names or force or prior_synergy_cmds: data['synergy_commanders'] = filtered_synergy_cmd_names changed = True if changed: write_yaml_theme(path, data) updated += 1 print(f"[apply] updated {path.name}") if limit_yaml and updated >= limit_yaml: print(f"[apply] reached limit {limit_yaml}; stopping") break return updated def main(): # pragma: no cover parser = argparse.ArgumentParser(description='Generate example_cards / example_commanders suggestions for theme YAML') parser.add_argument('--themes', type=str, help='Comma-separated subset of display names to restrict') parser.add_argument('--top', type=int, default=8, help='Target number of example_cards suggestions') parser.add_argument('--top-commanders', type=int, default=5, help='Target number of example_commanders suggestions') parser.add_argument('--max-rank', type=float, default=60000, help='Skip cards with EDHREC rank above this threshold') parser.add_argument('--include-master', action='store_true', help='Include large cards.csv in scan (slower)') parser.add_argument('--progress-every', type=int, default=0, help='Emit a progress line every N rows per file') parser.add_argument('--apply', action='store_true', help='Write missing fields into YAML files') parser.add_argument('--limit-yaml', type=int, default=0, help='Limit number of YAML files modified (0 = unlimited)') parser.add_argument('--force', action='store_true', help='Overwrite existing example lists') parser.add_argument('--min-examples', type=int, default=5, help='Minimum desired example_commanders; promote from synergy_commanders if short') parser.add_argument('--augment-synergies', action='store_true', help='Heuristically augment sparse synergies list before deriving synergy_commanders') args = parser.parse_args() themes_filter: Set[str] = set() if args.themes: themes_filter = {t.strip() for t in args.themes.split(',') if t.strip()} print('[info] scanning CSVs...', file=sys.stderr) theme_hits, legendary_hits = scan_color_csvs(args.include_master, args.max_rank, args.progress_every) print('[info] scanning commander CSV...', file=sys.stderr) commander_hits = scan_commander_csv(args.max_rank) print('[info] building suggestions...', file=sys.stderr) suggestions = build_suggestions(theme_hits, commander_hits, args.top, args.top_commanders, min_examples=args.min_examples) if not args.apply: # Dry run: print JSON-like summary for filtered subset (or first 25 themes) to_show = sorted(themes_filter) if themes_filter else list(sorted(suggestions.keys())[:25]) for t in to_show: s = suggestions.get(t) if not s: continue print(f"\n=== {t} ===") print('example_cards:', ', '.join(s.cards) or '(none)') print('example_commanders:', ', '.join(s.commanders) or '(none)') print('synergy_commanders: (computed at apply time)') print('\n[info] dry-run complete (use --apply to write)') return if yaml is None: print('ERROR: PyYAML not installed; cannot apply changes.', file=sys.stderr) sys.exit(1) updated = apply_to_yaml(suggestions, limit_yaml=args.limit_yaml, force=args.force, themes_filter=themes_filter, commander_hits=commander_hits, legendary_hits=legendary_hits, synergy_top=(3,2,1), min_examples=args.min_examples, augment_synergies=args.augment_synergies) print(f'[info] updated {updated} YAML files') if __name__ == '__main__': # pragma: no cover main()