"""Editorial population helper for theme YAML files. Features implemented here: Commander population modes: - Padding: Fill undersized example_commanders lists (< --min) with synergy-derived commanders. - Rebalance: Prepend missing base-theme commanders if list already meets --min but lacks them. - Base-first rebuild: Overwrite lists using ordering (base tag -> synergy tag -> color fallback), truncating to --min. Example cards population (NEW): - Optional (--fill-example-cards) creation/padding of example_cards lists to a target size (default 10) using base theme cards first, then synergy theme cards, then color-identity fallback. - EDHREC ordering: Uses ascending edhrecRank sourced from cards.csv (if present) or shard CSVs. - Avoids reusing commander names (base portion of commander entries) to diversify examples. Safeguards: - Dry run by default (no writes unless --apply) - Does not truncate existing example_cards if already >= target - Deduplicates by raw card name Typical usage: Populate commanders only (padding): python code/scripts/synergy_promote_fill.py --min 5 --apply Base-first rebuild of commanders AND populate 10 example cards: python code/scripts/synergy_promote_fill.py --base-first-rebuild --min 5 \ --fill-example-cards --cards-target 10 --apply Only fill example cards (leave commanders untouched): python code/scripts/synergy_promote_fill.py --fill-example-cards --cards-target 10 --apply """ from __future__ import annotations import argparse import ast import csv from pathlib import Path from typing import Dict, List, Tuple, Set, Iterable, Optional try: import yaml # type: ignore except Exception: # pragma: no cover yaml = None ROOT = Path(__file__).resolve().parents[2] CSV_DIR = ROOT / 'csv_files' CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog' COLOR_CSV_GLOB = '*_cards.csv' COMMANDER_FILE = 'commander_cards.csv' MASTER_CARDS_FILE = 'cards.csv' def parse_theme_tags(raw: str) -> List[str]: if not raw: return [] raw = raw.strip() if not raw or raw == '[]': return [] try: val = ast.literal_eval(raw) if isinstance(val, list): return [str(x) for x in val if isinstance(x, str)] except Exception: pass return [t.strip().strip("'\"") for t in raw.strip('[]').split(',') if t.strip()] def parse_color_identity(raw: str | None) -> Set[str]: if not raw: return set() raw = raw.strip() if not raw: return set() try: val = ast.literal_eval(raw) if isinstance(val, (list, tuple)): return {str(x).upper() for x in val if str(x).upper() in {'W','U','B','R','G','C'}} except Exception: pass # fallback: collect mana letters present return {ch for ch in raw.upper() if ch in {'W','U','B','R','G','C'}} def scan_sources(max_rank: float) -> Tuple[Dict[str, List[Tuple[float,str]]], Dict[str, List[Tuple[float,str]]], List[Tuple[float,str,Set[str]]]]: """Build commander candidate pools exclusively from commander_cards.csv. We intentionally ignore the color shard *_cards.csv sources here because those include many non-commander legendary permanents or context-specific lists; using only commander_cards.csv guarantees every suggestion is a legal commander. 


def scan_sources(max_rank: float) -> Tuple[Dict[str, List[Tuple[float, str]]], Dict[str, List[Tuple[float, str]]], List[Tuple[float, str, Set[str]]]]:
    """Build commander candidate pools exclusively from commander_cards.csv.

    We intentionally ignore the color shard *_cards.csv sources here because those
    include many non-commander legendary permanents or context-specific lists;
    using only commander_cards.csv guarantees every suggestion is a legal commander.

    Returns:
        theme_hits: mapping theme tag -> sorted unique list of (rank, commander name)
        theme_all_legendary_hits: alias of theme_hits (legacy return shape)
        color_pool: list of (rank, commander name, color identity set)
    """
    theme_hits: Dict[str, List[Tuple[float, str]]] = {}
    color_pool: List[Tuple[float, str, Set[str]]] = []
    commander_path = CSV_DIR / COMMANDER_FILE
    if not commander_path.exists():
        return {}, {}, []
    try:
        with commander_path.open(encoding='utf-8', newline='') as f:
            reader = csv.DictReader(f)
            for row in reader:
                try:
                    rank = float(row.get('edhrecRank') or 999999)
                except Exception:
                    rank = 999999
                if rank > max_rank:
                    continue
                typ = row.get('type') or ''
                if 'Legendary' not in typ:
                    continue
                name = row.get('name') or ''
                if not name:
                    continue
                ci = parse_color_identity(row.get('colorIdentity') or row.get('colors'))
                color_pool.append((rank, name, ci))
                tags_raw = row.get('themeTags') or ''
                if tags_raw:
                    for t in parse_theme_tags(tags_raw):
                        theme_hits.setdefault(t, []).append((rank, name))
    except Exception:
        pass
    # Deduplicate + sort theme hits
    for t, lst in theme_hits.items():
        lst.sort(key=lambda x: x[0])
        seen: Set[str] = set()
        dedup: List[Tuple[float, str]] = []
        for r, n in lst:
            if n in seen:
                continue
            seen.add(n)
            dedup.append((r, n))
        theme_hits[t] = dedup
    # Deduplicate color pool (keep best rank)
    color_pool.sort(key=lambda x: x[0])
    seen_cp: Set[str] = set()
    dedup_pool: List[Tuple[float, str, Set[str]]] = []
    for r, n, cset in color_pool:
        if n in seen_cp:
            continue
        seen_cp.add(n)
        dedup_pool.append((r, n, cset))
    return theme_hits, theme_hits, dedup_pool
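
# Shape of the scan_sources() return values, shown as a minimal sketch with invented
# ranks (the commander names are real cards used only for illustration):
#   theme_hits == {'Tokens Matter': [(1120.0, 'Rhys the Redeemed'), (2430.0, 'Jetmir, Nexus of Revels')], ...}
#   color_pool == [(85.0, "Atraxa, Praetors' Voice", {'W', 'U', 'B', 'G'}), ...]
# The second element of the returned tuple is the same mapping again (legacy shape).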


def scan_card_pool(max_rank: float, use_master: bool = False) -> Tuple[Dict[str, List[Tuple[float, str, Set[str]]]], List[Tuple[float, str, Set[str]]]]:
    """Scan the non-commander card pool for example_cards population.

    Default behavior (preferred per project guidance): use ONLY the shard color CSVs
    ([color]_cards.csv). The consolidated master ``cards.csv`` contains every card
    face/variant and can introduce duplicate or art-variant noise (e.g.,
    "Sol Ring // Sol Ring"). We therefore avoid it unless explicitly requested via
    ``use_master=True`` / ``--use-master-cards``.

    When the master file is used we prefer ``faceName`` over ``name`` (falling back
    to ``name``) and collapse redundant split names like "Foo // Foo" to just "Foo".

    Returns:
        theme_card_hits: mapping theme tag -> [(rank, card name, color set)] sorted & deduped
        color_pool: global list of unique cards for color fallback
    """
    theme_card_hits: Dict[str, List[Tuple[float, str, Set[str]]]] = {}
    color_pool: List[Tuple[float, str, Set[str]]] = []
    master_path = CSV_DIR / MASTER_CARDS_FILE

    def canonical_name(row: Dict[str, str]) -> str:
        nm = (row.get('faceName') or row.get('name') or '').strip()
        if '//' in nm:
            parts = [p.strip() for p in nm.split('//')]
            if len(parts) == 2 and parts[0] == parts[1]:
                nm = parts[0]
        return nm

    def _process_row(row: Dict[str, str]):
        try:
            rank = float(row.get('edhrecRank') or 999999)
        except Exception:
            rank = 999999
        if rank > max_rank:
            return
        # Prefer canonicalized name (faceName if present; collapse duplicate split faces)
        name = canonical_name(row)
        if not name:
            return
        ci = parse_color_identity(row.get('colorIdentity') or row.get('colors'))
        tags_raw = row.get('themeTags') or ''
        if tags_raw:
            for t in parse_theme_tags(tags_raw):
                theme_card_hits.setdefault(t, []).append((rank, name, ci))
        color_pool.append((rank, name, ci))

    # Collection strategy
    master_ok = False
    if use_master and master_path.exists():
        try:
            with master_path.open(encoding='utf-8', newline='') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    _process_row(row)
            master_ok = True
        except Exception:
            pass  # fall through to shards if master problematic
    # Shards are the primary source; they are also used when the master file was not
    # requested, is missing, or could not be read.
    if not master_ok:
        for fp in sorted(CSV_DIR.glob(COLOR_CSV_GLOB)):
            if fp.name in {COMMANDER_FILE}:
                continue
            if 'testdata' in str(fp):
                continue
            try:
                with fp.open(encoding='utf-8', newline='') as f:
                    reader = csv.DictReader(f)
                    for row in reader:
                        _process_row(row)
            except Exception:
                continue
    # Dedup + rank-sort per theme
    for t, lst in theme_card_hits.items():
        lst.sort(key=lambda x: x[0])
        seen: Set[str] = set()
        dedup: List[Tuple[float, str, Set[str]]] = []
        for r, n, cset in lst:
            if n in seen:
                continue
            seen.add(n)
            dedup.append((r, n, cset))
        theme_card_hits[t] = dedup
    # Dedup global color pool (keep best rank occurrence)
    color_pool.sort(key=lambda x: x[0])
    seen_global: Set[str] = set()
    dedup_global: List[Tuple[float, str, Set[str]]] = []
    for r, n, cset in color_pool:
        if n in seen_global:
            continue
        seen_global.add(n)
        dedup_global.append((r, n, cset))
    return theme_card_hits, dedup_global


def load_yaml(path: Path) -> dict:
    try:
        return yaml.safe_load(path.read_text(encoding='utf-8')) if yaml else {}
    except Exception:
        return {}


def save_yaml(path: Path, data: dict):
    txt = yaml.safe_dump(data, sort_keys=False, allow_unicode=True)
    path.write_text(txt, encoding='utf-8')


def theme_color_set(data: dict) -> Set[str]:
    mapping = {'White': 'W', 'Blue': 'U', 'Black': 'B', 'Red': 'R', 'Green': 'G', 'Colorless': 'C'}
    out: Set[str] = set()
    for key in ('primary_color', 'secondary_color', 'tertiary_color'):
        val = data.get(key)
        if isinstance(val, str) and val in mapping:
            out.add(mapping[val])
    return out
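
# Assumed shape of a catalog YAML file consumed below. The field names come from the
# accesses in this script; the concrete values are invented for illustration only:
#
#   display_name: Tokens Matter
#   synergies:
#     - Aristocrats
#     - Sacrifice Matters
#   primary_color: Green
#   secondary_color: White
#   example_commanders:
#     - Rhys the Redeemed
#     - Teysa Karlov - Synergy (Aristocrats)
#   example_cards:
#     - Parallel Lives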


def rebuild_base_first(
    data: dict,
    theme_hits: Dict[str, List[Tuple[float, str]]],
    min_examples: int,
    color_pool: Iterable[Tuple[float, str, Set[str]]],
    annotate_color_reason: bool = False,
) -> List[str]:
    """Return new example_commanders list using base-first strategy."""
    if not isinstance(data, dict):
        return []
    display = data.get('display_name') or ''
    synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else []
    chosen: List[str] = []
    used: Set[str] = set()
    # Base theme hits first (rank order)
    for _, cname in theme_hits.get(display, []):
        if len(chosen) >= min_examples:
            break
        if cname in used:
            continue
        chosen.append(cname)
        used.add(cname)
    # Synergy hits annotated
    if len(chosen) < min_examples:
        for syn in synergies:
            for _, cname in theme_hits.get(syn, []):
                if len(chosen) >= min_examples:
                    break
                if cname in used:
                    continue
                chosen.append(f"{cname} - Synergy ({syn})")
                used.add(cname)
            if len(chosen) >= min_examples:
                break
    # Color fallback
    if len(chosen) < min_examples:
        t_colors = theme_color_set(data)
        if t_colors:
            for _, cname, cset in color_pool:
                if len(chosen) >= min_examples:
                    break
                if cset - t_colors:
                    continue
                if cname in used:
                    continue
                if annotate_color_reason:
                    chosen.append(f"{cname} - Color Fallback (no on-theme commander available)")
                else:
                    chosen.append(cname)
                used.add(cname)
    return chosen[:min_examples]


def fill_example_cards(
    data: dict,
    theme_card_hits: Dict[str, List[Tuple[float, str, Set[str]]]],
    color_pool: Iterable[Tuple[float, str, Set[str]]],
    target: int,
    avoid: Optional[Set[str]] = None,
    allow_color_fallback: bool = True,
    rebuild: bool = False,
) -> Tuple[bool, List[str]]:
    """Populate or pad example_cards using base->synergy->color ordering.

    - Card ordering within each phase preserves ascending EDHREC rank (already sorted).
    - 'avoid' set lets us skip commander names to diversify examples.
    - Does not shrink an overfilled list (only grows up to target).

    Returns (changed, added_entries).
    """
    if not isinstance(data, dict):
        return False, []
    cards_field = data.get('example_cards')
    if not isinstance(cards_field, list):
        cards_field = []
    # Rebuild forces clearing existing list so we can repopulate even if already at target size
    if rebuild:
        cards_field = []
    original = list(cards_field)
    if len(cards_field) >= target and not rebuild:
        return False, []  # nothing to do when already populated unless rebuilding
    display = data.get('display_name') or ''
    synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else []
    used: Set[str] = {c for c in cards_field if isinstance(c, str)}
    if avoid:
        used |= avoid
    # Phase 1: base theme cards
    for _, name, _ in theme_card_hits.get(display, []):
        if len(cards_field) >= target:
            break
        if name in used:
            continue
        cards_field.append(name)
        used.add(name)
    # Phase 2: synergy cards
    if len(cards_field) < target:
        for syn in synergies:
            for _, name, _ in theme_card_hits.get(syn, []):
                if len(cards_field) >= target:
                    break
                if name in used:
                    continue
                cards_field.append(name)
                used.add(name)
            if len(cards_field) >= target:
                break
    # Phase 3: color fallback
    if allow_color_fallback and len(cards_field) < target:
        t_colors = theme_color_set(data)
        if t_colors:
            for _, name, cset in color_pool:
                if len(cards_field) >= target:
                    break
                if name in used:
                    continue
                if cset - t_colors:
                    continue
                cards_field.append(name)
                used.add(name)
    # Trim safeguard (should not exceed target)
    if len(cards_field) > target:
        del cards_field[target:]
    if cards_field != original:
        data['example_cards'] = cards_field
        added = [c for c in cards_field if c not in original]
        return True, added
    return False, []
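
# Worked mini-example of the fill_example_cards ordering, with hypothetical candidate
# pools and target=5 (card names are real cards chosen only to illustrate the phases):
#   base hits      : ['Parallel Lives', 'Anointed Procession', 'Doubling Season']
#   synergy hits   : ['Blood Artist']          (from the first listed synergy)
#   color fallback : ['Swords to Plowshares']  (only if allow_color_fallback is True)
#   resulting example_cards: all five, in that order; names already present or in
#   'avoid' are skipped, and an existing list is never shrunk below what it had.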


def pad_theme(
    data: dict,
    theme_hits: Dict[str, List[Tuple[float, str]]],
    min_examples: int,
    color_pool: Iterable[Tuple[float, str, Set[str]]],
    base_min: int = 2,
    drop_annotation_if_base: bool = True,
) -> Tuple[bool, List[str]]:
    """Return (changed, added_entries).

    Hybrid strategy:
      1. Ensure up to base_min commanders directly tagged with the base theme (display_name)
         appear (unannotated) before filling remaining slots.
      2. Then add synergy-tagged commanders (annotated) in listed order, skipping duplicates.
      3. If still short, cycle remaining base hits (if any unused) and then color fallback.
      4. If a commander is both a base hit and added during synergy phase and
         drop_annotation_if_base=True, we emit it unannotated to highlight it as a flagship example.
    """
    if not isinstance(data, dict):
        return False, []
    examples = data.get('example_commanders')
    if not isinstance(examples, list):
        # Treat missing / invalid field as empty to allow first-time population
        examples = []
        data['example_commanders'] = examples
    if len(examples) >= min_examples:
        return False, []
    synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else []
    display = data.get('display_name') or ''
    base_names = {e.split(' - Synergy ')[0] for e in examples if isinstance(e, str)}
    added: List[str] = []
    # Phase 1: seed with base theme commanders (unannotated) up to base_min
    base_cands = theme_hits.get(display) or []
    for _, cname in base_cands:
        if len(examples) + len(added) >= min_examples or len([a for a in added if ' - Synergy (' not in a]) >= base_min:
            break
        if cname in base_names:
            continue
        base_names.add(cname)
        added.append(cname)
    # Phase 2: synergy-based candidates following list order
    for syn in synergies:
        if len(examples) + len(added) >= min_examples:
            break
        cand_list = theme_hits.get(syn) or []
        for _, cname in cand_list:
            if len(examples) + len(added) >= min_examples:
                break
            if cname in base_names:
                continue
            # If commander is ALSO tagged with base theme and we want a clean flagship, drop annotation
            base_tagged = any(cname == bn for _, bn in base_cands)
            if base_tagged and drop_annotation_if_base:
                annotated = cname
            else:
                annotated = f"{cname} - Synergy ({syn})"
            base_names.add(cname)
            added.append(annotated)
    # Phase 3: if still short, add any remaining unused base hits (unannotated)
    if len(examples) + len(added) < min_examples:
        for _, cname in base_cands:
            if len(examples) + len(added) >= min_examples:
                break
            if cname in base_names:
                continue
            base_names.add(cname)
            added.append(cname)
    if len(examples) + len(added) < min_examples:
        # Color-aware fallback: fill with top-ranked legendary commanders whose color identity is subset of theme colors
        t_colors = theme_color_set(data)
        if t_colors:
            for _, cname, cset in color_pool:
                if len(examples) + len(added) >= min_examples:
                    break
                if not cset:
                    # colorless commander acceptable if theme includes C or any color (subset logic handles)
                    pass
                if cset - t_colors:
                    continue  # requires colors outside theme palette
                if cname in base_names:
                    continue
                base_names.add(cname)
                added.append(cname)  # unannotated to avoid invalid synergy annotation
    if added:
        data['example_commanders'] = examples + added
        return True, added
    return False, []
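
# Worked mini-example of pad_theme with hypothetical hits, min_examples=5, base_min=2
# ("Commander A" .. "Commander F" are placeholder names, not real cards):
#   existing example_commanders : ['Commander A']
#   base-theme hits             : ['Commander B', 'Commander C', 'Commander D']
#   synergy 'S1' hits           : ['Commander E', 'Commander F']
#   result: ['Commander A', 'Commander B', 'Commander C',
#            'Commander E - Synergy (S1)', 'Commander F - Synergy (S1)']
# Base hits stay unannotated; synergy additions carry the " - Synergy (...)" suffix
# unless the commander is also base-tagged and drop_annotation_if_base is True.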


def main():  # pragma: no cover (script orchestration)
    ap = argparse.ArgumentParser(description='Synergy-based padding for undersized example_commanders lists')
    ap.add_argument('--min', type=int, default=5, help='Minimum target examples (default 5)')
    ap.add_argument('--max-rank', type=float, default=60000, help='EDHREC rank ceiling for candidate commanders')
    ap.add_argument('--base-min', type=int, default=2, help='Minimum number of base-theme commanders (default 2)')
    ap.add_argument('--no-drop-base-annotation', action='store_true', help='Do not drop synergy annotation when commander also has base theme tag')
    ap.add_argument('--rebalance', action='store_true', help='Adjust themes already meeting --min if they lack required base-theme commanders')
    ap.add_argument('--base-first-rebuild', action='store_true', help='Overwrite lists using base-first strategy (base -> synergy -> color)')
    ap.add_argument('--apply', action='store_true', help='Write changes (default dry-run)')
    # Example cards population flags
    ap.add_argument('--fill-example-cards', action='store_true', help='Populate example_cards (base->synergy->[color fallback])')
    ap.add_argument('--cards-target', type=int, default=10, help='Target number of example_cards (default 10)')
    ap.add_argument('--cards-max-rank', type=float, default=60000, help='EDHREC rank ceiling for example_cards candidates')
    ap.add_argument('--cards-no-color-fallback', action='store_true', help='Do NOT use color identity fallback for example_cards (only theme & synergies)')
    ap.add_argument('--rebuild-example-cards', action='store_true', help='Discard existing example_cards and rebuild from scratch')
    ap.add_argument('--text-heuristics', action='store_true', help='Augment example_cards by scanning card text for theme keywords when direct tag hits are empty')
    ap.add_argument('--no-generic-pad', action='store_true', help='When true, leave example_cards shorter than target instead of filling with generic color-fallback or staple cards')
    ap.add_argument('--annotate-color-fallback-commanders', action='store_true', help='Annotate color fallback commander additions with reason when base/synergy empty')
    ap.add_argument('--heuristic-rank-cap', type=float, default=25000, help='Maximum EDHREC rank allowed for heuristic text-derived candidates (default 25000)')
    ap.add_argument('--use-master-cards', action='store_true', help='Use consolidated master cards.csv (default: use only shard [color]_cards.csv files)')
    ap.add_argument('--cards-limited-color-fallback-threshold', type=int, default=0, help='If >0 and color fallback disabled, allow a second limited color fallback pass only for themes whose example_cards count remains below this threshold after heuristics')
    ap.add_argument('--common-card-threshold', type=float, default=0.18, help='Exclude candidate example_cards appearing (before build) in more than this fraction of themes (default 0.18 = 18%%)')
    ap.add_argument('--print-dup-metrics', action='store_true', help='Print global duplicate frequency metrics for example_cards after run')
    args = ap.parse_args()
    if yaml is None:
        print('PyYAML not installed')
        raise SystemExit(1)
    theme_hits, _, color_pool = scan_sources(args.max_rank)
    theme_card_hits: Dict[str, List[Tuple[float, str, Set[str]]]] = {}
    card_color_pool: List[Tuple[float, str, Set[str]]] = []
    name_index: Dict[str, Tuple[float, str, Set[str]]] = {}
    if args.fill_example_cards:
        theme_card_hits, card_color_pool = scan_card_pool(args.cards_max_rank, use_master=args.use_master_cards)
        # Build quick lookup for manual overrides
        name_index = {n: (r, n, c) for r, n, c in card_color_pool}
    changed_count = 0
    cards_changed = 0
    # Precompute text index lazily only if requested
    text_index: Dict[str, List[Tuple[float, str, Set[str]]]] = {}
    staples_block: Set[str] = {
        # common generic staples to suppress unless they match heuristics explicitly
        'Sol Ring', 'Arcane Signet', 'Command Tower', 'Exotic Orchard', 'Path of Ancestry',
        'Swiftfoot Boots', 'Lightning Greaves', 'Reliquary Tower',
    }
    # Build text index if heuristics requested
    if args.text_heuristics:
        # Build text index from the same source strategy: master (optional) + shards,
        # honoring faceName & canonical split collapse.
        import re

        def _scan_rows_for_text(reader):
            for row in reader:
                try:
                    rank = float(row.get('edhrecRank') or 999999)
                except Exception:
                    rank = 999999
                if rank > args.cards_max_rank:
                    continue
                # canonical naming logic (mirrors scan_card_pool)
                nm = (row.get('faceName') or row.get('name') or '').strip()
                if '//' in nm:
                    parts = [p.strip() for p in nm.split('//')]
                    if len(parts) == 2 and parts[0] == parts[1]:
                        nm = parts[0]
                if not nm:
                    continue
                text = (row.get('text') or '').lower()
                ci = parse_color_identity(row.get('colorIdentity') or row.get('colors'))
                tokens = set(re.findall(r"\+1/\+1|[a-zA-Z']+", text))
                for t in tokens:
                    if not t:
                        continue
                    bucket = text_index.setdefault(t, [])
                    bucket.append((rank, nm, ci))

        try:
            if args.use_master_cards and (CSV_DIR / MASTER_CARDS_FILE).exists():
                with (CSV_DIR / MASTER_CARDS_FILE).open(encoding='utf-8', newline='') as f:
                    _scan_rows_for_text(csv.DictReader(f))
            # Always include shards (they are authoritative curated sets)
            for fp in sorted(CSV_DIR.glob(COLOR_CSV_GLOB)):
                if fp.name in {COMMANDER_FILE} or 'testdata' in str(fp):
                    continue
                with fp.open(encoding='utf-8', newline='') as f:
                    _scan_rows_for_text(csv.DictReader(f))
            # sort & dedup per token
            for tok, lst in text_index.items():
                lst.sort(key=lambda x: x[0])
                seen_tok: Set[str] = set()
                dedup_tok: List[Tuple[float, str, Set[str]]] = []
                for r, n, c in lst:
                    if n in seen_tok:
                        continue
                    seen_tok.add(n)
                    dedup_tok.append((r, n, c))
                text_index[tok] = dedup_tok
        except Exception:
            text_index = {}

    def heuristic_candidates(theme_name: str) -> List[Tuple[float, str, Set[str]]]:
        if not args.text_heuristics or not text_index:
            return []
        name_lower = theme_name.lower()
        manual: Dict[str, List[str]] = {
            'landfall': ['landfall'],
            'reanimate': ['reanimate', 'unearth', 'eternalize', 'return', 'graveyard'],
            'tokens matter': ['token', 'populate', 'clue', 'treasure', 'food', 'blood', 'incubator', 'map', 'powerstone', 'role'],
            '+1/+1 counters': ['+1/+1', 'counter', 'proliferate', 'adapt', 'evolve'],
            'superfriends': ['planeswalker', 'loyalty', 'proliferate'],
            'aggro': ['haste', 'attack', 'battalion', 'raid', 'melee'],
            'lifegain': ['life', 'lifelink'],
            'graveyard matters': ['graveyard', 'dies', 'mill', 'disturb', 'flashback'],
            'group hug': ['draw', 'each', 'everyone', 'opponent', 'card', 'all'],
            'politics': ['each', 'player', 'vote', 'council'],
            'stax': ['sacrifice', 'upkeep', 'each', 'player', 'skip'],
            'aristocrats': ['dies', 'sacrifice', 'token'],
            'sacrifice matters': ['sacrifice', 'dies'],
            'sacrifice to draw': ['sacrifice', 'draw'],
            'artifact tokens': ['treasure', 'clue', 'food', 'blood', 'powerstone', 'incubator', 'map'],
            'archer kindred': ['archer', 'bow', 'ranged'],
            'eerie': ['enchant', 'aura', 'role', 'eerie'],
        }
        # Manual hand-picked iconic cards per theme (prioritized before token buckets)
        manual_cards: Dict[str, List[str]] = {
            'group hug': [
                'Howling Mine', 'Temple Bell', 'Rites of Flourishing', 'Kami of the Crescent Moon', 'Dictate of Kruphix',
                'Font of Mythos', 'Minds Aglow', 'Collective Voyage', 'Horn of Greed', 'Prosperity',
            ],
            'reanimate': [
                'Reanimate', 'Animate Dead', 'Victimize', 'Living Death', 'Necromancy',
                'Exhume', 'Dread Return', 'Unburial Rites', 'Persist', 'Stitch Together',
            ],
            'archer kindred': [
                'Greatbow Doyen', "Archer's Parapet", 'Jagged-Scar Archers', 'Silklash Spider', 'Elite Scaleguard',
                'Kyren Sniper', 'Viridian Longbow', 'Brigid, Hero of Kinsbaile', 'Longshot Squad', 'Evolution Sage',
            ],
            'eerie': [
                "Sythis, Harvest's Hand", "Enchantress's Presence", 'Setessan Champion', 'Eidolon of Blossoms', 'Mesa Enchantress',
                'Sterling Grove', 'Calix, Guided by Fate', 'Femeref Enchantress', 'Satyr Enchanter', 'Argothian Enchantress',
            ],
        }
        keys = manual.get(name_lower, [])
        if not keys:
            # derive naive tokens: split words > 3 chars
            import re
            keys = [w for w in re.findall(r'[a-zA-Z\+\/]+', name_lower) if len(w) > 3 or '+1/+1' in w]
        merged: List[Tuple[float, str, Set[str]]] = []
        seen: Set[str] = set()
        # Insert manual card overrides first (respect rank cap if available)
        if name_lower in manual_cards and name_index:
            for card in manual_cards[name_lower]:
                tup = name_index.get(card)
                if not tup:
                    continue
                r, n, ci = tup
                if r > args.heuristic_rank_cap:
                    continue
                if n in seen:
                    continue
                seen.add(n)
                merged.append(tup)
        for k in keys:
            bucket = text_index.get(k)
            if not bucket:
                continue
            for r, n, ci in bucket[:120]:
                if n in seen:
                    continue
                if r > args.heuristic_rank_cap:
                    continue
                # skip staples if they lack the keyword in name (avoid universal ramp/utility artifacts)
                if n in staples_block and k not in n.lower():
                    continue
                seen.add(n)
                merged.append((r, n, ci))
                if len(merged) >= 60:
                    break
        return merged
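
    # Example of the token extraction the heuristics rely on (illustrative oracle text):
    #   re.findall(r"\+1/\+1|[a-zA-Z']+", "put a +1/+1 counter on each creature you control")
    #   -> ['put', 'a', '+1/+1', 'counter', 'on', 'each', 'creature', 'you', 'control']
    # so a theme keyword such as '+1/+1' or 'counter' indexes that card under text_index.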

    for path in sorted(CATALOG_DIR.glob('*.yml')):
        data = load_yaml(path)
        if not data or not isinstance(data, dict) or not data.get('display_name'):
            continue
        notes = data.get('notes')
        if isinstance(notes, str) and 'Deprecated alias file' in notes:
            continue
        ex = data.get('example_commanders')
        if not isinstance(ex, list):
            ex = []
            data['example_commanders'] = ex
        need_rebalance = False
        if args.base_first_rebuild:
            new_list = rebuild_base_first(
                data,
                theme_hits,
                args.min,
                color_pool,
                annotate_color_reason=args.annotate_color_fallback_commanders,
            )
            if new_list != ex:
                data['example_commanders'] = new_list
                changed_count += 1
                print(f"[rebuild] {path.name}: {len(ex)} -> {len(new_list)}")
                if args.apply:
                    save_yaml(path, data)
        else:
            if len(ex) >= args.min:
                if args.rebalance and data.get('display_name'):
                    base_tag = data['display_name']
                    base_cands = {n for _, n in theme_hits.get(base_tag, [])}
                    existing_base_examples = [e for e in ex if (e.split(' - Synergy ')[0]) in base_cands and ' - Synergy (' not in e]
                    if len(existing_base_examples) < args.base_min and base_cands:
                        need_rebalance = True
                if not need_rebalance:
                    pass  # leave commanders untouched (might still fill cards)
                if need_rebalance:
                    orig_len = len(ex)
                    base_tag = data['display_name']
                    base_cands_ordered = [n for _, n in theme_hits.get(base_tag, [])]
                    current_base_names = {e.split(' - Synergy ')[0] for e in ex}
                    additions: List[str] = []
                    for cname in base_cands_ordered:
                        if len([a for a in ex + additions if ' - Synergy (' not in a]) >= args.base_min:
                            break
                        if cname in current_base_names:
                            continue
                        additions.append(cname)
                        current_base_names.add(cname)
                    if additions:
                        data['example_commanders'] = additions + ex
                        changed_count += 1
                        print(f"[rebalance] {path.name}: inserted {len(additions)} base exemplars (len {orig_len} -> {len(data['example_commanders'])})")
                        if args.apply:
                            save_yaml(path, data)
            else:
                if len(ex) < args.min:
                    orig_len = len(ex)
                    changed, added = pad_theme(
                        data,
                        theme_hits,
                        args.min,
                        color_pool,
                        base_min=args.base_min,
                        drop_annotation_if_base=not args.no_drop_base_annotation,
                    )
                    if changed:
                        changed_count += 1
                        print(f"[promote] {path.name}: {orig_len} -> {len(data['example_commanders'])} (added {len(added)})")
                        if args.apply:
                            save_yaml(path, data)
        # Example cards population
        if args.fill_example_cards:
            avoid = {c.split(' - Synergy ')[0] for c in data.get('example_commanders', []) if isinstance(c, str)}
            pre_cards_len = len(data.get('example_cards') or []) if isinstance(data.get('example_cards'), list) else 0
            # If no direct tag hits for base theme AND heuristics enabled, inject synthetic hits
            display = data.get('display_name') or ''
            if args.text_heuristics and display and not theme_card_hits.get(display):
                cand = heuristic_candidates(display)
                if cand:
                    theme_card_hits[display] = cand
            # Build global duplicate frequency map ONCE (baseline prior to this run) if threshold active
            if args.common_card_threshold > 0 and 'GLOBAL_CARD_FREQ' not in globals():  # type: ignore
                freq: Dict[str, int] = {}
                total_themes = 0
                for fp0 in CATALOG_DIR.glob('*.yml'):
                    dat0 = load_yaml(fp0)
                    if not isinstance(dat0, dict):
                        continue
                    ecs0 = dat0.get('example_cards')
                    if not isinstance(ecs0, list) or not ecs0:
                        continue
                    total_themes += 1
                    seen_local: Set[str] = set()
                    for c in ecs0:
                        if not isinstance(c, str) or c in seen_local:
                            continue
                        seen_local.add(c)
                        freq[c] = freq.get(c, 0) + 1
                globals()['GLOBAL_CARD_FREQ'] = (freq, total_themes)  # type: ignore
            # Apply duplicate filtering to candidate lists (do NOT mutate existing example_cards)
            if args.common_card_threshold > 0 and 'GLOBAL_CARD_FREQ' in globals():  # type: ignore
                freq_map, total_prev = globals()['GLOBAL_CARD_FREQ']  # type: ignore
                if total_prev > 0:  # avoid div-by-zero
                    cutoff = args.common_card_threshold

                    def _filter(lst: List[Tuple[float, str, Set[str]]]) -> List[Tuple[float, str, Set[str]]]:
                        out: List[Tuple[float, str, Set[str]]] = []
                        for r, n, cset in lst:
                            if (freq_map.get(n, 0) / total_prev) > cutoff:
                                continue
                            out.append((r, n, cset))
                        return out

                    if display in theme_card_hits:
                        theme_card_hits[display] = _filter(theme_card_hits[display])
                    for syn in (data.get('synergies') or []):
                        if syn in theme_card_hits:
                            theme_card_hits[syn] = _filter(theme_card_hits[syn])
            changed_cards, added_cards = fill_example_cards(
                data,
                theme_card_hits,
                card_color_pool,
                # Keep target upper bound even when --no-generic-pad so we still collect
                # base + synergy thematic cards; the flag simply disables color/generic
                # fallback padding rather than suppressing all population.
                args.cards_target,
                avoid=avoid,
                allow_color_fallback=(not args.cards_no_color_fallback and not args.no_generic_pad),
                rebuild=args.rebuild_example_cards,
            )
            # Optional second pass limited color fallback for sparse themes
            if (not changed_cards or len(data.get('example_cards', []) or []) < args.cards_target) and args.cards_limited_color_fallback_threshold > 0 and args.cards_no_color_fallback:
                current_len = len(data.get('example_cards') or [])
                if current_len < args.cards_limited_color_fallback_threshold:
                    # Top up with color fallback only for remaining slots
                    changed2, added2 = fill_example_cards(
                        data,
                        theme_card_hits,
                        card_color_pool,
                        args.cards_target,
                        avoid=avoid,
                        allow_color_fallback=True,
                        rebuild=False,
                    )
                    if changed2:
                        changed_cards = True
                        added_cards.extend(added2)
            if changed_cards:
                cards_changed += 1
                print(f"[cards] {path.name}: {pre_cards_len} -> {len(data['example_cards'])} (added {len(added_cards)})")
                if args.apply:
                    save_yaml(path, data)

    print(f"[promote] modified {changed_count} themes")
    if args.fill_example_cards:
        print(f"[cards] modified {cards_changed} themes (target {args.cards_target})")
    if args.print_dup_metrics and 'GLOBAL_CARD_FREQ' in globals():  # type: ignore
        freq_map, total_prev = globals()['GLOBAL_CARD_FREQ']  # type: ignore
        if total_prev:
            items = sorted(freq_map.items(), key=lambda x: (-x[1], x[0]))[:30]
            print('[dup-metrics] Top shared example_cards (baseline before this run):')
            for name, cnt in items:
                print(f"  {name}: {cnt}/{total_prev} ({cnt/max(total_prev,1):.1%})")
    raise SystemExit(0)


if __name__ == '__main__':  # pragma: no cover
    main()
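
# Additional illustrative invocations beyond the docstring examples (all flags are
# defined above; runs are dry-run unless --apply is given):
#   python code/scripts/synergy_promote_fill.py --fill-example-cards --text-heuristics \
#       --cards-no-color-fallback --cards-limited-color-fallback-threshold 4
#   python code/scripts/synergy_promote_fill.py --fill-example-cards --rebuild-example-cards \
#       --use-master-cards --print-dup-metrics --apply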