""" Editorial population helper for theme YAML files.
Features implemented here :
Commander population modes :
- Padding : Fill undersized example_commanders lists ( < - - min ) with synergy - derived commanders .
- Rebalance : Prepend missing base - theme commanders if list already meets - - min but lacks them .
- Base - first rebuild : Overwrite lists using ordering ( base tag - > synergy tag - > color fallback ) , truncating to - - min .
Example cards population ( NEW ) :
- Optional ( - - fill - example - cards ) creation / padding of example_cards lists to a target size ( default 10 )
using base theme cards first , then synergy theme cards , then color - identity fallback .
- EDHREC ordering : Uses ascending edhrecRank sourced from cards . csv ( if present ) or shard CSVs .
- Avoids reusing commander names ( base portion of commander entries ) to diversify examples .
Safeguards :
- Dry run by default ( no writes unless - - apply )
- Does not truncate existing example_cards if already > = target
- Deduplicates by raw card name
Typical usage :
Populate commanders only ( padding ) :
python code / scripts / synergy_promote_fill . py - - min 5 - - apply
Base - first rebuild of commanders AND populate 10 example cards :
python code / scripts / synergy_promote_fill . py - - base - first - rebuild - - min 5 \
- - fill - example - cards - - cards - target 10 - - apply
Only fill example cards ( leave commanders untouched ) :
python code / scripts / synergy_promote_fill . py - - fill - example - cards - - cards - target 10 - - apply
"""
from __future__ import annotations

import argparse
import ast
import csv
from pathlib import Path
from typing import Dict, List, Tuple, Set, Iterable, Optional

try:
    import yaml  # type: ignore
except Exception:  # pragma: no cover
    yaml = None

ROOT = Path(__file__).resolve().parents[2]
CSV_DIR = ROOT / 'csv_files'
CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog'
COLOR_CSV_GLOB = '*_cards.csv'
COMMANDER_FILE = 'commander_cards.csv'
MASTER_CARDS_FILE = 'cards.csv'
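
# Illustrative sketch (not an exhaustive schema): the catalog YAML files read below are
# assumed to carry at least these editorial fields, based on the keys this script accesses:
#
#   display_name: Lifegain
#   synergies: [Lifelink, Life Matters]
#   primary_color: White
#   example_commanders: []   # padded / rebuilt by the commander modes
#   example_cards: []        # optionally filled via --fill-example-cards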
def parse_theme_tags(raw: str) -> List[str]:
    if not raw:
        return []
    raw = raw.strip()
    if not raw or raw == '[]':
        return []
    try:
        val = ast.literal_eval(raw)
        if isinstance(val, list):
            return [str(x) for x in val if isinstance(x, str)]
    except Exception:
        pass
    return [t.strip().strip("'\"") for t in raw.strip('[]').split(',') if t.strip()]
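
# Example (illustrative): a themeTags CSV cell like "['Lifegain', 'Tokens Matter']" parses to
# ['Lifegain', 'Tokens Matter']; a loose "Lifegain, Tokens Matter" falls through to the
# comma-split branch and yields the same list.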
def parse_color_identity(raw: str | None) -> Set[str]:
    if not raw:
        return set()
    raw = raw.strip()
    if not raw:
        return set()
    try:
        val = ast.literal_eval(raw)
        if isinstance(val, (list, tuple)):
            return {str(x).upper() for x in val if str(x).upper() in {'W', 'U', 'B', 'R', 'G', 'C'}}
    except Exception:
        pass
    # Fallback: collect mana letters present in the raw string
    return {ch for ch in raw.upper() if ch in {'W', 'U', 'B', 'R', 'G', 'C'}}
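
# Example (illustrative): "['G', 'W']" and "G, W" both resolve to {'G', 'W'};
# an empty or missing cell resolves to set().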
def scan_sources(max_rank: float) -> Tuple[Dict[str, List[Tuple[float, str]]], Dict[str, List[Tuple[float, str]]], List[Tuple[float, str, Set[str]]]]:
    """Build commander candidate pools exclusively from commander_cards.csv.

    We intentionally ignore the color shard *_cards.csv sources here because those
    include many non-commander legendary permanents or context-specific lists; using
    only commander_cards.csv guarantees every suggestion is a legal commander.

    Returns:
        theme_hits: mapping theme tag -> sorted unique list of (rank, commander name)
        theme_all_legendary_hits: alias of theme_hits (legacy return shape)
        color_pool: list of (rank, commander name, color identity set)
    """
    theme_hits: Dict[str, List[Tuple[float, str]]] = {}
    color_pool: List[Tuple[float, str, Set[str]]] = []
    commander_path = CSV_DIR / COMMANDER_FILE
    if not commander_path.exists():
        return {}, {}, []
    try:
        with commander_path.open(encoding='utf-8', newline='') as f:
            reader = csv.DictReader(f)
            for row in reader:
                try:
                    rank = float(row.get('edhrecRank') or 999999)
                except Exception:
                    rank = 999999
                if rank > max_rank:
                    continue
                typ = row.get('type') or ''
                if 'Legendary' not in typ:
                    continue
                name = row.get('name') or ''
                if not name:
                    continue
                ci = parse_color_identity(row.get('colorIdentity') or row.get('colors'))
                color_pool.append((rank, name, ci))
                tags_raw = row.get('themeTags') or ''
                if tags_raw:
                    for t in parse_theme_tags(tags_raw):
                        theme_hits.setdefault(t, []).append((rank, name))
    except Exception:
        pass
    # Deduplicate + sort theme hits
    for t, lst in theme_hits.items():
        lst.sort(key=lambda x: x[0])
        seen: Set[str] = set()
        dedup: List[Tuple[float, str]] = []
        for r, n in lst:
            if n in seen:
                continue
            seen.add(n)
            dedup.append((r, n))
        theme_hits[t] = dedup
    # Deduplicate color pool (keep best rank)
    color_pool.sort(key=lambda x: x[0])
    seen_cp: Set[str] = set()
    dedup_pool: List[Tuple[float, str, Set[str]]] = []
    for r, n, cset in color_pool:
        if n in seen_cp:
            continue
        seen_cp.add(n)
        dedup_pool.append((r, n, cset))
    return theme_hits, theme_hits, dedup_pool
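
# Shape sketch (illustrative placeholder values): scan_sources(60000) returns something like
#   theme_hits == {'Lifegain': [(1203.0, 'Some Commander'), ...], ...}   (also returned twice; legacy shape)
#   color_pool == [(5.0, 'Another Commander', {'W', 'U'}), ...]
# with both structures sorted by ascending edhrecRank and deduplicated by name.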
def scan_card_pool(max_rank: float, use_master: bool = False) -> Tuple[Dict[str, List[Tuple[float, str, Set[str]]]], List[Tuple[float, str, Set[str]]]]:
    """Scan the non-commander card pool for example_cards population.

    Default behavior (preferred per project guidance): ONLY use the shard color CSVs ([color]_cards.csv).
    The consolidated master ``cards.csv`` contains every card face/variant and can introduce duplicate
    or art-variant noise (e.g., "Sol Ring // Sol Ring"). We therefore avoid it unless explicitly
    requested via ``use_master=True`` / ``--use-master-cards``.

    When the master file is used we prefer ``faceName`` over ``name`` (falling back to name) and
    collapse redundant split names like "Foo // Foo" to just "Foo".

    Returns:
        theme_card_hits: mapping theme tag -> [(rank, card name, color set)] sorted & deduped
        color_pool: global list of unique cards for color fallback
    """
    theme_card_hits: Dict[str, List[Tuple[float, str, Set[str]]]] = {}
    color_pool: List[Tuple[float, str, Set[str]]] = []
    master_path = CSV_DIR / MASTER_CARDS_FILE

    def canonical_name(row: Dict[str, str]) -> str:
        nm = (row.get('faceName') or row.get('name') or '').strip()
        if ' // ' in nm:
            parts = [p.strip() for p in nm.split(' // ')]
            if len(parts) == 2 and parts[0] == parts[1]:
                nm = parts[0]
        return nm

    def _process_row(row: Dict[str, str]):
        try:
            rank = float(row.get('edhrecRank') or 999999)
        except Exception:
            rank = 999999
        if rank > max_rank:
            return
        # Prefer the canonicalized name (faceName if present; collapse duplicate split faces)
        name = canonical_name(row)
        if not name:
            return
        ci = parse_color_identity(row.get('colorIdentity') or row.get('colors'))
        tags_raw = row.get('themeTags') or ''
        if tags_raw:
            for t in parse_theme_tags(tags_raw):
                theme_card_hits.setdefault(t, []).append((rank, name, ci))
        color_pool.append((rank, name, ci))

    # Collection strategy
    if use_master and master_path.exists():
        try:
            with master_path.open(encoding='utf-8', newline='') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    _process_row(row)
        except Exception:
            pass  # fall through to shards if the master file is problematic
    # Always process shards (either as the primary source or to ensure coverage if the master read failed)
    if not use_master or not master_path.exists():
        for fp in sorted(CSV_DIR.glob(COLOR_CSV_GLOB)):
            if fp.name in {COMMANDER_FILE}:
                continue
            if 'testdata' in str(fp):
                continue
            try:
                with fp.open(encoding='utf-8', newline='') as f:
                    reader = csv.DictReader(f)
                    for row in reader:
                        _process_row(row)
            except Exception:
                continue
    # Dedup + rank-sort per theme
    for t, lst in theme_card_hits.items():
        lst.sort(key=lambda x: x[0])
        seen: Set[str] = set()
        dedup: List[Tuple[float, str, Set[str]]] = []
        for r, n, cset in lst:
            if n in seen:
                continue
            seen.add(n)
            dedup.append((r, n, cset))
        theme_card_hits[t] = dedup
    # Dedup the global color pool (keep the best rank occurrence)
    color_pool.sort(key=lambda x: x[0])
    seen_global: Set[str] = set()
    dedup_global: List[Tuple[float, str, Set[str]]] = []
    for r, n, cset in color_pool:
        if n in seen_global:
            continue
        seen_global.add(n)
        dedup_global.append((r, n, cset))
    return theme_card_hits, dedup_global
def load_yaml(path: Path) -> dict:
    try:
        return yaml.safe_load(path.read_text(encoding='utf-8')) if yaml else {}
    except Exception:
        return {}


def save_yaml(path: Path, data: dict):
    txt = yaml.safe_dump(data, sort_keys=False, allow_unicode=True)
    path.write_text(txt, encoding='utf-8')
def theme_color_set(data: dict) -> Set[str]:
    mapping = {'White': 'W', 'Blue': 'U', 'Black': 'B', 'Red': 'R', 'Green': 'G', 'Colorless': 'C'}
    out: Set[str] = set()
    for key in ('primary_color', 'secondary_color', 'tertiary_color'):
        val = data.get(key)
        if isinstance(val, str) and val in mapping:
            out.add(mapping[val])
    return out
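
# Example (illustrative): a theme with primary_color 'Green' and secondary_color 'White'
# maps to {'G', 'W'}, which the fallback phases use as the allowed color envelope.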
def rebuild_base_first(
    data: dict,
    theme_hits: Dict[str, List[Tuple[float, str]]],
    min_examples: int,
    color_pool: Iterable[Tuple[float, str, Set[str]]],
    annotate_color_reason: bool = False,
) -> List[str]:
    """Return a new example_commanders list using the base-first strategy."""
    if not isinstance(data, dict):
        return []
    display = data.get('display_name') or ''
    synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else []
    chosen: List[str] = []
    used: Set[str] = set()
    # Base theme hits first (rank order)
    for _, cname in theme_hits.get(display, []):
        if len(chosen) >= min_examples:
            break
        if cname in used:
            continue
        chosen.append(cname)
        used.add(cname)
    # Synergy hits, annotated
    if len(chosen) < min_examples:
        for syn in synergies:
            for _, cname in theme_hits.get(syn, []):
                if len(chosen) >= min_examples:
                    break
                if cname in used:
                    continue
                chosen.append(f"{cname} - Synergy ({syn})")
                used.add(cname)
            if len(chosen) >= min_examples:
                break
    # Color fallback
    if len(chosen) < min_examples:
        t_colors = theme_color_set(data)
        if t_colors:
            for _, cname, cset in color_pool:
                if len(chosen) >= min_examples:
                    break
                if cset - t_colors:
                    continue
                if cname in used:
                    continue
                if annotate_color_reason:
                    chosen.append(f"{cname} - Color Fallback (no on-theme commander available)")
                else:
                    chosen.append(cname)
                used.add(cname)
    return chosen[:min_examples]
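
# Resulting list shape (illustrative placeholder names): base hits come first and unannotated,
# synergy hits carry a " - Synergy (<tag>)" suffix, then color fallback fills the remainder, e.g.
#   ['Commander A', 'Commander B - Synergy (Lifelink)', 'Commander C']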
def fill_example_cards(
    data: dict,
    theme_card_hits: Dict[str, List[Tuple[float, str, Set[str]]]],
    color_pool: Iterable[Tuple[float, str, Set[str]]],
    target: int,
    avoid: Optional[Set[str]] = None,
    allow_color_fallback: bool = True,
    rebuild: bool = False,
) -> Tuple[bool, List[str]]:
    """Populate or pad example_cards using base -> synergy -> color ordering.

    - Card ordering within each phase preserves ascending EDHREC rank (lists are already sorted).
    - The 'avoid' set lets us skip commander names to diversify examples.
    - Does not shrink an overfilled list (only grows up to target).

    Returns (changed, added_entries).
    """
    if not isinstance(data, dict):
        return False, []
    cards_field = data.get('example_cards')
    if not isinstance(cards_field, list):
        cards_field = []
    # Rebuild forces clearing the existing list so we can repopulate even if already at target size
    if rebuild:
        cards_field = []
    original = list(cards_field)
    if len(cards_field) >= target and not rebuild:
        return False, []  # nothing to do when already populated, unless rebuilding
    display = data.get('display_name') or ''
    synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else []
    used: Set[str] = {c for c in cards_field if isinstance(c, str)}
    if avoid:
        used |= avoid
    # Phase 1: base theme cards
    for _, name, _ in theme_card_hits.get(display, []):
        if len(cards_field) >= target:
            break
        if name in used:
            continue
        cards_field.append(name)
        used.add(name)
    # Phase 2: synergy cards
    if len(cards_field) < target:
        for syn in synergies:
            for _, name, _ in theme_card_hits.get(syn, []):
                if len(cards_field) >= target:
                    break
                if name in used:
                    continue
                cards_field.append(name)
                used.add(name)
            if len(cards_field) >= target:
                break
    # Phase 3: color fallback
    if allow_color_fallback and len(cards_field) < target:
        t_colors = theme_color_set(data)
        if t_colors:
            for _, name, cset in color_pool:
                if len(cards_field) >= target:
                    break
                if name in used:
                    continue
                if cset - t_colors:
                    continue
                cards_field.append(name)
                used.add(name)
    # Trim safeguard (should not exceed target)
    if len(cards_field) > target:
        del cards_field[target:]
    if cards_field != original:
        data['example_cards'] = cards_field
        added = [c for c in cards_field if c not in original]
        return True, added
    return False, []
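
# Usage sketch (illustrative; theme_card_hits / card_pool come from scan_card_pool, and the
# avoided name is a placeholder):
#   changed, added = fill_example_cards(theme_data, theme_card_hits, card_pool,
#                                       target=10, avoid={'Some Commander'})
#   # changed is True only when the list actually grew; added holds the newly appended names.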
def pad_theme(
    data: dict,
    theme_hits: Dict[str, List[Tuple[float, str]]],
    min_examples: int,
    color_pool: Iterable[Tuple[float, str, Set[str]]],
    base_min: int = 2,
    drop_annotation_if_base: bool = True,
) -> Tuple[bool, List[str]]:
    """Return (changed, added_entries).

    Hybrid strategy:
        1. Ensure up to base_min commanders directly tagged with the base theme (display_name)
           appear (unannotated) before filling remaining slots.
        2. Then add synergy-tagged commanders (annotated) in listed order, skipping duplicates.
        3. If still short, cycle remaining base hits (if any unused) and then color fallback.
        4. If a commander is both a base hit and added during the synergy phase and
           drop_annotation_if_base=True, emit it unannotated to highlight it as a flagship example.
    """
    if not isinstance(data, dict):
        return False, []
    examples = data.get('example_commanders')
    if not isinstance(examples, list):
        # Treat a missing/invalid field as empty to allow first-time population
        examples = []
        data['example_commanders'] = examples
    if len(examples) >= min_examples:
        return False, []
    synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else []
    display = data.get('display_name') or ''
    base_names = {e.split(' - Synergy')[0] for e in examples if isinstance(e, str)}
    added: List[str] = []
    # Phase 1: seed with base theme commanders (unannotated) up to base_min
    base_cands = theme_hits.get(display) or []
    for _, cname in base_cands:
        if len(examples) + len(added) >= min_examples or len([a for a in added if ' - Synergy (' not in a]) >= base_min:
            break
        if cname in base_names:
            continue
        base_names.add(cname)
        added.append(cname)
    # Phase 2: synergy-based candidates following list order
    for syn in synergies:
        if len(examples) + len(added) >= min_examples:
            break
        cand_list = theme_hits.get(syn) or []
        for _, cname in cand_list:
            if len(examples) + len(added) >= min_examples:
                break
            if cname in base_names:
                continue
            # If the commander is ALSO tagged with the base theme and we want a clean flagship, drop the annotation
            base_tagged = any(cname == bn for _, bn in base_cands)
            if base_tagged and drop_annotation_if_base:
                annotated = cname
            else:
                annotated = f"{cname} - Synergy ({syn})"
            base_names.add(cname)
            added.append(annotated)
    # Phase 3: if still short, add any remaining unused base hits (unannotated)
    if len(examples) + len(added) < min_examples:
        for _, cname in base_cands:
            if len(examples) + len(added) >= min_examples:
                break
            if cname in base_names:
                continue
            base_names.add(cname)
            added.append(cname)
    if len(examples) + len(added) < min_examples:
        # Color-aware fallback: fill with top-ranked legendary commanders whose color identity is a subset of the theme colors
        t_colors = theme_color_set(data)
        if t_colors:
            for _, cname, cset in color_pool:
                if len(examples) + len(added) >= min_examples:
                    break
                if not cset:
                    pass  # colorless commander acceptable if the theme includes C or any color (the subset check handles it)
                if cset - t_colors:
                    continue  # requires colors outside the theme palette
                if cname in base_names:
                    continue
                base_names.add(cname)
                added.append(cname)  # unannotated to avoid an invalid synergy annotation
    if added:
        data['example_commanders'] = examples + added
        return True, added
    return False, []
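
# Annotation format sketch (illustrative placeholder names): padded entries end up looking like
#   example_commanders:
#     - Flagship Commander                     # base-theme hit, unannotated
#     - Other Commander - Synergy (Lifelink)   # synergy-phase hit, annotated with its tag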
def main():  # pragma: no cover (script orchestration)
    ap = argparse.ArgumentParser(description='Synergy-based padding for undersized example_commanders lists')
    ap.add_argument('--min', type=int, default=5, help='Minimum target examples (default 5)')
    ap.add_argument('--max-rank', type=float, default=60000, help='EDHREC rank ceiling for candidate commanders')
    ap.add_argument('--base-min', type=int, default=2, help='Minimum number of base-theme commanders (default 2)')
    ap.add_argument('--no-drop-base-annotation', action='store_true', help='Do not drop the synergy annotation when a commander also has the base theme tag')
    ap.add_argument('--rebalance', action='store_true', help='Adjust themes already meeting --min if they lack required base-theme commanders')
    ap.add_argument('--base-first-rebuild', action='store_true', help='Overwrite lists using the base-first strategy (base -> synergy -> color)')
    ap.add_argument('--apply', action='store_true', help='Write changes (default: dry run)')
    # Example cards population flags
    ap.add_argument('--fill-example-cards', action='store_true', help='Populate example_cards (base -> synergy -> [color fallback])')
    ap.add_argument('--cards-target', type=int, default=10, help='Target number of example_cards (default 10)')
    ap.add_argument('--cards-max-rank', type=float, default=60000, help='EDHREC rank ceiling for example_cards candidates')
    ap.add_argument('--cards-no-color-fallback', action='store_true', help='Do NOT use color identity fallback for example_cards (only theme & synergies)')
    ap.add_argument('--rebuild-example-cards', action='store_true', help='Discard existing example_cards and rebuild from scratch')
    ap.add_argument('--text-heuristics', action='store_true', help='Augment example_cards by scanning card text for theme keywords when direct tag hits are empty')
    ap.add_argument('--no-generic-pad', action='store_true', help='Leave example_cards shorter than target instead of filling with generic color-fallback or staple cards')
    ap.add_argument('--annotate-color-fallback-commanders', action='store_true', help='Annotate color fallback commander additions with the reason when base/synergy pools are empty')
    ap.add_argument('--heuristic-rank-cap', type=float, default=25000, help='Maximum EDHREC rank allowed for heuristic text-derived candidates (default 25000)')
    ap.add_argument('--use-master-cards', action='store_true', help='Use the consolidated master cards.csv (default: use only shard [color]_cards.csv files)')
    ap.add_argument('--cards-limited-color-fallback-threshold', type=int, default=0, help='If >0 and color fallback is disabled, allow a second limited color fallback pass only for themes whose example_cards count remains below this threshold after heuristics')
    ap.add_argument('--common-card-threshold', type=float, default=0.18, help='Exclude candidate example_cards appearing (before build) in more than this fraction of themes (default 0.18 = 18%%)')
    ap.add_argument('--print-dup-metrics', action='store_true', help='Print global duplicate frequency metrics for example_cards after the run')
    args = ap.parse_args()
    if yaml is None:
        print('PyYAML not installed')
        raise SystemExit(1)
    theme_hits, _, color_pool = scan_sources(args.max_rank)
    theme_card_hits: Dict[str, List[Tuple[float, str, Set[str]]]] = {}
    card_color_pool: List[Tuple[float, str, Set[str]]] = []
    name_index: Dict[str, Tuple[float, str, Set[str]]] = {}
    if args.fill_example_cards:
        theme_card_hits, card_color_pool = scan_card_pool(args.cards_max_rank, use_master=args.use_master_cards)
        # Build a quick lookup for manual overrides
        name_index = {n: (r, n, c) for r, n, c in card_color_pool}
    changed_count = 0
    cards_changed = 0
    # Precompute the text index lazily, only if requested
    text_index: Dict[str, List[Tuple[float, str, Set[str]]]] = {}
    staples_block: Set[str] = {  # common generic staples to suppress unless they match heuristics explicitly
        'Sol Ring', 'Arcane Signet', 'Command Tower', 'Exotic Orchard', 'Path of Ancestry', 'Swiftfoot Boots', 'Lightning Greaves', 'Reliquary Tower'
    }
    # Build the text index if heuristics are requested
    if args.text_heuristics:
        # Build the text index from the same source strategy: master (optional) + shards,
        # honoring faceName & canonical split-name collapse.
        import re

        def _scan_rows_for_text(reader):
            for row in reader:
                try:
                    rank = float(row.get('edhrecRank') or 999999)
                except Exception:
                    rank = 999999
                if rank > args.cards_max_rank:
                    continue
                # Canonical naming logic (mirrors scan_card_pool)
                nm = (row.get('faceName') or row.get('name') or '').strip()
                if ' // ' in nm:
                    parts = [p.strip() for p in nm.split(' // ')]
                    if len(parts) == 2 and parts[0] == parts[1]:
                        nm = parts[0]
                if not nm:
                    continue
                text = (row.get('text') or '').lower()
                ci = parse_color_identity(row.get('colorIdentity') or row.get('colors'))
                tokens = set(re.findall(r"\+1/\+1|[a-zA-Z']+", text))
                for t in tokens:
                    if not t:
                        continue
                    bucket = text_index.setdefault(t, [])
                    bucket.append((rank, nm, ci))

        try:
            if args.use_master_cards and (CSV_DIR / MASTER_CARDS_FILE).exists():
                with (CSV_DIR / MASTER_CARDS_FILE).open(encoding='utf-8', newline='') as f:
                    _scan_rows_for_text(csv.DictReader(f))
            # Always include shards (they are the authoritative curated sets)
            for fp in sorted(CSV_DIR.glob(COLOR_CSV_GLOB)):
                if fp.name in {COMMANDER_FILE} or 'testdata' in str(fp):
                    continue
                with fp.open(encoding='utf-8', newline='') as f:
                    _scan_rows_for_text(csv.DictReader(f))
            # Sort & dedup per token
            for tok, lst in text_index.items():
                lst.sort(key=lambda x: x[0])
                seen_tok: Set[str] = set()
                dedup_tok: List[Tuple[float, str, Set[str]]] = []
                for r, n, c in lst:
                    if n in seen_tok:
                        continue
                    seen_tok.add(n)
                    dedup_tok.append((r, n, c))
                text_index[tok] = dedup_tok
        except Exception:
            text_index = {}
    def heuristic_candidates(theme_name: str) -> List[Tuple[float, str, Set[str]]]:
        if not args.text_heuristics or not text_index:
            return []
        name_lower = theme_name.lower()
        manual: Dict[str, List[str]] = {
            'landfall': ['landfall'],
            'reanimate': ['reanimate', 'unearth', 'eternalize', 'return', 'graveyard'],
            'tokens matter': ['token', 'populate', 'clue', 'treasure', 'food', 'blood', 'incubator', 'map', 'powerstone', 'role'],
            '+1/+1 counters': ['+1/+1', 'counter', 'proliferate', 'adapt', 'evolve'],
            'superfriends': ['planeswalker', 'loyalty', 'proliferate'],
            'aggro': ['haste', 'attack', 'battalion', 'raid', 'melee'],
            'lifegain': ['life', 'lifelink'],
            'graveyard matters': ['graveyard', 'dies', 'mill', 'disturb', 'flashback'],
            'group hug': ['draw', 'each', 'everyone', 'opponent', 'card', 'all'],
            'politics': ['each', 'player', 'vote', 'council'],
            'stax': ['sacrifice', 'upkeep', 'each', 'player', 'skip'],
            'aristocrats': ['dies', 'sacrifice', 'token'],
            'sacrifice matters': ['sacrifice', 'dies'],
            'sacrifice to draw': ['sacrifice', 'draw'],
            'artifact tokens': ['treasure', 'clue', 'food', 'blood', 'powerstone', 'incubator', 'map'],
            'archer kindred': ['archer', 'bow', 'ranged'],
            'eerie': ['enchant', 'aura', 'role', 'eerie'],
        }
        # Manual hand-picked iconic cards per theme (prioritized before token buckets)
        manual_cards: Dict[str, List[str]] = {
            'group hug': [
                'Howling Mine', 'Temple Bell', 'Rites of Flourishing', 'Kami of the Crescent Moon', 'Dictate of Kruphix',
                'Font of Mythos', 'Minds Aglow', 'Collective Voyage', 'Horn of Greed', 'Prosperity'
            ],
            'reanimate': [
                'Reanimate', 'Animate Dead', 'Victimize', 'Living Death', 'Necromancy',
                'Exhume', 'Dread Return', 'Unburial Rites', 'Persist', 'Stitch Together'
            ],
            'archer kindred': [
                'Greatbow Doyen', "Archer's Parapet", 'Jagged-Scar Archers', 'Silklash Spider', 'Elite Scaleguard',
                'Kyren Sniper', 'Viridian Longbow', 'Brigid, Hero of Kinsbaile', 'Longshot Squad', 'Evolution Sage'
            ],
            'eerie': [
                "Sythis, Harvest's Hand", "Enchantress's Presence", 'Setessan Champion', 'Eidolon of Blossoms', 'Mesa Enchantress',
                'Sterling Grove', 'Calix, Guided by Fate', 'Femeref Enchantress', 'Satyr Enchanter', 'Argothian Enchantress'
            ],
        }
        keys = manual.get(name_lower, [])
        if not keys:
            # Derive naive tokens: split into words longer than 3 characters
            import re
            keys = [w for w in re.findall(r'[a-zA-Z\+\/]+', name_lower) if len(w) > 3 or '+1/+1' in w]
        merged: List[Tuple[float, str, Set[str]]] = []
        seen: Set[str] = set()
        # Insert manual card overrides first (respect the rank cap if available)
        if name_lower in manual_cards and name_index:
            for card in manual_cards[name_lower]:
                tup = name_index.get(card)
                if not tup:
                    continue
                r, n, ci = tup
                if r > args.heuristic_rank_cap:
                    continue
                if n in seen:
                    continue
                seen.add(n)
                merged.append(tup)
        for k in keys:
            bucket = text_index.get(k)
            if not bucket:
                continue
            for r, n, ci in bucket[:120]:
                if n in seen:
                    continue
                if r > args.heuristic_rank_cap:
                    continue
                # Skip staples unless the keyword appears in the name (avoid universal ramp/utility artifacts)
                if n in staples_block and k not in n.lower():
                    continue
                seen.add(n)
                merged.append((r, n, ci))
            if len(merged) >= 60:
                break
        return merged
    for path in sorted(CATALOG_DIR.glob('*.yml')):
        data = load_yaml(path)
        if not data or not isinstance(data, dict) or not data.get('display_name'):
            continue
        notes = data.get('notes')
        if isinstance(notes, str) and 'Deprecated alias file' in notes:
            continue
        ex = data.get('example_commanders')
        if not isinstance(ex, list):
            ex = []
            data['example_commanders'] = ex
        need_rebalance = False
        if args.base_first_rebuild:
            new_list = rebuild_base_first(
                data,
                theme_hits,
                args.min,
                color_pool,
                annotate_color_reason=args.annotate_color_fallback_commanders,
            )
            if new_list != ex:
                data['example_commanders'] = new_list
                changed_count += 1
                print(f"[rebuild] {path.name}: {len(ex)} -> {len(new_list)}")
                if args.apply:
                    save_yaml(path, data)
        else:
            if len(ex) >= args.min:
                if args.rebalance and data.get('display_name'):
                    base_tag = data['display_name']
                    base_cands = {n for _, n in theme_hits.get(base_tag, [])}
                    existing_base_examples = [e for e in ex if (e.split(' - Synergy')[0]) in base_cands and ' - Synergy (' not in e]
                    if len(existing_base_examples) < args.base_min and base_cands:
                        need_rebalance = True
                if not need_rebalance:
                    pass  # leave commanders untouched (might still fill cards)
            if need_rebalance:
                orig_len = len(ex)
                base_tag = data['display_name']
                base_cands_ordered = [n for _, n in theme_hits.get(base_tag, [])]
                current_base_names = {e.split(' - Synergy')[0] for e in ex}
                additions: List[str] = []
                for cname in base_cands_ordered:
                    if len([a for a in ex + additions if ' - Synergy (' not in a]) >= args.base_min:
                        break
                    if cname in current_base_names:
                        continue
                    additions.append(cname)
                    current_base_names.add(cname)
                if additions:
                    data['example_commanders'] = additions + ex
                    changed_count += 1
                    print(f"[rebalance] {path.name}: inserted {len(additions)} base exemplars (len {orig_len} -> {len(data['example_commanders'])})")
                    if args.apply:
                        save_yaml(path, data)
            else:
                if len(ex) < args.min:
                    orig_len = len(ex)
                    changed, added = pad_theme(
                        data,
                        theme_hits,
                        args.min,
                        color_pool,
                        base_min=args.base_min,
                        drop_annotation_if_base=not args.no_drop_base_annotation,
                    )
                    if changed:
                        changed_count += 1
                        print(f"[promote] {path.name}: {orig_len} -> {len(data['example_commanders'])} (added {len(added)})")
                        if args.apply:
                            save_yaml(path, data)
        # Example cards population
        if args.fill_example_cards:
            avoid = {c.split(' - Synergy')[0] for c in data.get('example_commanders', []) if isinstance(c, str)}
            pre_cards_len = len(data.get('example_cards') or []) if isinstance(data.get('example_cards'), list) else 0
            # If there are no direct tag hits for the base theme AND heuristics are enabled, inject synthetic hits
            display = data.get('display_name') or ''
            if args.text_heuristics and display and not theme_card_hits.get(display):
                cand = heuristic_candidates(display)
                if cand:
                    theme_card_hits[display] = cand
            # Build the global duplicate frequency map ONCE (baseline prior to this run) if the threshold is active
            if args.common_card_threshold > 0 and 'GLOBAL_CARD_FREQ' not in globals():
                freq: Dict[str, int] = {}
                total_themes = 0
                for fp0 in CATALOG_DIR.glob('*.yml'):
                    dat0 = load_yaml(fp0)
                    if not isinstance(dat0, dict):
                        continue
                    ecs0 = dat0.get('example_cards')
                    if not isinstance(ecs0, list) or not ecs0:
                        continue
                    total_themes += 1
                    seen_local: Set[str] = set()
                    for c in ecs0:
                        if not isinstance(c, str) or c in seen_local:
                            continue
                        seen_local.add(c)
                        freq[c] = freq.get(c, 0) + 1
                globals()['GLOBAL_CARD_FREQ'] = (freq, total_themes)
            # Apply duplicate filtering to candidate lists (do NOT mutate existing example_cards)
            if args.common_card_threshold > 0 and 'GLOBAL_CARD_FREQ' in globals():
                freq_map, total_prev = globals()['GLOBAL_CARD_FREQ']
                if total_prev > 0:  # avoid division by zero
                    cutoff = args.common_card_threshold

                    def _filter(lst: List[Tuple[float, str, Set[str]]]) -> List[Tuple[float, str, Set[str]]]:
                        out: List[Tuple[float, str, Set[str]]] = []
                        for r, n, cset in lst:
                            if (freq_map.get(n, 0) / total_prev) > cutoff:
                                continue
                            out.append((r, n, cset))
                        return out

                    if display in theme_card_hits:
                        theme_card_hits[display] = _filter(theme_card_hits[display])
                    for syn in (data.get('synergies') or []):
                        if syn in theme_card_hits:
                            theme_card_hits[syn] = _filter(theme_card_hits[syn])
            changed_cards, added_cards = fill_example_cards(
                data,
                theme_card_hits,
                card_color_pool,
                # Keep the target upper bound even when --no-generic-pad so we still collect
                # base + synergy thematic cards; the flag simply disables color/generic
                # fallback padding rather than suppressing all population.
                args.cards_target,
                avoid=avoid,
                allow_color_fallback=(not args.cards_no_color_fallback and not args.no_generic_pad),
                rebuild=args.rebuild_example_cards,
            )
            # Optional second, limited color-fallback pass for sparse themes
            if (not changed_cards or len(data.get('example_cards', []) or []) < args.cards_target) and args.cards_limited_color_fallback_threshold > 0 and args.cards_no_color_fallback:
                current_len = len(data.get('example_cards') or [])
                if current_len < args.cards_limited_color_fallback_threshold:
                    # Top up with color fallback only for the remaining slots
                    changed2, added2 = fill_example_cards(
                        data,
                        theme_card_hits,
                        card_color_pool,
                        args.cards_target,
                        avoid=avoid,
                        allow_color_fallback=True,
                        rebuild=False,
                    )
                    if changed2:
                        changed_cards = True
                        added_cards.extend(added2)
            if changed_cards:
                cards_changed += 1
                print(f"[cards] {path.name}: {pre_cards_len} -> {len(data['example_cards'])} (added {len(added_cards)})")
                if args.apply:
                    save_yaml(path, data)
    print(f"[promote] modified {changed_count} themes")
    if args.fill_example_cards:
        print(f"[cards] modified {cards_changed} themes (target {args.cards_target})")
    if args.print_dup_metrics and 'GLOBAL_CARD_FREQ' in globals():
        freq_map, total_prev = globals()['GLOBAL_CARD_FREQ']
        if total_prev:
            items = sorted(freq_map.items(), key=lambda x: (-x[1], x[0]))[:30]
            print('[dup-metrics] Top shared example_cards (baseline before this run):')
            for name, cnt in items:
                print(f"  {name}: {cnt}/{total_prev} ({cnt / max(total_prev, 1):.1%})")
    raise SystemExit(0)

if __name__ == '__main__':  # pragma: no cover
    main()