feat(tagging+archetypes): add Pillowfort/Politics/Midrange/Toolbox tagging and unify archetype presence skip logic

2025-09-22 04:50:46 +02:00 · 2025-09-19 11:53:52 -07:00 · 2025-09-19 11:53:52 -07:00 · 6d6243d6be
commit 6d6243d6be
parent f2a76d2ffc
47 changed files with 21133 additions and 839 deletions
--- a/code/scripts/synergy_promote_fill.py
+++ b/code/scripts/synergy_promote_fill.py
@ -0,0 +1,817 @@
+"""Editorial population helper for theme YAML files.
+
+Features implemented here:
+
+Commander population modes:
+ - Padding: Fill undersized example_commanders lists (< --min) with synergy-derived commanders.
+ - Rebalance: Prepend missing base-theme commanders if list already meets --min but lacks them.
+ - Base-first rebuild: Overwrite lists using ordering (base tag -> synergy tag -> color fallback), truncating to --min.
+
+Example cards population (NEW):
+ - Optional (--fill-example-cards) creation/padding of example_cards lists to a target size (default 10)
+   using base theme cards first, then synergy theme cards, then color-identity fallback.
+ - EDHREC ordering: Uses ascending edhrecRank sourced from cards.csv (if present) or shard CSVs.
+ - Avoids reusing commander names (base portion of commander entries) to diversify examples.
+
+Safeguards:
+ - Dry run by default (no writes unless --apply)
+ - Does not truncate existing example_cards if already >= target
+ - Deduplicates by raw card name
+
+Typical usage:
+  Populate commanders only (padding):
+      python code/scripts/synergy_promote_fill.py --min 5 --apply
+
+  Base-first rebuild of commanders AND populate 10 example cards:
+      python code/scripts/synergy_promote_fill.py --base-first-rebuild --min 5 \
+          --fill-example-cards --cards-target 10 --apply
+
+  Only fill example cards (leave commanders untouched):
+      python code/scripts/synergy_promote_fill.py --fill-example-cards --cards-target 10 --apply
+"""
+from __future__ import annotations
+import argparse
+import ast
+import csv
+from pathlib import Path
+from typing import Dict, List, Tuple, Set, Iterable, Optional
+
+try:
+    import yaml  # type: ignore
+except Exception:  # pragma: no cover
+    yaml = None
+
+# Repository root: this script lives in code/scripts/, so the root is two levels above its directory.
+ROOT = Path(__file__).resolve().parents[2]
+# Directory holding the card CSV exports read by the scan_* functions.
+CSV_DIR = ROOT / 'csv_files'
+# Directory globbed for theme YAML files (*.yml) by main().
+CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog'
+# Glob for the per-color shard CSVs; it also matches COMMANDER_FILE, which the scanners skip explicitly.
+COLOR_CSV_GLOB = '*_cards.csv'
+# Sole source of commander candidates (see scan_sources).
+COMMANDER_FILE = 'commander_cards.csv'
+# Consolidated card list; only read when use_master / --use-master-cards is requested.
+MASTER_CARDS_FILE = 'cards.csv'
+
+
+def parse_theme_tags(raw: str) -> List[str]:
+    """Parse a ``themeTags`` CSV cell into a list of tag strings.
+
+    The cell is first tried as a Python literal such as ``"['Tokens', 'Aggro']"``.
+    Both list and tuple literals are accepted (matching ``parse_color_identity``).
+    Anything else falls back to a naive comma split with surrounding brackets and
+    quotes stripped.
+
+    Args:
+        raw: Raw cell text; may be empty.
+
+    Returns:
+        Tag strings in source order. Non-string members of a literal and tags
+        that are empty after stripping are dropped.
+    """
+    if not raw:
+        return []
+    raw = raw.strip()
+    if not raw or raw == '[]':
+        return []
+    try:
+        val = ast.literal_eval(raw)
+    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
+        # Not a valid literal (e.g. unquoted tag names); use the split fallback below.
+        val = None
+    if isinstance(val, (list, tuple)):
+        return [x for x in val if isinstance(x, str) and x.strip()]
+    # Fallback: treat the cell as a comma-separated list, tolerating stray quotes.
+    cleaned = (t.strip().strip("'\"") for t in raw.strip('[]').split(','))
+    return [t for t in cleaned if t]
+
+
+def parse_color_identity(raw: str | None) -> Set[str]:
+    """Extract the set of color letters (W/U/B/R/G/C) from a CSV cell.
+
+    A list or tuple literal is upper-cased element by element and filtered to
+    the valid letters. Any other content is scanned character by character
+    (after upper-casing) for those letters.
+
+    Args:
+        raw: Raw cell text, or ``None``.
+
+    Returns:
+        The color letters found; an empty set for missing or blank input.
+    """
+    valid = {'W','U','B','R','G','C'}
+    text = (raw or '').strip()
+    if not text:
+        return set()
+    try:
+        parsed = ast.literal_eval(text)
+        if isinstance(parsed, (list, tuple)):
+            return {str(item).upper() for item in parsed} & valid
+    except Exception:
+        pass
+    # Not a list/tuple literal: keep whichever mana letters occur in the text.
+    return set(text.upper()) & valid
+
+
+def scan_sources(max_rank: float) -> Tuple[Dict[str, List[Tuple[float,str]]], Dict[str, List[Tuple[float,str]]], List[Tuple[float,str,Set[str]]]]:
+    """Collect commander candidates from commander_cards.csv only.
+
+    The color shard CSVs are deliberately not read here; restricting the scan to
+    the commander file keeps every suggestion a legal commander.
+
+    Rows are kept when their ``edhrecRank`` is at most ``max_rank`` (missing or
+    unparseable ranks count as 999999), their ``type`` contains "Legendary" and
+    they have a non-empty ``name``. A read error part-way through is swallowed
+    and whatever rows were collected so far are still returned.
+
+    Returns:
+        theme_hits: mapping theme tag -> rank-sorted, name-deduplicated (rank, commander name)
+        theme_all_legendary_hits: the same object as theme_hits (legacy return shape)
+        color_pool: rank-sorted, name-deduplicated (rank, commander name, color identity set)
+    """
+    source = CSV_DIR / COMMANDER_FILE
+    if not source.exists():
+        return {}, {}, []
+    by_theme: Dict[str, List[Tuple[float,str]]] = {}
+    pool: List[Tuple[float,str,Set[str]]] = []
+    try:
+        with source.open(encoding='utf-8', newline='') as handle:
+            for record in csv.DictReader(handle):
+                try:
+                    rank = float(record.get('edhrecRank') or 999999)
+                except Exception:
+                    rank = 999999
+                if rank > max_rank:
+                    continue
+                if 'Legendary' not in (record.get('type') or ''):
+                    continue
+                name = record.get('name') or ''
+                if not name:
+                    continue
+                identity = parse_color_identity(record.get('colorIdentity') or record.get('colors'))
+                pool.append((rank, name, identity))
+                tag_cell = record.get('themeTags') or ''
+                if not tag_cell:
+                    continue
+                for tag in parse_theme_tags(tag_cell):
+                    by_theme.setdefault(tag, []).append((rank, name))
+    except Exception:
+        pass
+    # Per theme: order by rank, then keep the first (best-ranked) entry per name.
+    for tag, hits in by_theme.items():
+        taken: Set[str] = set()
+        unique_hits: List[Tuple[float,str]] = []
+        for entry in sorted(hits, key=lambda e: e[0]):
+            if entry[1] not in taken:
+                taken.add(entry[1])
+                unique_hits.append(entry)
+        by_theme[tag] = unique_hits
+    # Same treatment for the global pool used by the color fallback.
+    pool_taken: Set[str] = set()
+    unique_pool: List[Tuple[float,str,Set[str]]] = []
+    for entry in sorted(pool, key=lambda e: e[0]):
+        if entry[1] not in pool_taken:
+            pool_taken.add(entry[1])
+            unique_pool.append(entry)
+    return by_theme, by_theme, unique_pool
+
+
+def scan_card_pool(max_rank: float, use_master: bool = False) -> Tuple[Dict[str, List[Tuple[float, str, Set[str]]]], List[Tuple[float, str, Set[str]]]]:
+    """Scan the non-commander card pool for example_cards population.
+
+    Default behavior (preferred per project guidance): ONLY use the shard color CSVs ([color]_cards.csv).
+    The consolidated master ``cards.csv`` contains every card face/variant and can introduce duplicate
+    or art-variant noise (e.g., "Sol Ring // Sol Ring"). It is therefore only read when explicitly
+    requested via ``use_master=True`` / ``--use-master-cards``.
+
+    If the master file is requested but missing, or reading it fails, the shard CSVs are used
+    instead. Rows collected from a partially read master file are discarded first so the result
+    comes from a single consistent source.
+
+    Names prefer ``faceName`` over ``name`` and redundant split names like "Foo // Foo" are
+    collapsed to "Foo". Rows with ``edhrecRank`` above ``max_rank`` are skipped; a missing or
+    unparseable rank counts as 999999.
+
+    Args:
+        max_rank: Inclusive EDHREC rank ceiling for candidates.
+        use_master: Read the consolidated master CSV instead of the shard CSVs.
+
+    Returns:
+        theme_card_hits: mapping theme tag -> [(rank, card name, color set)] sorted & deduped
+        color_pool: global list of unique cards for color fallback
+    """
+    theme_card_hits: Dict[str, List[Tuple[float, str, Set[str]]]] = {}
+    color_pool: List[Tuple[float, str, Set[str]]] = []
+    master_path = CSV_DIR / MASTER_CARDS_FILE
+
+    def canonical_name(row: Dict[str, str]) -> str:
+        nm = (row.get('faceName') or row.get('name') or '').strip()
+        if '//' in nm:
+            parts = [p.strip() for p in nm.split('//')]
+            if len(parts) == 2 and parts[0] == parts[1]:
+                nm = parts[0]
+        return nm
+
+    def _process_row(row: Dict[str, str]):
+        try:
+            rank = float(row.get('edhrecRank') or 999999)
+        except Exception:
+            rank = 999999
+        if rank > max_rank:
+            return
+        # Prefer canonicalized name (faceName if present; collapse duplicate split faces)
+        name = canonical_name(row)
+        if not name:
+            return
+        ci = parse_color_identity(row.get('colorIdentity') or row.get('colors'))
+        tags_raw = row.get('themeTags') or ''
+        if tags_raw:
+            for t in parse_theme_tags(tags_raw):
+                theme_card_hits.setdefault(t, []).append((rank, name, ci))
+        color_pool.append((rank, name, ci))
+
+    def _read_csv(fp: Path) -> bool:
+        """Feed every row of ``fp`` through _process_row; return False if reading failed."""
+        try:
+            with fp.open(encoding='utf-8', newline='') as f:
+                for row in csv.DictReader(f):
+                    _process_row(row)
+        except (OSError, UnicodeError, csv.Error):
+            return False
+        return True
+
+    def _dedup_by_name(entries: List[Tuple[float, str, Set[str]]]) -> List[Tuple[float, str, Set[str]]]:
+        """Sort by rank and keep the first (best-ranked) occurrence of each name."""
+        seen: Set[str] = set()
+        out: List[Tuple[float, str, Set[str]]] = []
+        for entry in sorted(entries, key=lambda x: x[0]):
+            if entry[1] in seen:
+                continue
+            seen.add(entry[1])
+            out.append(entry)
+        return out
+
+    # Collection strategy: master only when requested AND fully readable.
+    master_ok = False
+    if use_master and master_path.exists():
+        master_ok = _read_csv(master_path)
+        if not master_ok:
+            # Drop partial master rows so the shard fallback starts from a clean slate.
+            theme_card_hits.clear()
+            color_pool.clear()
+    # Shards are the primary source, and the fallback whenever the master was unusable.
+    if not master_ok:
+        for fp in sorted(CSV_DIR.glob(COLOR_CSV_GLOB)):
+            # The glob also matches the commander file; it is not part of the card pool.
+            if fp.name == COMMANDER_FILE or 'testdata' in str(fp):
+                continue
+            _read_csv(fp)  # best-effort: an unreadable shard is skipped
+
+    # Dedup + rank-sort per theme
+    for t in theme_card_hits:
+        theme_card_hits[t] = _dedup_by_name(theme_card_hits[t])
+    # Dedup global color pool (keep best rank occurrence)
+    return theme_card_hits, _dedup_by_name(color_pool)
+
+
+def load_yaml(path: Path) -> dict:
+    """Load a theme YAML file as a mapping.
+
+    Args:
+        path: File to read (decoded as UTF-8).
+
+    Returns:
+        The parsed document when it is a mapping. Returns ``{}`` when PyYAML is
+        not available, the file cannot be read or parsed, or the document is
+        empty or not a mapping, so the result always matches the annotated type.
+    """
+    if not yaml:
+        return {}
+    try:
+        loaded = yaml.safe_load(path.read_text(encoding='utf-8'))
+    except Exception:
+        # Deliberately best-effort: unreadable or malformed files are treated as empty.
+        return {}
+    return loaded if isinstance(loaded, dict) else {}
+
+
+def save_yaml(path: Path, data: dict):
+    """Dump ``data`` as YAML (original key order, unicode kept as-is) and overwrite ``path`` in UTF-8."""
+    path.write_text(
+        yaml.safe_dump(data, sort_keys=False, allow_unicode=True),
+        encoding='utf-8',
+    )
+
+
+def theme_color_set(data: dict) -> Set[str]:
+    """Translate a theme's primary/secondary/tertiary color names into color letters.
+
+    Only exact string matches of the six known color names are translated;
+    missing, non-string, or unrecognized values are ignored.
+    """
+    letters = {'White':'W','Blue':'U','Black':'B','Red':'R','Green':'G','Colorless':'C'}
+    slots = (data.get(field) for field in ('primary_color','secondary_color','tertiary_color'))
+    return {letters[color] for color in slots if isinstance(color, str) and color in letters}
+
+
+def rebuild_base_first(
+    data: dict,
+    theme_hits: Dict[str, List[Tuple[float,str]]],
+    min_examples: int,
+    color_pool: Iterable[Tuple[float,str,Set[str]]],
+    annotate_color_reason: bool = False,
+) -> List[str]:
+    """Build a fresh example_commanders list: base tag, then synergy tags, then color fallback.
+
+    Commanders tagged with the theme's ``display_name`` are listed bare. Commanders
+    found through a synergy tag are suffixed with ``" - Synergy (<tag>)"``. Color
+    fallback picks must have a color identity within the theme's colors and are
+    bare unless ``annotate_color_reason`` is set. Each commander is used at most
+    once and the result never exceeds ``min_examples`` entries. ``data`` is not
+    modified.
+    """
+    if not isinstance(data, dict):
+        return []
+    base_tag = data.get('display_name') or ''
+    syn_field = data.get('synergies')
+    syn_tags = syn_field if isinstance(syn_field, list) else []
+    picks: List[str] = []
+    taken: Set[str] = set()
+
+    def full() -> bool:
+        return len(picks) >= min_examples
+
+    def take(commander: str, label: str) -> None:
+        taken.add(commander)
+        picks.append(label)
+
+    # 1) Direct hits on the theme's own tag, in rank order.
+    for _, commander in theme_hits.get(base_tag, []):
+        if full():
+            break
+        if commander not in taken:
+            take(commander, commander)
+    # 2) Hits on each synergy tag, labelled with the tag that produced them.
+    for tag in syn_tags:
+        if full():
+            break
+        for _, commander in theme_hits.get(tag, []):
+            if full():
+                break
+            if commander not in taken:
+                take(commander, f"{commander} - Synergy ({tag})")
+    # 3) Anything castable within the theme's colors.
+    if not full():
+        palette = theme_color_set(data)
+        if palette:
+            for _, commander, identity in color_pool:
+                if full():
+                    break
+                if identity - palette or commander in taken:
+                    continue
+                if annotate_color_reason:
+                    take(commander, f"{commander} - Color Fallback (no on-theme commander available)")
+                else:
+                    take(commander, commander)
+    return picks[:min_examples]
+
+
+def fill_example_cards(
+    data: dict,
+    theme_card_hits: Dict[str, List[Tuple[float, str, Set[str]]]],
+    color_pool: Iterable[Tuple[float, str, Set[str]]],
+    target: int,
+    avoid: Optional[Set[str]] = None,
+    allow_color_fallback: bool = True,
+    rebuild: bool = False,
+) -> Tuple[bool, List[str]]:
+    """Grow ``data['example_cards']`` up to ``target`` entries (base -> synergy -> color).
+
+    - Candidates are consumed in the order given by their source lists.
+    - Names in ``avoid`` (e.g. commander names) and names already listed are skipped.
+    - A list that already holds ``target`` or more entries is left alone unless
+      ``rebuild`` is set, in which case population starts from an empty list.
+    - ``data`` is only written when the resulting list differs from the starting list
+      (the existing list, or the empty list when rebuilding).
+
+    Returns (changed, added_entries).
+    """
+    if not isinstance(data, dict):
+        return False, []
+    existing = data.get('example_cards')
+    # Keep working on the stored list object unless it is unusable or a rebuild was requested.
+    cards = existing if isinstance(existing, list) and not rebuild else []
+    before = list(cards)
+    if not rebuild and len(cards) >= target:
+        return False, []
+    base_tag = data.get('display_name') or ''
+    syn_field = data.get('synergies')
+    syn_tags = syn_field if isinstance(syn_field, list) else []
+    seen: Set[str] = {entry for entry in cards if isinstance(entry, str)}
+    if avoid:
+        seen |= avoid
+
+    def has_room() -> bool:
+        return len(cards) < target
+
+    def absorb(candidates) -> None:
+        # Append unseen names from a (rank, name, colors) list until the target is reached.
+        for _, card, _ in candidates:
+            if not has_room():
+                break
+            if card in seen:
+                continue
+            cards.append(card)
+            seen.add(card)
+
+    # Base theme first, then each synergy theme in listed order.
+    absorb(theme_card_hits.get(base_tag, []))
+    for tag in syn_tags:
+        if not has_room():
+            break
+        absorb(theme_card_hits.get(tag, []))
+    # Last resort: any card whose color identity fits inside the theme's colors.
+    if allow_color_fallback and has_room():
+        palette = theme_color_set(data)
+        if palette:
+            for _, card, identity in color_pool:
+                if not has_room():
+                    break
+                if card in seen or identity - palette:
+                    continue
+                cards.append(card)
+                seen.add(card)
+    # Defensive trim; the phases above stop at target.
+    if len(cards) > target:
+        del cards[target:]
+    if cards == before:
+        return False, []
+    data['example_cards'] = cards
+    return True, [entry for entry in cards if entry not in before]
+
+
+def pad_theme(
+    data: dict,
+    theme_hits: Dict[str, List[Tuple[float,str]]],
+    min_examples: int,
+    color_pool: Iterable[Tuple[float,str,Set[str]]],
+    base_min: int = 2,
+    drop_annotation_if_base: bool = True,
+) -> Tuple[bool, List[str]]:
+    """Pad an undersized example_commanders list up to ``min_examples``.
+
+    Hybrid strategy:
+      1. Add up to base_min commanders directly tagged with the base theme (display_name), unannotated.
+      2. Then add synergy-tagged commanders (annotated) in listed synergy order, skipping duplicates.
+      3. If still short, add remaining unused base hits and then color-identity fallback picks.
+      4. If a commander added during the synergy phase is also a base hit and
+         drop_annotation_if_base=True, it is emitted unannotated to highlight it as a flagship example.
+
+    Existing entries are compared by bare commander name: both the
+    ``" - Synergy (...)"`` suffix and the ``" - Color Fallback (...)"`` suffix written
+    by ``rebuild_base_first`` are ignored, so an annotated commander is never added twice.
+
+    Side effects: a missing or non-list ``example_commanders`` is replaced by ``[]`` on
+    ``data``. When entries are added, ``data['example_commanders']`` is set to a new list
+    of the existing entries followed by the additions.
+
+    Returns (changed, added_entries).
+    """
+    if not isinstance(data, dict):
+        return False, []
+    examples = data.get('example_commanders')
+    if not isinstance(examples, list):
+        # Treat missing / invalid field as empty to allow first-time population
+        examples = []
+        data['example_commanders'] = examples
+    if len(examples) >= min_examples:
+        return False, []
+    synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else []
+    display = data.get('display_name') or ''
+
+    def _bare_name(entry: str) -> str:
+        # Strip either annotation this script can emit after a commander name.
+        return entry.split(' - Synergy ')[0].split(' - Color Fallback ')[0]
+
+    def _short() -> bool:
+        return len(examples) + len(added) < min_examples
+
+    base_names = {_bare_name(e) for e in examples if isinstance(e, str)}
+    added: List[str] = []
+    base_cands = theme_hits.get(display) or []
+    # Precomputed once so the synergy phase does not rescan base_cands per candidate.
+    base_tagged_names = {bn for _, bn in base_cands}
+
+    # Phase 1: seed with base theme commanders (unannotated) up to base_min.
+    # Everything added so far is unannotated, so len(added) is the base count.
+    for _, cname in base_cands:
+        if not _short() or len(added) >= base_min:
+            break
+        if cname in base_names:
+            continue
+        base_names.add(cname)
+        added.append(cname)
+
+    # Phase 2: synergy-based candidates following list order
+    for syn in synergies:
+        if not _short():
+            break
+        for _, cname in theme_hits.get(syn) or []:
+            if not _short():
+                break
+            if cname in base_names:
+                continue
+            # If commander is ALSO tagged with base theme and we want a clean flagship, drop annotation
+            if cname in base_tagged_names and drop_annotation_if_base:
+                annotated = cname
+            else:
+                annotated = f"{cname} - Synergy ({syn})"
+            base_names.add(cname)
+            added.append(annotated)
+
+    # Phase 3: if still short, add any remaining unused base hits (unannotated)
+    if _short():
+        for _, cname in base_cands:
+            if not _short():
+                break
+            if cname in base_names:
+                continue
+            base_names.add(cname)
+            added.append(cname)
+    if _short():
+        # Color-aware fallback: commanders whose color identity is a subset of the theme colors.
+        # An empty identity is a subset of any palette, so it passes the check below.
+        t_colors = theme_color_set(data)
+        if t_colors:
+            for _, cname, cset in color_pool:
+                if not _short():
+                    break
+                if cset - t_colors:
+                    continue  # requires colors outside theme palette
+                if cname in base_names:
+                    continue
+                base_names.add(cname)
+                added.append(cname)  # unannotated to avoid invalid synergy annotation
+    if added:
+        data['example_commanders'] = examples + added
+        return True, added
+    return False, []
+
+
+def main():  # pragma: no cover (script orchestration)
+    ap = argparse.ArgumentParser(description='Synergy-based padding for undersized example_commanders lists')
+    ap.add_argument('--min', type=int, default=5, help='Minimum target examples (default 5)')
+    ap.add_argument('--max-rank', type=float, default=60000, help='EDHREC rank ceiling for candidate commanders')
+    ap.add_argument('--base-min', type=int, default=2, help='Minimum number of base-theme commanders (default 2)')
+    ap.add_argument('--no-drop-base-annotation', action='store_true', help='Do not drop synergy annotation when commander also has base theme tag')
+    ap.add_argument('--rebalance', action='store_true', help='Adjust themes already meeting --min if they lack required base-theme commanders')
+    ap.add_argument('--base-first-rebuild', action='store_true', help='Overwrite lists using base-first strategy (base -> synergy -> color)')
+    ap.add_argument('--apply', action='store_true', help='Write changes (default dry-run)')
+    # Example cards population flags
+    ap.add_argument('--fill-example-cards', action='store_true', help='Populate example_cards (base->synergy->[color fallback])')
+    ap.add_argument('--cards-target', type=int, default=10, help='Target number of example_cards (default 10)')
+    ap.add_argument('--cards-max-rank', type=float, default=60000, help='EDHREC rank ceiling for example_cards candidates')
+    ap.add_argument('--cards-no-color-fallback', action='store_true', help='Do NOT use color identity fallback for example_cards (only theme & synergies)')
+    ap.add_argument('--rebuild-example-cards', action='store_true', help='Discard existing example_cards and rebuild from scratch')
+    ap.add_argument('--text-heuristics', action='store_true', help='Augment example_cards by scanning card text for theme keywords when direct tag hits are empty')
+    ap.add_argument('--no-generic-pad', action='store_true', help='When true, leave example_cards shorter than target instead of filling with generic color-fallback or staple cards')
+    ap.add_argument('--annotate-color-fallback-commanders', action='store_true', help='Annotate color fallback commander additions with reason when base/synergy empty')
+    ap.add_argument('--heuristic-rank-cap', type=float, default=25000, help='Maximum EDHREC rank allowed for heuristic text-derived candidates (default 25000)')
+    ap.add_argument('--use-master-cards', action='store_true', help='Use consolidated master cards.csv (default: use only shard [color]_cards.csv files)')
+    ap.add_argument('--cards-limited-color-fallback-threshold', type=int, default=0, help='If >0 and color fallback disabled, allow a second limited color fallback pass only for themes whose example_cards count remains below this threshold after heuristics')
+    ap.add_argument('--common-card-threshold', type=float, default=0.18, help='Exclude candidate example_cards appearing (before build) in > this fraction of themes (default 0.18 = 18%)')
+    ap.add_argument('--print-dup-metrics', action='store_true', help='Print global duplicate frequency metrics for example_cards after run')
+    args = ap.parse_args()
+    if yaml is None:
+        print('PyYAML not installed')
+        raise SystemExit(1)
+    theme_hits, _, color_pool = scan_sources(args.max_rank)
+    theme_card_hits: Dict[str, List[Tuple[float, str, Set[str]]]] = {}
+    card_color_pool: List[Tuple[float, str, Set[str]]] = []
+    name_index: Dict[str, Tuple[float, str, Set[str]]] = {}
+    if args.fill_example_cards:
+        theme_card_hits, card_color_pool = scan_card_pool(args.cards_max_rank, use_master=args.use_master_cards)
+        # Build quick lookup for manual overrides
+        name_index = {n: (r, n, c) for r, n, c in card_color_pool}
+    changed_count = 0
+    cards_changed = 0
+    # Precompute text index lazily only if requested
+    text_index: Dict[str, List[Tuple[float, str, Set[str]]]] = {}
+    staples_block: Set[str] = {  # common generic staples to suppress unless they match heuristics explicitly
+        'Sol Ring','Arcane Signet','Command Tower','Exotic Orchard','Path of Ancestry','Swiftfoot Boots','Lightning Greaves','Reliquary Tower'
+    }
+    # Build text index if heuristics requested
+    if args.text_heuristics:
+        # Build text index from the same source strategy: master (optional) + shards, honoring faceName & canonical split collapse.
+        import re
+        def _scan_rows_for_text(reader):
+            for row in reader:
+                try:
+                    rank = float(row.get('edhrecRank') or 999999)
+                except Exception:
+                    rank = 999999
+                if rank > args.cards_max_rank:
+                    continue
+                # canonical naming logic (mirrors scan_card_pool)
+                nm = (row.get('faceName') or row.get('name') or '').strip()
+                if '//' in nm:
+                    parts = [p.strip() for p in nm.split('//')]
+                    if len(parts) == 2 and parts[0] == parts[1]:
+                        nm = parts[0]
+                if not nm:
+                    continue
+                text = (row.get('text') or '').lower()
+                ci = parse_color_identity(row.get('colorIdentity') or row.get('colors'))
+                tokens = set(re.findall(r"\+1/\+1|[a-zA-Z']+", text))
+                for t in tokens:
+                    if not t:
+                        continue
+                    bucket = text_index.setdefault(t, [])
+                    bucket.append((rank, nm, ci))
+        try:
+            if args.use_master_cards and (CSV_DIR / MASTER_CARDS_FILE).exists():
+                with (CSV_DIR / MASTER_CARDS_FILE).open(encoding='utf-8', newline='') as f:
+                    _scan_rows_for_text(csv.DictReader(f))
+            # Always include shards (they are authoritative curated sets)
+            for fp in sorted(CSV_DIR.glob(COLOR_CSV_GLOB)):
+                if fp.name in {COMMANDER_FILE} or 'testdata' in str(fp):
+                    continue
+                with fp.open(encoding='utf-8', newline='') as f:
+                    _scan_rows_for_text(csv.DictReader(f))
+            # sort & dedup per token
+            for tok, lst in text_index.items():
+                lst.sort(key=lambda x: x[0])
+                seen_tok: Set[str] = set()
+                dedup_tok: List[Tuple[float, str, Set[str]]] = []
+                for r, n, c in lst:
+                    if n in seen_tok:
+                        continue
+                    seen_tok.add(n)
+                    dedup_tok.append((r, n, c))
+                text_index[tok] = dedup_tok
+        except Exception:
+            text_index = {}
+
+    def heuristic_candidates(theme_name: str) -> List[Tuple[float, str, Set[str]]]:
+        if not args.text_heuristics or not text_index:
+            return []
+        name_lower = theme_name.lower()
+        manual: Dict[str, List[str]] = {
+            'landfall': ['landfall'],
+            'reanimate': ['reanimate','unearth','eternalize','return','graveyard'],
+            'tokens matter': ['token','populate','clue','treasure','food','blood','incubator','map','powerstone','role'],
+            '+1/+1 counters': ['+1/+1','counter','proliferate','adapt','evolve'],
+            'superfriends': ['planeswalker','loyalty','proliferate'],
+            'aggro': ['haste','attack','battalion','raid','melee'],
+            'lifegain': ['life','lifelink'],
+            'graveyard matters': ['graveyard','dies','mill','disturb','flashback'],
+            'group hug': ['draw','each','everyone','opponent','card','all'],
+            'politics': ['each','player','vote','council'],
+            'stax': ['sacrifice','upkeep','each','player','skip'],
+            'aristocrats': ['dies','sacrifice','token'],
+            'sacrifice matters': ['sacrifice','dies'],
+            'sacrifice to draw': ['sacrifice','draw'],
+            'artifact tokens': ['treasure','clue','food','blood','powerstone','incubator','map'],
+            'archer kindred': ['archer','bow','ranged'],
+            'eerie': ['enchant','aura','role','eerie'],
+        }
+        # Manual hand-picked iconic cards per theme (prioritized before token buckets)
+        manual_cards: Dict[str, List[str]] = {
+            'group hug': [
+                'Howling Mine','Temple Bell','Rites of Flourishing','Kami of the Crescent Moon','Dictate of Kruphix',
+                'Font of Mythos','Minds Aglow','Collective Voyage','Horn of Greed','Prosperity'
+            ],
+            'reanimate': [
+                'Reanimate','Animate Dead','Victimize','Living Death','Necromancy',
+                'Exhume','Dread Return','Unburial Rites','Persist','Stitch Together'
+            ],
+            'archer kindred': [
+                'Greatbow Doyen','Archer\'s Parapet','Jagged-Scar Archers','Silklash Spider','Elite Scaleguard',
+                'Kyren Sniper','Viridian Longbow','Brigid, Hero of Kinsbaile','Longshot Squad','Evolution Sage'
+            ],
+            'eerie': [
+                'Sythis, Harvest\'s Hand','Enchantress\'s Presence','Setessan Champion','Eidolon of Blossoms','Mesa Enchantress',
+                'Sterling Grove','Calix, Guided by Fate','Femeref Enchantress','Satyr Enchanter','Argothian Enchantress'
+            ],
+        }
+        keys = manual.get(name_lower, [])
+        if not keys:
+            # derive naive tokens: split words >3 chars
+            import re
+            keys = [w for w in re.findall(r'[a-zA-Z\+\/]+', name_lower) if len(w) > 3 or '+1/+1' in w]
+        merged: List[Tuple[float, str, Set[str]]] = []
+        seen: Set[str] = set()
+        # Insert manual card overrides first (respect rank cap if available)
+        if name_lower in manual_cards and name_index:
+            for card in manual_cards[name_lower]:
+                tup = name_index.get(card)
+                if not tup:
+                    continue
+                r, n, ci = tup
+                if r > args.heuristic_rank_cap:
+                    continue
+                if n in seen:
+                    continue
+                seen.add(n)
+                merged.append(tup)
+        for k in keys:
+            bucket = text_index.get(k)
+            if not bucket:
+                continue
+            for r, n, ci in bucket[:120]:
+                if n in seen:
+                    continue
+                if r > args.heuristic_rank_cap:
+                    continue
+                # skip staples if they lack the keyword in name (avoid universal ramp/utility artifacts)
+                if n in staples_block and k not in n.lower():
+                    continue
+                seen.add(n)
+                merged.append((r, n, ci))
+            if len(merged) >= 60:
+                break
+        return merged
+
+    for path in sorted(CATALOG_DIR.glob('*.yml')):
+        data = load_yaml(path)
+        if not data or not isinstance(data, dict) or not data.get('display_name'):
+            continue
+        notes = data.get('notes')
+        if isinstance(notes, str) and 'Deprecated alias file' in notes:
+            continue
+        ex = data.get('example_commanders')
+        if not isinstance(ex, list):
+            ex = []
+            data['example_commanders'] = ex
+        need_rebalance = False
+        if args.base_first_rebuild:
+            new_list = rebuild_base_first(
+                data,
+                theme_hits,
+                args.min,
+                color_pool,
+                annotate_color_reason=args.annotate_color_fallback_commanders,
+            )
+            if new_list != ex:
+                data['example_commanders'] = new_list
+                changed_count += 1
+                print(f"[rebuild] {path.name}: {len(ex)} -> {len(new_list)}")
+                if args.apply:
+                    save_yaml(path, data)
+        else:
+            if len(ex) >= args.min:
+                if args.rebalance and data.get('display_name'):
+                    base_tag = data['display_name']
+                    base_cands = {n for _, n in theme_hits.get(base_tag, [])}
+                    existing_base_examples = [e for e in ex if (e.split(' - Synergy ')[0]) in base_cands and ' - Synergy (' not in e]
+                    if len(existing_base_examples) < args.base_min and base_cands:
+                        need_rebalance = True
+                if not need_rebalance:
+                    pass  # leave commanders untouched (might still fill cards)
+            if need_rebalance:
+                orig_len = len(ex)
+                base_tag = data['display_name']
+                base_cands_ordered = [n for _, n in theme_hits.get(base_tag, [])]
+                current_base_names = {e.split(' - Synergy ')[0] for e in ex}
+                additions: List[str] = []
+                for cname in base_cands_ordered:
+                    if len([a for a in ex + additions if ' - Synergy (' not in a]) >= args.base_min:
+                        break
+                    if cname in current_base_names:
+                        continue
+                    additions.append(cname)
+                    current_base_names.add(cname)
+                if additions:
+                    data['example_commanders'] = additions + ex
+                    changed_count += 1
+                    print(f"[rebalance] {path.name}: inserted {len(additions)} base exemplars (len {orig_len} -> {len(data['example_commanders'])})")
+                    if args.apply:
+                        save_yaml(path, data)
+            else:
+                if len(ex) < args.min:
+                    orig_len = len(ex)
+                    changed, added = pad_theme(
+                        data,
+                        theme_hits,
+                        args.min,
+                        color_pool,
+                        base_min=args.base_min,
+                        drop_annotation_if_base=not args.no_drop_base_annotation,
+                    )
+                    if changed:
+                        changed_count += 1
+                        print(f"[promote] {path.name}: {orig_len} -> {len(data['example_commanders'])} (added {len(added)})")
+                        if args.apply:
+                            save_yaml(path, data)
+        # Example cards population
+        if args.fill_example_cards:
+            avoid = {c.split(' - Synergy ')[0] for c in data.get('example_commanders', []) if isinstance(c, str)}
+            pre_cards_len = len(data.get('example_cards') or []) if isinstance(data.get('example_cards'), list) else 0
+            # If no direct tag hits for base theme AND heuristics enabled, inject synthetic hits
+            display = data.get('display_name') or ''
+            if args.text_heuristics and display and not theme_card_hits.get(display):
+                cand = heuristic_candidates(display)
+                if cand:
+                    theme_card_hits[display] = cand
+            # Build global duplicate frequency map ONCE (baseline prior to this run) if threshold active
+            if args.common_card_threshold > 0 and 'GLOBAL_CARD_FREQ' not in globals():  # type: ignore
+                freq: Dict[str, int] = {}
+                total_themes = 0
+                for fp0 in CATALOG_DIR.glob('*.yml'):
+                    dat0 = load_yaml(fp0)
+                    if not isinstance(dat0, dict):
+                        continue
+                    ecs0 = dat0.get('example_cards')
+                    if not isinstance(ecs0, list) or not ecs0:
+                        continue
+                    total_themes += 1
+                    seen_local: Set[str] = set()
+                    for c in ecs0:
+                        if not isinstance(c, str) or c in seen_local:
+                            continue
+                        seen_local.add(c)
+                        freq[c] = freq.get(c, 0) + 1
+                globals()['GLOBAL_CARD_FREQ'] = (freq, total_themes)  # type: ignore
+            # Apply duplicate filtering to candidate lists (do NOT mutate existing example_cards)
+            if args.common_card_threshold > 0 and 'GLOBAL_CARD_FREQ' in globals():  # type: ignore
+                freq_map, total_prev = globals()['GLOBAL_CARD_FREQ']  # type: ignore
+                if total_prev > 0:  # avoid div-by-zero
+                    cutoff = args.common_card_threshold
+                    def _filter(lst: List[Tuple[float, str, Set[str]]]) -> List[Tuple[float, str, Set[str]]]:
+                        out: List[Tuple[float, str, Set[str]]] = []
+                        for r, n, cset in lst:
+                            if (freq_map.get(n, 0) / total_prev) > cutoff:
+                                continue
+                            out.append((r, n, cset))
+                        return out
+                    if display in theme_card_hits:
+                        theme_card_hits[display] = _filter(theme_card_hits[display])
+                    for syn in (data.get('synergies') or []):
+                        if syn in theme_card_hits:
+                            theme_card_hits[syn] = _filter(theme_card_hits[syn])
+            changed_cards, added_cards = fill_example_cards(
+                data,
+                theme_card_hits,
+                card_color_pool,
+                # Keep target upper bound even when --no-generic-pad so we still collect
+                # base + synergy thematic cards; the flag simply disables color/generic
+                # fallback padding rather than suppressing all population.
+                args.cards_target,
+                avoid=avoid,
+                allow_color_fallback=(not args.cards_no_color_fallback and not args.no_generic_pad),
+                rebuild=args.rebuild_example_cards,
+            )
+            # Optional second pass limited color fallback for sparse themes
+            if (not changed_cards or len(data.get('example_cards', []) or []) < args.cards_target) and args.cards_limited_color_fallback_threshold > 0 and args.cards_no_color_fallback:
+                current_len = len(data.get('example_cards') or [])
+                if current_len < args.cards_limited_color_fallback_threshold:
+                    # Top up with color fallback only for remaining slots
+                    changed2, added2 = fill_example_cards(
+                        data,
+                        theme_card_hits,
+                        card_color_pool,
+                        args.cards_target,
+                        avoid=avoid,
+                        allow_color_fallback=True,
+                        rebuild=False,
+                    )
+                    if changed2:
+                        changed_cards = True
+                        added_cards.extend(added2)
+            if changed_cards:
+                cards_changed += 1
+                print(f"[cards] {path.name}: {pre_cards_len} -> {len(data['example_cards'])} (added {len(added_cards)})")
+                if args.apply:
+                    save_yaml(path, data)
+    print(f"[promote] modified {changed_count} themes")
+    if args.fill_example_cards:
+        print(f"[cards] modified {cards_changed} themes (target {args.cards_target})")
+        if args.print_dup_metrics and 'GLOBAL_CARD_FREQ' in globals():  # type: ignore
+            freq_map, total_prev = globals()['GLOBAL_CARD_FREQ']  # type: ignore
+            if total_prev:
+                items = sorted(freq_map.items(), key=lambda x: (-x[1], x[0]))[:30]
+                print('[dup-metrics] Top shared example_cards (baseline before this run):')
+                for name, cnt in items:
+                    print(f"  {name}: {cnt}/{total_prev} ({cnt/max(total_prev,1):.1%})")
+    raise SystemExit(0)
+
+
+# Script entry point: invoke main() only when this file is executed directly,
+# not when it is imported as a module.
+if __name__ == '__main__':  # pragma: no cover
+    main()