""" Editorial population helper for theme YAML files.
Features implemented here :
Commander population modes :
- Padding : Fill undersized example_commanders lists ( < - - min ) with synergy - derived commanders .
- Rebalance : Prepend missing base - theme commanders if list already meets - - min but lacks them .
- Base - first rebuild : Overwrite lists using ordering ( base tag - > synergy tag - > color fallback ) , truncating to - - min .
Example cards population ( NEW ) :
- Optional ( - - fill - example - cards ) creation / padding of example_cards lists to a target size ( default 10 )
using base theme cards first , then synergy theme cards , then color - identity fallback .
- EDHREC ordering : Uses ascending edhrecRank sourced from cards . csv ( if present ) or shard CSVs .
- Avoids reusing commander names ( base portion of commander entries ) to diversify examples .
Safeguards :
- Dry run by default ( no writes unless - - apply )
- Does not truncate existing example_cards if already > = target
- Deduplicates by raw card name
Typical usage :
Populate commanders only ( padding ) :
python code / scripts / synergy_promote_fill . py - - min 5 - - apply
Base - first rebuild of commanders AND populate 10 example cards :
python code / scripts / synergy_promote_fill . py - - base - first - rebuild - - min 5 \
- - fill - example - cards - - cards - target 10 - - apply
Only fill example cards ( leave commanders untouched ) :
python code / scripts / synergy_promote_fill . py - - fill - example - cards - - cards - target 10 - - apply
"""
from __future__ import annotations

import argparse
import ast
import csv
from pathlib import Path
from typing import Dict, List, Tuple, Set, Iterable, Optional

try:
    import yaml  # type: ignore
except Exception:  # pragma: no cover
    yaml = None

ROOT = Path(__file__).resolve().parents[2]
CSV_DIR = ROOT / 'csv_files'
CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog'
COLOR_CSV_GLOB = '*_cards.csv'
COMMANDER_FILE = 'commander_cards.csv'
MASTER_CARDS_FILE = 'cards.csv'
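
# Illustrative sketch (not an exhaustive schema): the catalog YAML files read below are
# assumed to carry at least these editorial fields, based on the keys this script accesses:
#
#   display_name: Lifegain
#   synergies: [Lifelink, Life Matters]
#   primary_color: White
#   example_commanders: []   # padded / rebuilt by the commander modes
#   example_cards: []        # optionally filled via --fill-example-cards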
def parse_theme_tags(raw: str) -> List[str]:
    if not raw:
        return []
    raw = raw.strip()
    if not raw or raw == '[]':
        return []
    try:
        val = ast.literal_eval(raw)
        if isinstance(val, list):
            return [str(x) for x in val if isinstance(x, str)]
    except Exception:
        pass
    return [t.strip().strip("'\"") for t in raw.strip('[]').split(',') if t.strip()]
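
# Example (illustrative): a themeTags CSV cell like "['Lifegain', 'Tokens Matter']" parses to
# ['Lifegain', 'Tokens Matter']; a loose "Lifegain, Tokens Matter" falls through to the
# comma-split branch and yields the same list.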
def parse_color_identity(raw: str | None) -> Set[str]:
    if not raw:
        return set()
    raw = raw.strip()
    if not raw:
        return set()
    try:
        val = ast.literal_eval(raw)
        if isinstance(val, (list, tuple)):
            return {str(x).upper() for x in val if str(x).upper() in {'W', 'U', 'B', 'R', 'G', 'C'}}
    except Exception:
        pass
    # Fallback: collect mana letters present in the raw string
    return {ch for ch in raw.upper() if ch in {'W', 'U', 'B', 'R', 'G', 'C'}}
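
# Example (illustrative): "['G', 'W']" and "G, W" both resolve to {'G', 'W'};
# an empty or missing cell resolves to set().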
def scan_sources(max_rank: float) -> Tuple[Dict[str, List[Tuple[float, str]]], Dict[str, List[Tuple[float, str]]], List[Tuple[float, str, Set[str]]]]:
    """Build commander candidate pools exclusively from commander_cards.csv.

    We intentionally ignore the color shard *_cards.csv sources here because those
    include many non-commander legendary permanents or context-specific lists; using
    only commander_cards.csv guarantees every suggestion is a legal commander.

    Returns:
        theme_hits: mapping theme tag -> sorted unique list of (rank, commander name)
        theme_all_legendary_hits: alias of theme_hits (legacy return shape)
        color_pool: list of (rank, commander name, color identity set)
    """
    theme_hits: Dict[str, List[Tuple[float, str]]] = {}
    color_pool: List[Tuple[float, str, Set[str]]] = []
    commander_path = CSV_DIR / COMMANDER_FILE
    if not commander_path.exists():
        return {}, {}, []
    try:
        with commander_path.open(encoding='utf-8', newline='') as f:
            reader = csv.DictReader(f)
            for row in reader:
                try:
                    rank = float(row.get('edhrecRank') or 999999)
                except Exception:
                    rank = 999999
                if rank > max_rank:
                    continue
                typ = row.get('type') or ''
                if 'Legendary' not in typ:
                    continue
                name = row.get('name') or ''
                if not name:
                    continue
                ci = parse_color_identity(row.get('colorIdentity') or row.get('colors'))
                color_pool.append((rank, name, ci))
                tags_raw = row.get('themeTags') or ''
                if tags_raw:
                    for t in parse_theme_tags(tags_raw):
                        theme_hits.setdefault(t, []).append((rank, name))
    except Exception:
        pass
    # Deduplicate + sort theme hits
    for t, lst in theme_hits.items():
        lst.sort(key=lambda x: x[0])
        seen: Set[str] = set()
        dedup: List[Tuple[float, str]] = []
        for r, n in lst:
            if n in seen:
                continue
            seen.add(n)
            dedup.append((r, n))
        theme_hits[t] = dedup
    # Deduplicate color pool (keep best rank)
    color_pool.sort(key=lambda x: x[0])
    seen_cp: Set[str] = set()
    dedup_pool: List[Tuple[float, str, Set[str]]] = []
    for r, n, cset in color_pool:
        if n in seen_cp:
            continue
        seen_cp.add(n)
        dedup_pool.append((r, n, cset))
    return theme_hits, theme_hits, dedup_pool
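
# Shape sketch (illustrative placeholder values): scan_sources(60000) returns something like
#   theme_hits == {'Lifegain': [(1203.0, 'Some Commander'), ...], ...}   (also returned twice; legacy shape)
#   color_pool == [(5.0, 'Another Commander', {'W', 'U'}), ...]
# with both structures sorted by ascending edhrecRank and deduplicated by name.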
def scan_card_pool(max_rank: float, use_master: bool = False) -> Tuple[Dict[str, List[Tuple[float, str, Set[str]]]], List[Tuple[float, str, Set[str]]]]:
    """Scan the non-commander card pool for example_cards population.

    Default behavior (preferred per project guidance): ONLY use the shard color CSVs ([color]_cards.csv).
    The consolidated master ``cards.csv`` contains every card face/variant and can introduce duplicate
    or art-variant noise (e.g., "Sol Ring // Sol Ring"). We therefore avoid it unless explicitly
    requested via ``use_master=True`` / ``--use-master-cards``.

    When the master file is used we prefer ``faceName`` over ``name`` (falling back to name) and
    collapse redundant split names like "Foo // Foo" to just "Foo".

    Returns:
        theme_card_hits: mapping theme tag -> [(rank, card name, color set)] sorted & deduped
        color_pool: global list of unique cards for color fallback
    """
    theme_card_hits: Dict[str, List[Tuple[float, str, Set[str]]]] = {}
    color_pool: List[Tuple[float, str, Set[str]]] = []
    master_path = CSV_DIR / MASTER_CARDS_FILE

    def canonical_name(row: Dict[str, str]) -> str:
        nm = (row.get('faceName') or row.get('name') or '').strip()
        if ' // ' in nm:
            parts = [p.strip() for p in nm.split(' // ')]
            if len(parts) == 2 and parts[0] == parts[1]:
                nm = parts[0]
        return nm

    def _process_row(row: Dict[str, str]):
        try:
            rank = float(row.get('edhrecRank') or 999999)
        except Exception:
            rank = 999999
        if rank > max_rank:
            return
        # Prefer the canonicalized name (faceName if present; collapse duplicate split faces)
        name = canonical_name(row)
        if not name:
            return
        ci = parse_color_identity(row.get('colorIdentity') or row.get('colors'))
        tags_raw = row.get('themeTags') or ''
        if tags_raw:
            for t in parse_theme_tags(tags_raw):
                theme_card_hits.setdefault(t, []).append((rank, name, ci))
        color_pool.append((rank, name, ci))

    # Collection strategy
    if use_master and master_path.exists():
        try:
            with master_path.open(encoding='utf-8', newline='') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    _process_row(row)
        except Exception:
            pass  # fall through to shards if the master file is problematic
    # Always process shards (either as the primary source or to ensure coverage if the master read failed)
    if not use_master or not master_path.exists():
        for fp in sorted(CSV_DIR.glob(COLOR_CSV_GLOB)):
            if fp.name in {COMMANDER_FILE}:
                continue
            if 'testdata' in str(fp):
                continue
            try:
                with fp.open(encoding='utf-8', newline='') as f:
                    reader = csv.DictReader(f)
                    for row in reader:
                        _process_row(row)
            except Exception:
                continue
    # Dedup + rank-sort per theme
    for t, lst in theme_card_hits.items():
        lst.sort(key=lambda x: x[0])
        seen: Set[str] = set()
        dedup: List[Tuple[float, str, Set[str]]] = []
        for r, n, cset in lst:
            if n in seen:
                continue
            seen.add(n)
            dedup.append((r, n, cset))
        theme_card_hits[t] = dedup
    # Dedup the global color pool (keep the best rank occurrence)
    color_pool.sort(key=lambda x: x[0])
    seen_global: Set[str] = set()
    dedup_global: List[Tuple[float, str, Set[str]]] = []
    for r, n, cset in color_pool:
        if n in seen_global:
            continue
        seen_global.add(n)
        dedup_global.append((r, n, cset))
    return theme_card_hits, dedup_global
def load_yaml(path: Path) -> dict:
    try:
        return yaml.safe_load(path.read_text(encoding='utf-8')) if yaml else {}
    except Exception:
        return {}


def save_yaml(path: Path, data: dict):
    txt = yaml.safe_dump(data, sort_keys=False, allow_unicode=True)
    path.write_text(txt, encoding='utf-8')
def theme_color_set(data: dict) -> Set[str]:
    mapping = {'White': 'W', 'Blue': 'U', 'Black': 'B', 'Red': 'R', 'Green': 'G', 'Colorless': 'C'}
    out: Set[str] = set()
    for key in ('primary_color', 'secondary_color', 'tertiary_color'):
        val = data.get(key)
        if isinstance(val, str) and val in mapping:
            out.add(mapping[val])
    return out
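
# Example (illustrative): a theme with primary_color 'Green' and secondary_color 'White'
# maps to {'G', 'W'}, which the fallback phases use as the allowed color envelope.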
def rebuild_base_first(
    data: dict,
    theme_hits: Dict[str, List[Tuple[float, str]]],
    min_examples: int,
    color_pool: Iterable[Tuple[float, str, Set[str]]],
    annotate_color_reason: bool = False,
) -> List[str]:
    """Return a new example_commanders list using the base-first strategy."""
    if not isinstance(data, dict):
        return []
    display = data.get('display_name') or ''
    synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else []
    chosen: List[str] = []
    used: Set[str] = set()
    # Base theme hits first (rank order)
    for _, cname in theme_hits.get(display, []):
        if len(chosen) >= min_examples:
            break
        if cname in used:
            continue
        chosen.append(cname)
        used.add(cname)
    # Synergy hits, annotated
    if len(chosen) < min_examples:
        for syn in synergies:
            for _, cname in theme_hits.get(syn, []):
                if len(chosen) >= min_examples:
                    break
                if cname in used:
                    continue
                chosen.append(f"{cname} - Synergy ({syn})")
                used.add(cname)
            if len(chosen) >= min_examples:
                break
    # Color fallback
    if len(chosen) < min_examples:
        t_colors = theme_color_set(data)
        if t_colors:
            for _, cname, cset in color_pool:
                if len(chosen) >= min_examples:
                    break
                if cset - t_colors:
                    continue
                if cname in used:
                    continue
                if annotate_color_reason:
                    chosen.append(f"{cname} - Color Fallback (no on-theme commander available)")
                else:
                    chosen.append(cname)
                used.add(cname)
    return chosen[:min_examples]
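
# Resulting list shape (illustrative placeholder names): base hits come first and unannotated,
# synergy hits carry a " - Synergy (<tag>)" suffix, then color fallback fills the remainder, e.g.
#   ['Commander A', 'Commander B - Synergy (Lifelink)', 'Commander C']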
def fill_example_cards(
    data: dict,
    theme_card_hits: Dict[str, List[Tuple[float, str, Set[str]]]],
    color_pool: Iterable[Tuple[float, str, Set[str]]],
    target: int,
    avoid: Optional[Set[str]] = None,
    allow_color_fallback: bool = True,
    rebuild: bool = False,
) -> Tuple[bool, List[str]]:
    """Populate or pad example_cards using base -> synergy -> color ordering.

    - Card ordering within each phase preserves ascending EDHREC rank (lists are already sorted).
    - The 'avoid' set lets us skip commander names to diversify examples.
    - Does not shrink an overfilled list (only grows up to target).

    Returns (changed, added_entries).
    """
    if not isinstance(data, dict):
        return False, []
    cards_field = data.get('example_cards')
    if not isinstance(cards_field, list):
        cards_field = []
    # Rebuild forces clearing the existing list so we can repopulate even if already at target size
    if rebuild:
        cards_field = []
    original = list(cards_field)
    if len(cards_field) >= target and not rebuild:
        return False, []  # nothing to do when already populated, unless rebuilding
    display = data.get('display_name') or ''
    synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else []
    used: Set[str] = {c for c in cards_field if isinstance(c, str)}
    if avoid:
        used |= avoid
    # Phase 1: base theme cards
    for _, name, _ in theme_card_hits.get(display, []):
        if len(cards_field) >= target:
            break
        if name in used:
            continue
        cards_field.append(name)
        used.add(name)
    # Phase 2: synergy cards
    if len(cards_field) < target:
        for syn in synergies:
            for _, name, _ in theme_card_hits.get(syn, []):
                if len(cards_field) >= target:
                    break
                if name in used:
                    continue
                cards_field.append(name)
                used.add(name)
            if len(cards_field) >= target:
                break
    # Phase 3: color fallback
    if allow_color_fallback and len(cards_field) < target:
        t_colors = theme_color_set(data)
        if t_colors:
            for _, name, cset in color_pool:
                if len(cards_field) >= target:
                    break
                if name in used:
                    continue
                if cset - t_colors:
                    continue
                cards_field.append(name)
                used.add(name)
    # Trim safeguard (should not exceed target)
    if len(cards_field) > target:
        del cards_field[target:]
    if cards_field != original:
        data['example_cards'] = cards_field
        added = [c for c in cards_field if c not in original]
        return True, added
    return False, []
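
# Usage sketch (illustrative; theme_card_hits / card_pool come from scan_card_pool, and the
# avoided name is a placeholder):
#   changed, added = fill_example_cards(theme_data, theme_card_hits, card_pool,
#                                       target=10, avoid={'Some Commander'})
#   # changed is True only when the list actually grew; added holds the newly appended names.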
def pad_theme(
    data: dict,
    theme_hits: Dict[str, List[Tuple[float, str]]],
    min_examples: int,
    color_pool: Iterable[Tuple[float, str, Set[str]]],
    base_min: int = 2,
    drop_annotation_if_base: bool = True,
) -> Tuple[bool, List[str]]:
    """Return (changed, added_entries).

    Hybrid strategy:
        1. Ensure up to base_min commanders directly tagged with the base theme (display_name)
           appear (unannotated) before filling remaining slots.
        2. Then add synergy-tagged commanders (annotated) in listed order, skipping duplicates.
        3. If still short, cycle remaining base hits (if any unused) and then color fallback.
        4. If a commander is both a base hit and added during the synergy phase and
           drop_annotation_if_base=True, emit it unannotated to highlight it as a flagship example.
    """
    if not isinstance(data, dict):
        return False, []
    examples = data.get('example_commanders')
    if not isinstance(examples, list):
        # Treat a missing/invalid field as empty to allow first-time population
        examples = []
        data['example_commanders'] = examples
    if len(examples) >= min_examples:
        return False, []
    synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else []
    display = data.get('display_name') or ''
    base_names = {e.split(' - Synergy')[0] for e in examples if isinstance(e, str)}
    added: List[str] = []
    # Phase 1: seed with base theme commanders (unannotated) up to base_min
    base_cands = theme_hits.get(display) or []
    for _, cname in base_cands:
        if len(examples) + len(added) >= min_examples or len([a for a in added if ' - Synergy (' not in a]) >= base_min:
            break
        if cname in base_names:
            continue
        base_names.add(cname)
        added.append(cname)
    # Phase 2: synergy-based candidates following list order
    for syn in synergies:
        if len(examples) + len(added) >= min_examples:
            break
        cand_list = theme_hits.get(syn) or []
        for _, cname in cand_list:
            if len(examples) + len(added) >= min_examples:
                break
            if cname in base_names:
                continue
            # If the commander is ALSO tagged with the base theme and we want a clean flagship, drop the annotation
            base_tagged = any(cname == bn for _, bn in base_cands)
            if base_tagged and drop_annotation_if_base:
                annotated = cname
            else:
                annotated = f"{cname} - Synergy ({syn})"
            base_names.add(cname)
            added.append(annotated)
    # Phase 3: if still short, add any remaining unused base hits (unannotated)
    if len(examples) + len(added) < min_examples:
        for _, cname in base_cands:
            if len(examples) + len(added) >= min_examples:
                break
            if cname in base_names:
                continue
            base_names.add(cname)
            added.append(cname)
    if len(examples) + len(added) < min_examples:
        # Color-aware fallback: fill with top-ranked legendary commanders whose color identity is a subset of the theme colors
        t_colors = theme_color_set(data)
        if t_colors:
            for _, cname, cset in color_pool:
                if len(examples) + len(added) >= min_examples:
                    break
                if not cset:
                    pass  # colorless commander acceptable if the theme includes C or any color (the subset check handles it)
                if cset - t_colors:
                    continue  # requires colors outside the theme palette
                if cname in base_names:
                    continue
                base_names.add(cname)
                added.append(cname)  # unannotated to avoid an invalid synergy annotation
    if added:
        data['example_commanders'] = examples + added
        return True, added
    return False, []
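
# Annotation format sketch (illustrative placeholder names): padded entries end up looking like
#   example_commanders:
#     - Flagship Commander                     # base-theme hit, unannotated
#     - Other Commander - Synergy (Lifelink)   # synergy-phase hit, annotated with its tag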
def main():  # pragma: no cover (script orchestration)
    ap = argparse.ArgumentParser(description='Synergy-based padding for undersized example_commanders lists')
    ap.add_argument('--min', type=int, default=5, help='Minimum target examples (default 5)')
    ap.add_argument('--max-rank', type=float, default=60000, help='EDHREC rank ceiling for candidate commanders')
    ap.add_argument('--base-min', type=int, default=2, help='Minimum number of base-theme commanders (default 2)')
    ap.add_argument('--no-drop-base-annotation', action='store_true', help='Do not drop the synergy annotation when a commander also has the base theme tag')
    ap.add_argument('--rebalance', action='store_true', help='Adjust themes already meeting --min if they lack required base-theme commanders')
    ap.add_argument('--base-first-rebuild', action='store_true', help='Overwrite lists using the base-first strategy (base -> synergy -> color)')
    ap.add_argument('--apply', action='store_true', help='Write changes (default: dry run)')
    # Example cards population flags
    ap.add_argument('--fill-example-cards', action='store_true', help='Populate example_cards (base -> synergy -> [color fallback])')
    ap.add_argument('--cards-target', type=int, default=10, help='Target number of example_cards (default 10)')
    ap.add_argument('--cards-max-rank', type=float, default=60000, help='EDHREC rank ceiling for example_cards candidates')
    ap.add_argument('--cards-no-color-fallback', action='store_true', help='Do NOT use color identity fallback for example_cards (only theme & synergies)')
    ap.add_argument('--rebuild-example-cards', action='store_true', help='Discard existing example_cards and rebuild from scratch')
    ap.add_argument('--text-heuristics', action='store_true', help='Augment example_cards by scanning card text for theme keywords when direct tag hits are empty')
    ap.add_argument('--no-generic-pad', action='store_true', help='Leave example_cards shorter than target instead of filling with generic color-fallback or staple cards')
    ap.add_argument('--annotate-color-fallback-commanders', action='store_true', help='Annotate color fallback commander additions with the reason when base/synergy pools are empty')
    ap.add_argument('--heuristic-rank-cap', type=float, default=25000, help='Maximum EDHREC rank allowed for heuristic text-derived candidates (default 25000)')
    ap.add_argument('--use-master-cards', action='store_true', help='Use the consolidated master cards.csv (default: use only shard [color]_cards.csv files)')
    ap.add_argument('--cards-limited-color-fallback-threshold', type=int, default=0, help='If >0 and color fallback is disabled, allow a second limited color fallback pass only for themes whose example_cards count remains below this threshold after heuristics')
    ap.add_argument('--common-card-threshold', type=float, default=0.18, help='Exclude candidate example_cards appearing (before build) in more than this fraction of themes (default 0.18 = 18%%)')
    ap.add_argument('--print-dup-metrics', action='store_true', help='Print global duplicate frequency metrics for example_cards after the run')
    args = ap.parse_args()
    if yaml is None:
        print('PyYAML not installed')
        raise SystemExit(1)
    theme_hits, _, color_pool = scan_sources(args.max_rank)
    theme_card_hits: Dict[str, List[Tuple[float, str, Set[str]]]] = {}
    card_color_pool: List[Tuple[float, str, Set[str]]] = []
    name_index: Dict[str, Tuple[float, str, Set[str]]] = {}
    if args.fill_example_cards:
        theme_card_hits, card_color_pool = scan_card_pool(args.cards_max_rank, use_master=args.use_master_cards)
        # Build a quick lookup for manual overrides
        name_index = {n: (r, n, c) for r, n, c in card_color_pool}
    changed_count = 0
    cards_changed = 0
    # Precompute the text index lazily, only if requested
    text_index: Dict[str, List[Tuple[float, str, Set[str]]]] = {}
    staples_block: Set[str] = {  # common generic staples to suppress unless they match heuristics explicitly
        'Sol Ring', 'Arcane Signet', 'Command Tower', 'Exotic Orchard', 'Path of Ancestry', 'Swiftfoot Boots', 'Lightning Greaves', 'Reliquary Tower'
    }
    # Build the text index if heuristics are requested
    if args.text_heuristics:
        # Build the text index from the same source strategy: master (optional) + shards,
        # honoring faceName & canonical split-name collapse.
        import re

        def _scan_rows_for_text(reader):
            for row in reader:
                try:
                    rank = float(row.get('edhrecRank') or 999999)
                except Exception:
                    rank = 999999
                if rank > args.cards_max_rank:
                    continue
                # Canonical naming logic (mirrors scan_card_pool)
                nm = (row.get('faceName') or row.get('name') or '').strip()
                if ' // ' in nm:
                    parts = [p.strip() for p in nm.split(' // ')]
                    if len(parts) == 2 and parts[0] == parts[1]:
                        nm = parts[0]
                if not nm:
                    continue
                text = (row.get('text') or '').lower()
                ci = parse_color_identity(row.get('colorIdentity') or row.get('colors'))
                tokens = set(re.findall(r"\+1/\+1|[a-zA-Z']+", text))
                for t in tokens:
                    if not t:
                        continue
                    bucket = text_index.setdefault(t, [])
                    bucket.append((rank, nm, ci))

        try:
            if args.use_master_cards and (CSV_DIR / MASTER_CARDS_FILE).exists():
                with (CSV_DIR / MASTER_CARDS_FILE).open(encoding='utf-8', newline='') as f:
                    _scan_rows_for_text(csv.DictReader(f))
            # Always include shards (they are the authoritative curated sets)
            for fp in sorted(CSV_DIR.glob(COLOR_CSV_GLOB)):
                if fp.name in {COMMANDER_FILE} or 'testdata' in str(fp):
                    continue
                with fp.open(encoding='utf-8', newline='') as f:
                    _scan_rows_for_text(csv.DictReader(f))
            # Sort & dedup per token
            for tok, lst in text_index.items():
                lst.sort(key=lambda x: x[0])
                seen_tok: Set[str] = set()
                dedup_tok: List[Tuple[float, str, Set[str]]] = []
                for r, n, c in lst:
                    if n in seen_tok:
                        continue
                    seen_tok.add(n)
                    dedup_tok.append((r, n, c))
                text_index[tok] = dedup_tok
        except Exception:
            text_index = {}
    def heuristic_candidates(theme_name: str) -> List[Tuple[float, str, Set[str]]]:
        if not args.text_heuristics or not text_index:
            return []
        name_lower = theme_name.lower()
        manual: Dict[str, List[str]] = {
            'landfall': ['landfall'],
            'reanimate': ['reanimate', 'unearth', 'eternalize', 'return', 'graveyard'],
            'tokens matter': ['token', 'populate', 'clue', 'treasure', 'food', 'blood', 'incubator', 'map', 'powerstone', 'role'],
            '+1/+1 counters': ['+1/+1', 'counter', 'proliferate', 'adapt', 'evolve'],
            'superfriends': ['planeswalker', 'loyalty', 'proliferate'],
            'aggro': ['haste', 'attack', 'battalion', 'raid', 'melee'],
            'lifegain': ['life', 'lifelink'],
            'graveyard matters': ['graveyard', 'dies', 'mill', 'disturb', 'flashback'],
            'group hug': ['draw', 'each', 'everyone', 'opponent', 'card', 'all'],
            'politics': ['each', 'player', 'vote', 'council'],
            'stax': ['sacrifice', 'upkeep', 'each', 'player', 'skip'],
            'aristocrats': ['dies', 'sacrifice', 'token'],
            'sacrifice matters': ['sacrifice', 'dies'],
            'sacrifice to draw': ['sacrifice', 'draw'],
            'artifact tokens': ['treasure', 'clue', 'food', 'blood', 'powerstone', 'incubator', 'map'],
            'archer kindred': ['archer', 'bow', 'ranged'],
            'eerie': ['enchant', 'aura', 'role', 'eerie'],
        }
        # Manual hand-picked iconic cards per theme (prioritized before token buckets)
        manual_cards: Dict[str, List[str]] = {
            'group hug': [
                'Howling Mine', 'Temple Bell', 'Rites of Flourishing', 'Kami of the Crescent Moon', 'Dictate of Kruphix',
                'Font of Mythos', 'Minds Aglow', 'Collective Voyage', 'Horn of Greed', 'Prosperity'
            ],
            'reanimate': [
                'Reanimate', 'Animate Dead', 'Victimize', 'Living Death', 'Necromancy',
                'Exhume', 'Dread Return', 'Unburial Rites', 'Persist', 'Stitch Together'
            ],
            'archer kindred': [
                'Greatbow Doyen', "Archer's Parapet", 'Jagged-Scar Archers', 'Silklash Spider', 'Elite Scaleguard',
                'Kyren Sniper', 'Viridian Longbow', 'Brigid, Hero of Kinsbaile', 'Longshot Squad', 'Evolution Sage'
            ],
            'eerie': [
                "Sythis, Harvest's Hand", "Enchantress's Presence", 'Setessan Champion', 'Eidolon of Blossoms', 'Mesa Enchantress',
                'Sterling Grove', 'Calix, Guided by Fate', 'Femeref Enchantress', 'Satyr Enchanter', 'Argothian Enchantress'
            ],
        }
        keys = manual.get(name_lower, [])
        if not keys:
            # Derive naive tokens: split into words longer than 3 characters
            import re
            keys = [w for w in re.findall(r'[a-zA-Z\+\/]+', name_lower) if len(w) > 3 or '+1/+1' in w]
        merged: List[Tuple[float, str, Set[str]]] = []
        seen: Set[str] = set()
        # Insert manual card overrides first (respect the rank cap if available)
        if name_lower in manual_cards and name_index:
            for card in manual_cards[name_lower]:
                tup = name_index.get(card)
                if not tup:
                    continue
                r, n, ci = tup
                if r > args.heuristic_rank_cap:
                    continue
                if n in seen:
                    continue
                seen.add(n)
                merged.append(tup)
        for k in keys:
            bucket = text_index.get(k)
            if not bucket:
                continue
            for r, n, ci in bucket[:120]:
                if n in seen:
                    continue
                if r > args.heuristic_rank_cap:
                    continue
                # Skip staples unless the keyword appears in the name (avoid universal ramp/utility artifacts)
                if n in staples_block and k not in n.lower():
                    continue
                seen.add(n)
                merged.append((r, n, ci))
            if len(merged) >= 60:
                break
        return merged
    for path in sorted(CATALOG_DIR.glob('*.yml')):
        data = load_yaml(path)
        if not data or not isinstance(data, dict) or not data.get('display_name'):
            continue
        notes = data.get('notes')
        if isinstance(notes, str) and 'Deprecated alias file' in notes:
            continue
        ex = data.get('example_commanders')
        if not isinstance(ex, list):
            ex = []
            data['example_commanders'] = ex
        need_rebalance = False
        if args.base_first_rebuild:
            new_list = rebuild_base_first(
                data,
                theme_hits,
                args.min,
                color_pool,
                annotate_color_reason=args.annotate_color_fallback_commanders,
            )
            if new_list != ex:
                data['example_commanders'] = new_list
                changed_count += 1
                print(f"[rebuild] {path.name}: {len(ex)} -> {len(new_list)}")
                if args.apply:
                    save_yaml(path, data)
        else:
            if len(ex) >= args.min:
                if args.rebalance and data.get('display_name'):
                    base_tag = data['display_name']
                    base_cands = {n for _, n in theme_hits.get(base_tag, [])}
                    existing_base_examples = [e for e in ex if (e.split(' - Synergy')[0]) in base_cands and ' - Synergy (' not in e]
                    if len(existing_base_examples) < args.base_min and base_cands:
                        need_rebalance = True
                if not need_rebalance:
                    pass  # leave commanders untouched (might still fill cards)
            if need_rebalance:
                orig_len = len(ex)
                base_tag = data['display_name']
                base_cands_ordered = [n for _, n in theme_hits.get(base_tag, [])]
                current_base_names = {e.split(' - Synergy')[0] for e in ex}
                additions: List[str] = []
                for cname in base_cands_ordered:
                    if len([a for a in ex + additions if ' - Synergy (' not in a]) >= args.base_min:
                        break
                    if cname in current_base_names:
                        continue
                    additions.append(cname)
                    current_base_names.add(cname)
                if additions:
                    data['example_commanders'] = additions + ex
                    changed_count += 1
                    print(f"[rebalance] {path.name}: inserted {len(additions)} base exemplars (len {orig_len} -> {len(data['example_commanders'])})")
                    if args.apply:
                        save_yaml(path, data)
            else:
                if len(ex) < args.min:
                    orig_len = len(ex)
                    changed, added = pad_theme(
                        data,
                        theme_hits,
                        args.min,
                        color_pool,
                        base_min=args.base_min,
                        drop_annotation_if_base=not args.no_drop_base_annotation,
                    )
                    if changed:
                        changed_count += 1
                        print(f"[promote] {path.name}: {orig_len} -> {len(data['example_commanders'])} (added {len(added)})")
                        if args.apply:
                            save_yaml(path, data)
        # Example cards population
        if args.fill_example_cards:
            avoid = {c.split(' - Synergy')[0] for c in data.get('example_commanders', []) if isinstance(c, str)}
            pre_cards_len = len(data.get('example_cards') or []) if isinstance(data.get('example_cards'), list) else 0
            # If there are no direct tag hits for the base theme AND heuristics are enabled, inject synthetic hits
            display = data.get('display_name') or ''
            if args.text_heuristics and display and not theme_card_hits.get(display):
                cand = heuristic_candidates(display)
                if cand:
                    theme_card_hits[display] = cand
            # Build the global duplicate frequency map ONCE (baseline prior to this run) if the threshold is active
            if args.common_card_threshold > 0 and 'GLOBAL_CARD_FREQ' not in globals():
                freq: Dict[str, int] = {}
                total_themes = 0
                for fp0 in CATALOG_DIR.glob('*.yml'):
                    dat0 = load_yaml(fp0)
                    if not isinstance(dat0, dict):
                        continue
                    ecs0 = dat0.get('example_cards')
                    if not isinstance(ecs0, list) or not ecs0:
                        continue
                    total_themes += 1
                    seen_local: Set[str] = set()
                    for c in ecs0:
                        if not isinstance(c, str) or c in seen_local:
                            continue
                        seen_local.add(c)
                        freq[c] = freq.get(c, 0) + 1
                globals()['GLOBAL_CARD_FREQ'] = (freq, total_themes)
            # Apply duplicate filtering to candidate lists (do NOT mutate existing example_cards)
            if args.common_card_threshold > 0 and 'GLOBAL_CARD_FREQ' in globals():
                freq_map, total_prev = globals()['GLOBAL_CARD_FREQ']
                if total_prev > 0:  # avoid division by zero
                    cutoff = args.common_card_threshold

                    def _filter(lst: List[Tuple[float, str, Set[str]]]) -> List[Tuple[float, str, Set[str]]]:
                        out: List[Tuple[float, str, Set[str]]] = []
                        for r, n, cset in lst:
                            if (freq_map.get(n, 0) / total_prev) > cutoff:
                                continue
                            out.append((r, n, cset))
                        return out

                    if display in theme_card_hits:
                        theme_card_hits[display] = _filter(theme_card_hits[display])
                    for syn in (data.get('synergies') or []):
                        if syn in theme_card_hits:
                            theme_card_hits[syn] = _filter(theme_card_hits[syn])
            changed_cards, added_cards = fill_example_cards(
                data,
                theme_card_hits,
                card_color_pool,
                # Keep the target upper bound even when --no-generic-pad so we still collect
                # base + synergy thematic cards; the flag simply disables color/generic
                # fallback padding rather than suppressing all population.
                args.cards_target,
                avoid=avoid,
                allow_color_fallback=(not args.cards_no_color_fallback and not args.no_generic_pad),
                rebuild=args.rebuild_example_cards,
            )
            # Optional second, limited color-fallback pass for sparse themes
            if (not changed_cards or len(data.get('example_cards', []) or []) < args.cards_target) and args.cards_limited_color_fallback_threshold > 0 and args.cards_no_color_fallback:
                current_len = len(data.get('example_cards') or [])
                if current_len < args.cards_limited_color_fallback_threshold:
                    # Top up with color fallback only for the remaining slots
                    changed2, added2 = fill_example_cards(
                        data,
                        theme_card_hits,
                        card_color_pool,
                        args.cards_target,
                        avoid=avoid,
                        allow_color_fallback=True,
                        rebuild=False,
                    )
                    if changed2:
                        changed_cards = True
                        added_cards.extend(added2)
            if changed_cards:
                cards_changed += 1
                print(f"[cards] {path.name}: {pre_cards_len} -> {len(data['example_cards'])} (added {len(added_cards)})")
                if args.apply:
                    save_yaml(path, data)
    print(f"[promote] modified {changed_count} themes")
    if args.fill_example_cards:
        print(f"[cards] modified {cards_changed} themes (target {args.cards_target})")
    if args.print_dup_metrics and 'GLOBAL_CARD_FREQ' in globals():
        freq_map, total_prev = globals()['GLOBAL_CARD_FREQ']
        if total_prev:
            items = sorted(freq_map.items(), key=lambda x: (-x[1], x[0]))[:30]
            print('[dup-metrics] Top shared example_cards (baseline before this run):')
            for name, cnt in items:
                print(f"  {name}: {cnt}/{total_prev} ({cnt / max(total_prev, 1):.1%})")
    raise SystemExit(0)

if __name__ == '__main__':  # pragma: no cover
    main()