mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-17 08:00:13 +01:00
feat(tagging+archetypes): add Pillowfort/Politics/Midrange/Toolbox tagging and unify archetype presence skip logic
This commit is contained in:
parent
f2a76d2ffc
commit
6d6243d6be
47 changed files with 21133 additions and 839 deletions
|
|
@ -22,8 +22,9 @@ import json
|
|||
import os
|
||||
import sys
|
||||
import time
|
||||
import random
|
||||
from collections import Counter
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
|
|
@ -32,12 +33,24 @@ try: # Optional
|
|||
except Exception: # pragma: no cover
|
||||
yaml = None
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
CODE_ROOT = ROOT / 'code'
|
||||
if str(CODE_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(CODE_ROOT))
|
||||
|
||||
from scripts.extract_themes import ( # type: ignore
|
||||
try:
|
||||
# Support running as `python code/scripts/build_theme_catalog.py` when 'code' already on path
|
||||
from scripts.extract_themes import ( # type: ignore
|
||||
BASE_COLORS,
|
||||
collect_theme_tags_from_constants,
|
||||
collect_theme_tags_from_tagger_source,
|
||||
gather_theme_tag_rows,
|
||||
tally_tag_frequencies_by_base_color,
|
||||
compute_cooccurrence,
|
||||
cooccurrence_scores_for,
|
||||
derive_synergies_for_tags,
|
||||
apply_normalization,
|
||||
load_whitelist_config,
|
||||
should_keep_theme,
|
||||
)
|
||||
except ModuleNotFoundError:
|
||||
# Fallback: direct relative import when running within scripts package context
|
||||
from extract_themes import ( # type: ignore
|
||||
BASE_COLORS,
|
||||
collect_theme_tags_from_constants,
|
||||
collect_theme_tags_from_tagger_source,
|
||||
|
|
@ -48,8 +61,13 @@ from scripts.extract_themes import ( # type: ignore
|
|||
derive_synergies_for_tags,
|
||||
apply_normalization,
|
||||
load_whitelist_config,
|
||||
should_keep_theme,
|
||||
)
|
||||
should_keep_theme,
|
||||
)
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
CODE_ROOT = ROOT / 'code'
|
||||
if str(CODE_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(CODE_ROOT))
|
||||
|
||||
CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog'
|
||||
OUTPUT_JSON = ROOT / 'config' / 'themes' / 'theme_list.json'
|
||||
|
|
@ -66,6 +84,17 @@ class ThemeYAML:
|
|||
primary_color: Optional[str] = None
|
||||
secondary_color: Optional[str] = None
|
||||
notes: str = ''
|
||||
# Phase D+ editorial metadata (may be absent in older files)
|
||||
example_commanders: List[str] = field(default_factory=list)
|
||||
example_cards: List[str] = field(default_factory=list)
|
||||
synergy_commanders: List[str] = field(default_factory=list)
|
||||
deck_archetype: Optional[str] = None
|
||||
popularity_hint: Optional[str] = None
|
||||
popularity_bucket: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
editorial_quality: Optional[str] = None # draft|reviewed|final (optional quality flag)
|
||||
# Internal bookkeeping: source file path for backfill writes
|
||||
_path: Optional[Path] = None
|
||||
|
||||
|
||||
def _log(msg: str, verbose: bool): # pragma: no cover
|
||||
|
|
@ -103,6 +132,15 @@ def load_catalog_yaml(verbose: bool) -> Dict[str, ThemeYAML]:
|
|||
primary_color=data.get('primary_color'),
|
||||
secondary_color=data.get('secondary_color'),
|
||||
notes=str(data.get('notes') or ''),
|
||||
example_commanders=list(data.get('example_commanders') or []),
|
||||
example_cards=list(data.get('example_cards') or []),
|
||||
synergy_commanders=list(data.get('synergy_commanders') or []),
|
||||
deck_archetype=data.get('deck_archetype'),
|
||||
popularity_hint=data.get('popularity_hint'),
|
||||
popularity_bucket=data.get('popularity_bucket'),
|
||||
description=data.get('description'),
|
||||
editorial_quality=data.get('editorial_quality'),
|
||||
_path=path,
|
||||
)
|
||||
except Exception:
|
||||
continue
|
||||
|
|
@ -206,12 +244,358 @@ def infer_synergies(anchor: str, curated: List[str], enforced: List[str], analyt
|
|||
return out
|
||||
|
||||
|
||||
def _auto_description(theme: str, synergies: List[str]) -> str:
|
||||
"""Generate a concise description for a theme using heuristics.
|
||||
|
||||
Rules:
|
||||
- Kindred / tribal: "Focuses on getting a high number of <Type> creatures into play with shared payoffs (e.g., X, Y)."
|
||||
- Proliferate: emphasize adding and multiplying counters.
|
||||
- +1/+1 Counters / Counters Matter: growth & scaling payoffs.
|
||||
- Graveyard / Reanimate: recursion loops & value from graveyard.
|
||||
- Tokens / Treasure: generating and exploiting resource tokens.
|
||||
- Default: "Builds around <theme> leveraging synergies with <top 2 synergies>."
|
||||
"""
|
||||
base = theme.strip()
|
||||
lower = base.lower()
|
||||
syn_preview = [s for s in synergies if s and s != theme][:4]
|
||||
def list_fmt(items: List[str], cap: int = 3) -> str:
|
||||
if not items:
|
||||
return ''
|
||||
items = items[:cap]
|
||||
if len(items) == 1:
|
||||
return items[0]
|
||||
return ', '.join(items[:-1]) + f" and {items[-1]}"
|
||||
|
||||
# Identify top synergy preview (skip self)
|
||||
syn_preview = [s for s in synergies if s and s.lower() != lower][:4]
|
||||
syn_fmt2 = list_fmt(syn_preview, 2)
|
||||
|
||||
# --- Mapping refactor (Phase D+ extension) ---
|
||||
# Ordered list of mapping rules. Each rule: (list_of_substring_triggers, description_template_fn)
|
||||
# The first matching rule wins. Substring matches are on `lower`.
|
||||
def synergic(phrase: str) -> str:
|
||||
if syn_fmt2:
|
||||
return phrase + (f" Synergies like {syn_fmt2} reinforce the plan." if not phrase.endswith('.') else f" Synergies like {syn_fmt2} reinforce the plan.")
|
||||
return phrase
|
||||
|
||||
# Attempt to load external mapping file (YAML) for curator overrides.
|
||||
external_mapping: List[Tuple[List[str], Any]] = []
|
||||
mapping_path = ROOT / 'config' / 'themes' / 'description_mapping.yml'
|
||||
if yaml is not None and mapping_path.exists(): # pragma: no cover (I/O heavy)
|
||||
try:
|
||||
raw_map = yaml.safe_load(mapping_path.read_text(encoding='utf-8')) or []
|
||||
if isinstance(raw_map, list):
|
||||
for item in raw_map:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
triggers = item.get('triggers') or []
|
||||
desc_template = item.get('description') or ''
|
||||
if not (isinstance(triggers, list) and isinstance(desc_template, str) and triggers):
|
||||
continue
|
||||
triggers_norm = [str(t).lower() for t in triggers if isinstance(t, str) and t]
|
||||
if not triggers_norm:
|
||||
continue
|
||||
def _factory(template: str):
|
||||
def _fn():
|
||||
if '{SYNERGIES}' in template:
|
||||
rep = f" Synergies like {syn_fmt2} reinforce the plan." if syn_fmt2 else ''
|
||||
return template.replace('{SYNERGIES}', rep)
|
||||
# If template omitted placeholder but we have synergies, append politely.
|
||||
if syn_fmt2:
|
||||
return template.rstrip('.') + f". Synergies like {syn_fmt2} reinforce the plan."
|
||||
return template
|
||||
return _fn
|
||||
external_mapping.append((triggers_norm, _factory(desc_template)))
|
||||
except Exception:
|
||||
external_mapping = []
|
||||
|
||||
MAPPING_RULES: List[Tuple[List[str], Any]] = external_mapping if external_mapping else [
|
||||
(['aristocrats', 'aristocrat'], lambda: synergic('Sacrifices expendable creatures and tokens to trigger death payoffs, recursion, and incremental drain.')),
|
||||
(['sacrifice'], lambda: synergic('Leverages sacrifice outlets and death triggers to grind incremental value and drain opponents.')),
|
||||
(['spellslinger', 'spells matter', 'magecraft', 'prowess'], lambda: 'Chains cheap instants & sorceries for velocity—converting triggers into scalable damage or card advantage before a finisher.'),
|
||||
(['voltron'], lambda: 'Stacks auras, equipment, and protection on a single threat to push commander damage with layered resilience.'),
|
||||
(['group hug'], lambda: 'Accelerates the whole table (cards / mana / tokens) to shape politics, then pivots that shared growth into asymmetric advantage.'),
|
||||
(['pillowfort'], lambda: 'Deploys deterrents and taxation effects to deflect aggression while assembling a protected win route.'),
|
||||
(['stax'], lambda: 'Applies asymmetric resource denial (tax, tap, sacrifice, lock pieces) to throttle opponents while advancing a resilient engine.'),
|
||||
(['aggro','burn'], lambda: 'Applies early pressure and combat tempo to close the game before slower value engines stabilize.'),
|
||||
(['control'], lambda: 'Trades efficiently, accrues card advantage, and wins via inevitability once the board is stabilized.'),
|
||||
(['midrange'], lambda: 'Uses flexible value threats & interaction, pivoting between pressure and attrition based on table texture.'),
|
||||
(['ramp','big mana'], lambda: 'Accelerates mana ahead of curve, then converts surplus into oversized threats or multi-spell bursts.'),
|
||||
(['combo'], lambda: 'Assembles compact piece interactions to generate infinite or overwhelming advantage, protected by tutors & stack interaction.'),
|
||||
(['storm'], lambda: 'Builds storm count with cheap spells & mana bursts, converting it into a lethal payoff turn.'),
|
||||
(['wheel','wheels'], lambda: 'Loops mass draw/discard effects to refill, disrupt sculpted hands, and weaponize symmetrical replacement triggers.'),
|
||||
(['mill'], lambda: 'Attacks libraries as a resource—looping self-mill or opponent mill into recursion and payoff engines.'),
|
||||
(['reanimate','graveyard','dredge'], lambda: 'Loads high-impact cards into the graveyard early and reanimates them for explosive tempo or combo loops.'),
|
||||
(['blink','flicker'], lambda: 'Recycles enter-the-battlefield triggers through blink/flicker loops for compounding value and soft locks.'),
|
||||
(['landfall','lands matter','lands-matter'], lambda: 'Abuses extra land drops and recursion to chain Landfall triggers and scale permanent-based payoffs.'),
|
||||
(['artifact tokens'], lambda: 'Generates artifact tokens as modular resources—fueling sacrifice, draw, and cost-reduction engines.'),
|
||||
(['artifact'], lambda: 'Leverages dense artifact counts for cost reduction, recursion, and modular scaling payoffs.'),
|
||||
(['equipment'], lambda: 'Tutors and reuses equipment to stack stats/keywords onto resilient bodies for persistent pressure.'),
|
||||
(['constellation'], lambda: 'Chains enchantment drops to trigger constellation loops in draw, drain, or scaling effects.'),
|
||||
(['enchant'], lambda: 'Stacks enchantment-based engines (cost reduction, constellation, aura recursion) for relentless value accrual.'),
|
||||
(['shrines'], lambda: 'Accumulates Shrines whose upkeep triggers scale multiplicatively into inevitability.'),
|
||||
(['token'], lambda: 'Goes wide with creature tokens then converts mass into damage, draw, drain, or sacrifice engines.'),
|
||||
(['treasure'], lambda: 'Produces Treasure tokens as flexible ramp & combo fuel enabling explosive payoff turns.'),
|
||||
(['clue','investigate'], lambda: 'Banks Clue tokens for delayed card draw while fueling artifact & token synergies.'),
|
||||
(['food'], lambda: 'Creates Food tokens for life padding and sacrifice loops that translate into drain, draw, or recursion.'),
|
||||
(['blood'], lambda: 'Uses Blood tokens to loot, set up graveyard recursion, and trigger discard/madness payoffs.'),
|
||||
(['map token','map tokens','map '], lambda: 'Generates Map tokens to surveil repeatedly, sculpting draws and fueling artifact/token synergies.'),
|
||||
(['incubate','incubator'], lambda: 'Banks Incubator tokens then transforms them into delayed board presence & artifact synergy triggers.'),
|
||||
(['powerstone'], lambda: 'Creates Powerstones for non-creature ramp powering large artifacts and activation-heavy engines.'),
|
||||
(['role token','role tokens','role '], lambda: 'Applies Role tokens as stackable mini-auras that generate incremental buffs or sacrifice fodder.'),
|
||||
(['energy'], lambda: 'Accumulates Energy counters as a parallel resource spent for tempo spikes, draw, or scalable removal.'),
|
||||
(['poison','infect','toxic'], lambda: 'Leverages Infect/Toxic pressure and proliferate to accelerate poison win thresholds.'),
|
||||
(['proliferate'], lambda: 'Multiplies diverse counters (e.g., +1/+1, loyalty, poison) to escalate board state and inevitability.'),
|
||||
(['+1/+1 counters','counters matter','counters-matter'], lambda: 'Stacks +1/+1 counters broadly then doubles, proliferates, or redistributes them for exponential scaling.'),
|
||||
(['-1/-1 counters'], lambda: 'Spreads -1/-1 counters for removal, attrition, and loop engines leveraging death & sacrifice triggers.'),
|
||||
(['experience'], lambda: 'Builds experience counters to scale commander-centric engines into exponential payoffs.'),
|
||||
(['loyalty','superfriends','planeswalker'], lambda: 'Protects and reuses planeswalkers—amplifying loyalty via proliferate and recursion for inevitability.'),
|
||||
(['shield counter'], lambda: 'Applies shield counters to insulate threats and create lopsided removal trades.'),
|
||||
(['sagas matter','sagas'], lambda: 'Loops and resets Sagas to repeatedly harvest chapter-based value sequences.'),
|
||||
(['lifegain','life gain','life-matters'], lambda: 'Turns repeat lifegain triggers into card draw, scaling bodies, or drain-based win pressure.'),
|
||||
(['lifeloss','life loss'], lambda: 'Channels symmetrical life loss into card flow, recursion, and inevitability drains.'),
|
||||
(['theft','steal'], lambda: 'Acquires opponents’ permanents temporarily or permanently to convert their resources into board control.'),
|
||||
(['devotion'], lambda: 'Concentrates colored pips to unlock Devotion payoffs and scalable static advantages.'),
|
||||
(['domain'], lambda: 'Assembles multiple basic land types rapidly to scale Domain-based effects.'),
|
||||
(['metalcraft'], lambda: 'Maintains ≥3 artifacts to turn on Metalcraft efficiencies and scaling bonuses.'),
|
||||
(['affinity'], lambda: 'Reduces spell costs via board resource counts (Affinity) enabling explosive early multi-spell turns.'),
|
||||
(['improvise'], lambda: 'Taps artifacts as pseudo-mana (Improvise) to deploy oversized non-artifact spells ahead of curve.'),
|
||||
(['convoke'], lambda: 'Converts creature presence into mana (Convoke) accelerating large or off-color spells.'),
|
||||
(['cascade'], lambda: 'Chains cascade triggers to convert single casts into multi-spell value bursts.'),
|
||||
(['mutate'], lambda: 'Stacks mutate layers to reuse mutate triggers and build a resilient evolving threat.'),
|
||||
(['evolve'], lambda: 'Sequentially upgrades creatures with Evolve counters, then leverages accumulated stats or counter synergies.'),
|
||||
(['delirium'], lambda: 'Diversifies graveyard card types to unlock Delirium power thresholds.'),
|
||||
(['threshold'], lambda: 'Fills the graveyard quickly to meet Threshold counts and upgrade spell/creature efficiencies.'),
|
||||
(['vehicles','crew '], lambda: 'Leverages efficient Vehicles and crew bodies to field evasive, sweep-resilient threats.'),
|
||||
(['goad'], lambda: 'Redirects combat outward by goading opponents’ creatures, destabilizing defenses while you build advantage.'),
|
||||
(['monarch'], lambda: 'Claims and defends the Monarch for sustained card draw with evasion & deterrents.'),
|
||||
(['surveil'], lambda: 'Continuously filters with Surveil to sculpt draws, fuel recursion, and enable graveyard synergies.'),
|
||||
(['explore'], lambda: 'Uses Explore triggers to smooth draws, grow creatures, and feed graveyard-adjacent engines.'),
|
||||
(['exploit'], lambda: 'Sacrifices creatures on ETB (Exploit) converting fodder into removal, draw, or recursion leverage.'),
|
||||
(['venture'], lambda: 'Repeats Venture into the Dungeon steps to layer incremental room rewards into compounding advantage.'),
|
||||
(['dungeon'], lambda: 'Progresses through dungeons repeatedly to chain room value and synergize with venture payoffs.'),
|
||||
(['initiative'], lambda: 'Claims the Initiative, advancing the Undercity while defending control of the progression track.'),
|
||||
(['backgrounds matter','background'], lambda: 'Pairs a Commander with Backgrounds for modular static buffs & class-style customization.'),
|
||||
(['connive'], lambda: 'Uses Connive looting + counters to sculpt hands, grow threats, and feed recursion lines.'),
|
||||
(['discover'], lambda: 'Leverages Discover to cheat spell mana values, chaining free cascade-like board development.'),
|
||||
(['craft'], lambda: 'Transforms / upgrades permanents via Craft, banking latent value until a timing pivot.'),
|
||||
(['learn'], lambda: 'Uses Learn to toolbox from side selections (or discard/draw) enhancing adaptability & consistency.'),
|
||||
(['escape'], lambda: 'Escapes threats from the graveyard by exiling spent resources, generating recursive inevitability.'),
|
||||
(['flashback'], lambda: 'Replays instants & sorceries from the graveyard (Flashback) for incremental spell velocity.'),
|
||||
(['aftermath'], lambda: 'Extracts two-phase value from split Aftermath spells, maximizing flexible sequencing.'),
|
||||
(['adventure'], lambda: 'Casts Adventure spell sides first to stack value before committing creature bodies to board.'),
|
||||
(['foretell'], lambda: 'Foretells spells early to smooth curve, conceal information, and discount impactful future turns.'),
|
||||
(['miracle'], lambda: 'Manipulates topdecks / draw timing to exploit Miracle cost reductions on splashy spells.'),
|
||||
(['kicker','multikicker'], lambda: 'Kicker / Multikicker spells scale flexibly—paying extra mana for amplified late-game impact.'),
|
||||
(['buyback'], lambda: 'Loops Buyback spells to convert excess mana into repeatable effects & inevitability.'),
|
||||
(['suspend'], lambda: 'Suspends spells early to pay off delayed powerful effects at discounted timing.'),
|
||||
(['retrace'], lambda: 'Turns dead land draws into fuel by recasting Retrace spells for attrition resilience.'),
|
||||
(['rebound'], lambda: 'Uses Rebound to double-cast value spells, banking a delayed second resolution.'),
|
||||
(['escalate'], lambda: 'Selects multiple modes on Escalate spells, trading mana/cards for flexible stacked effects.'),
|
||||
(['overload'], lambda: 'Overloads modal spells into one-sided board impacts or mass disruption swings.'),
|
||||
(['prowl'], lambda: 'Enables Prowl cost reductions via tribe-based combat connections, accelerating tempo sequencing.'),
|
||||
(['delve'], lambda: 'Exiles graveyard cards to pay for Delve spells, converting stocked yard into mana efficiency.'),
|
||||
(['madness'], lambda: 'Turns discard into mana-efficient Madness casts, leveraging looting & Blood token filtering.'),
|
||||
(['escape'], lambda: 'Recurs Escape cards by exiling spent graveyard fodder for inevitability. (dedupe)')
|
||||
]
|
||||
|
||||
for keys, fn in MAPPING_RULES:
|
||||
for k in keys:
|
||||
if k in lower:
|
||||
try:
|
||||
return fn()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Additional generic counters subtype fallback (not already matched)
|
||||
if lower.endswith(' counters') and all(x not in lower for x in ['+1/+1', '-1/-1', 'poison']):
|
||||
root = base.replace('Counters','').strip()
|
||||
return f"Accumulates {root.lower()} counters to unlock scaling payoffs, removal triggers, or delayed value conversions.".replace(' ',' ')
|
||||
|
||||
# (Legacy chain retained for any themes not yet incorporated in mapping; will be pruned later.)
|
||||
if lower == 'aristocrats' or 'aristocrat' in lower or 'sacrifice' in lower:
|
||||
core = 'Sacrifices expendable creatures and tokens to trigger death payoffs, recursive engines, and incremental drain.'
|
||||
if syn_fmt2:
|
||||
return core + f" Synergies like {syn_fmt2} reinforce inevitability."
|
||||
return core
|
||||
if 'spellslinger' in lower or 'spells matter' in lower or (lower == 'spells') or 'prowess' in lower or 'magecraft' in lower:
|
||||
return ("Chains cheap instants & sorceries for velocity—turning card draw, mana bursts, and prowess/Magecraft triggers into"
|
||||
" scalable damage or resource advantage before a decisive finisher.")
|
||||
if 'voltron' in lower:
|
||||
return ("Stacks auras, equipment, and protective buffs onto a single threat—pushing commander damage with evasion, recursion,"
|
||||
" and layered protection.")
|
||||
if lower == 'group hug' or 'group hug' in lower:
|
||||
return ("Accelerates the whole table with cards, mana, or tokens to shape politics—then pivots shared growth into subtle win paths"
|
||||
" or leverage effects that scale better for you.")
|
||||
if 'pillowfort' in lower:
|
||||
return ("Erects deterrents and taxation effects to discourage attacks while assembling incremental advantage and a protected win condition.")
|
||||
if 'stax' in lower:
|
||||
return ("Applies asymmetric resource denial (tax, tap, sacrifice, lock pieces) to constrict opponents while advancing a resilient engine.")
|
||||
if lower in {'aggro', 'burn'} or 'aggro' in lower:
|
||||
return ("Applies fast early pressure and combat-focused tempo to reduce life totals before slower decks stabilize.")
|
||||
if lower == 'control' or 'control' in lower:
|
||||
return ("Trades efficiently with threats, accumulates card advantage, and stabilizes into inevitability via superior late-game engines.")
|
||||
if 'midrange' in lower:
|
||||
return ("Deploys flexible, value-centric threats and interaction—pivoting between aggression and attrition based on table texture.")
|
||||
if 'ramp' in lower or 'big mana' in lower:
|
||||
return ("Accelerates mana production ahead of curve, then converts the surplus into oversized threats or multi-spell turns.")
|
||||
if 'combo' in lower:
|
||||
return ("Assembles a small set of interlocking pieces that produce infinite or overwhelming advantage, protecting the line with tutors & stack interaction.")
|
||||
if 'storm' in lower:
|
||||
return ("Builds a critical mass of cheap spells and mana bursts to inflate storm count, converting it into a lethal finisher or overwhelming value turn.")
|
||||
if 'wheels' in lower or 'wheel' in lower:
|
||||
return ("Loops mass draw/discard effects (wheel spells) to refill, disrupt sculpted hands, and amplify payoffs like locust or damage triggers.")
|
||||
if 'mill' in lower:
|
||||
return ("Targets libraries as the primary resource—using repeatable self or opponent milling plus recursion / payoff loops.")
|
||||
if 'reanimate' in lower or (('reanimat' in lower or 'graveyard' in lower) and 'aristocrat' not in lower):
|
||||
return ("Dumps high-impact creatures into the graveyard early, then reanimates them efficiently for explosive board presence or combo loops.")
|
||||
if 'blink' in lower or 'flicker' in lower:
|
||||
return ("Repeatedly exiles and returns creatures to reuse powerful enter-the-battlefield triggers and incremental value engines.")
|
||||
if 'landfall' in lower or 'lands matter' in lower or 'lands-matter' in lower:
|
||||
return ("Accelerates extra land drops and recursion to trigger Landfall chains and scalable land-based payoffs.")
|
||||
if 'artifact' in lower and 'tokens' not in lower:
|
||||
return ("Leverages artifact density for cost reduction, recursion, and modular value engines—scaling with synergies that reward artifact count.")
|
||||
if 'equipment' in lower:
|
||||
return ("Equips repeatable stat and keyword boosts onto resilient bodies, tutoring and reusing gear to maintain pressure through removal.")
|
||||
if 'aura' in lower or 'enchant' in lower and 'enchantments matter' in lower:
|
||||
return ("Stacks enchantment or aura-based value engines (draw, cost reduction, constellation) into compounding board & card advantage.")
|
||||
if 'constellation' in lower:
|
||||
return ("Triggers constellation by repeatedly landing enchantments, converting steady plays into card draw, drain, or board scaling.")
|
||||
if 'shrine' in lower or 'shrines' in lower:
|
||||
return ("Accumulates Shrines whose upkeep triggers scale multiplicatively, protecting the board while compounding advantage.")
|
||||
if 'token' in lower and 'treasure' not in lower:
|
||||
return ("Goes wide generating expendable creature tokens, then converts board mass into damage, draw, or aristocrat-style drains.")
|
||||
if 'treasure' in lower:
|
||||
return ("Manufactures Treasure tokens as flexible ramp and combo fuel—translating temporary mana into explosive payoff turns.")
|
||||
if 'clue' in lower:
|
||||
return ("Generates Clue tokens as delayed draw—fueling card advantage engines and artifact/token synergies.")
|
||||
if 'food' in lower:
|
||||
return ("Creates Food tokens for life buffering and sacrifice value, converting them into draw, drain, or resource loops.")
|
||||
if 'blood' in lower:
|
||||
return ("Uses Blood tokens to filter draws, enable graveyard setups, and trigger discard/madness or artifact payoffs.")
|
||||
if 'map token' in lower or 'map' in lower and 'token' in lower:
|
||||
return ("Generates Map tokens to repeatedly surveil and sculpt draws while enabling artifact & token synergies.")
|
||||
if 'incubate' in lower or 'incubator' in lower:
|
||||
return ("Creates Incubator tokens then transforms them into creatures—banking future board presence and artifact synergies.")
|
||||
if 'powerstone' in lower:
|
||||
return ("Produces Powerstone tokens for non-creature ramp, channeling the mana into large artifacts or activated engines.")
|
||||
if 'role token' in lower or 'role' in lower and 'token' in lower:
|
||||
return ("Applies Role tokens as layered auras providing incremental buffs, sacrifice fodder, or value triggers.")
|
||||
if 'energy' in lower and 'counter' not in lower:
|
||||
return ("Accumulates Energy counters as a parallel resource—spending them for burst tempo, card flow, or scalable removal.")
|
||||
if 'poison' in lower or 'infect' in lower or 'toxic' in lower:
|
||||
return ("Applies poison counters through Infect/Toxic pressure and proliferate tools to accelerate an alternate win condition.")
|
||||
if 'proliferate' in lower:
|
||||
return ("Adds and multiplies counters (e.g., +1/+1, loyalty, poison) by repeatedly proliferating incremental board advantages.")
|
||||
if '+1/+1 counters' in lower or 'counters matter' in lower or 'counters-matter' in lower:
|
||||
return ("Stacks +1/+1 counters across the board, then amplifies them via doubling, proliferate, or modular scaling payoffs.")
|
||||
if 'dredge' in lower:
|
||||
return ("Replaces draws with self-mill to load the graveyard, then recurs or reanimates high-value pieces for compounding advantage.")
|
||||
if 'delirium' in lower:
|
||||
return ("Diversifies card types in the graveyard to unlock Delirium thresholds, turning on boosted stats or efficient effects.")
|
||||
if 'threshold' in lower:
|
||||
return ("Fills the graveyard rapidly to meet Threshold counts, upgrading spell efficiencies and creature stats.")
|
||||
if 'affinity' in lower:
|
||||
return ("Reduces spell costs via artifact / basic synergy counts, enabling explosive multi-spell turns and early board presence.")
|
||||
if 'improvise' in lower:
|
||||
return ("Taps artifacts as mana sources (Improvise) to cast oversized non-artifact spells ahead of curve.")
|
||||
if 'convoke' in lower:
|
||||
return ("Turns creatures into a mana engine (Convoke), deploying large spells while developing board presence.")
|
||||
if 'cascade' in lower:
|
||||
return ("Chains cascade triggers to convert high-cost spells into multiple free spells, snowballing value and board impact.")
|
||||
if 'mutate' in lower:
|
||||
return ("Stacks mutate piles to reuse mutate triggers while building a resilient, scaling singular threat.")
|
||||
if 'evolve' in lower:
|
||||
return ("Sequentially grows creatures with Evolve triggers, then leverages the accumulated stats or counter synergies.")
|
||||
if 'devotion' in lower:
|
||||
return ("Concentrates colored pips on permanents to unlock Devotion payoffs (static buffs, card draw, or burst mana).")
|
||||
if 'domain' in lower:
|
||||
return ("Assembles multiple basic land types quickly to scale Domain-based spells and effects.")
|
||||
if 'metalcraft' in lower:
|
||||
return ("Maintains a high artifact count (3+) to turn on efficient Metalcraft bonuses and scaling payoffs.")
|
||||
if 'vehicles' in lower or 'crew' in lower:
|
||||
return ("Uses under-costed Vehicles and efficient crew bodies—turning transient artifacts into evasive, hard-to-wipe threats.")
|
||||
if 'goad' in lower:
|
||||
return ("Forces opponents' creatures to attack each other (Goad), destabilizing defenses while you set up value engines.")
|
||||
if 'monarch' in lower:
|
||||
return ("Claims and defends the Monarch for steady card draw while using evasion, deterrents, or removal to keep the crown.")
|
||||
if 'investigate' in lower:
|
||||
return ("Generates Clue tokens to bank future card draw while triggering artifact and token-matter synergies.")
|
||||
if 'surveil' in lower:
|
||||
return ("Filters and stocks the graveyard with Surveil, enabling recursion, delve, and threshold-like payoffs.")
|
||||
if 'explore' in lower:
|
||||
return ("Uses Explore triggers to smooth draws, grow creatures with counters, and fuel graveyard-adjacent synergies.")
|
||||
if 'historic' in lower and 'historics' in lower:
|
||||
return ("Casts a dense mix of artifacts, legendaries, and sagas to trigger Historic-matter payoffs repeatedly.")
|
||||
if 'exploit' in lower:
|
||||
return ("Sacrifices creatures on ETB (Exploit) to convert fodder into removal, card draw, or recursion leverage.")
|
||||
if '-1/-1' in lower:
|
||||
return ("Distributes -1/-1 counters for removal, attrition, and combo loops—recycling or exploiting death triggers.")
|
||||
if 'experience' in lower:
|
||||
return ("Builds experience counters to scale repeatable commander-specific payoffs into exponential board or value growth.")
|
||||
if 'loyalty' in lower or 'superfriends' in lower or 'planeswalker' in lower:
|
||||
return ("Protects and reuses planeswalkers—stacking loyalty acceleration, proliferate, and recurring interaction for inevitability.")
|
||||
if 'shield counter' in lower or 'shield-counters' in lower:
|
||||
return ("Applies shield counters to insulate key threats, turning removal trades lopsided while advancing a protected board state.")
|
||||
if 'sagas matter' in lower or 'sagas' in lower:
|
||||
return ("Cycles through Saga chapters for repeatable value—abusing recursion, copying, or reset effects to replay powerful chapter triggers.")
|
||||
if 'exp counters' in lower:
|
||||
return ("Accumulates experience counters as a permanent scaling vector, compounding the efficiency of commander-centric engines.")
|
||||
if 'lifegain' in lower or 'life gain' in lower or 'life-matters' in lower:
|
||||
return ("Turns repeated lifegain triggers into card draw, scaling creatures, or alternate win drains while stabilizing vs. aggression.")
|
||||
if 'lifeloss' in lower and 'life loss' in lower:
|
||||
return ("Leverages incremental life loss across the table to fuel symmetric draw, recursion, and inevitability drains.")
|
||||
if 'wheels' in lower:
|
||||
return ("Continuously refills hands with mass draw/discard (wheel) effects, weaponizing symmetrical replacement via damage or token payoffs.")
|
||||
if 'theft' in lower or 'steal' in lower:
|
||||
return ("Temporarily or permanently acquires opponents' permanents, converting stolen assets into board control and resource denial.")
|
||||
if 'blink' in lower:
|
||||
return ("Loops enter-the-battlefield triggers via flicker/blink effects for compounding value and soft-lock synergies.")
|
||||
|
||||
# Remaining generic branch and tribal fallback
|
||||
if 'kindred' in lower or (base.endswith(' Tribe') or base.endswith(' Tribal')):
|
||||
# Extract creature type (first word before Kindred, or first token)
|
||||
parts = base.split()
|
||||
ctype = parts[0] if parts else 'creature'
|
||||
ex = list_fmt(syn_preview, 2)
|
||||
tail = f" (e.g., {ex})" if ex else ''
|
||||
return f"Focuses on getting a high number of {ctype} creatures into play with shared payoffs{tail}."
|
||||
if 'extra turn' in lower:
|
||||
return "Accumulates extra turn effects to snowball card advantage, combat steps, and inevitability."
|
||||
ex2 = list_fmt(syn_preview, 2)
|
||||
if ex2:
|
||||
return f"Builds around {base} leveraging synergies with {ex2}."
|
||||
return f"Builds around the {base} theme and its supporting synergies."
|
||||
|
||||
|
||||
def _derive_popularity_bucket(count: int, boundaries: List[int]) -> str:
|
||||
# boundaries expected ascending length 4 dividing into 5 buckets
|
||||
# Example: [50, 120, 250, 600]
|
||||
if count <= boundaries[0]:
|
||||
return 'Rare'
|
||||
if count <= boundaries[1]:
|
||||
return 'Niche'
|
||||
if count <= boundaries[2]:
|
||||
return 'Uncommon'
|
||||
if count <= boundaries[3]:
|
||||
return 'Common'
|
||||
return 'Very Common'
|
||||
|
||||
|
||||
def build_catalog(limit: int, verbose: bool) -> Dict[str, Any]:
|
||||
# Deterministic seed for inference ordering & any randomized fallback ordering
|
||||
seed_env = os.environ.get('EDITORIAL_SEED')
|
||||
if seed_env:
|
||||
try:
|
||||
random.seed(int(seed_env))
|
||||
except Exception:
|
||||
random.seed(seed_env)
|
||||
analytics = regenerate_analytics(verbose)
|
||||
whitelist = analytics['whitelist']
|
||||
synergy_cap = int(whitelist.get('synergy_cap', 0) or 0)
|
||||
normalization_map: Dict[str, str] = whitelist.get('normalization', {}) if isinstance(whitelist.get('normalization'), dict) else {}
|
||||
enforced_cfg: Dict[str, List[str]] = whitelist.get('enforced_synergies', {}) or {}
|
||||
aggressive_fill = bool(int(os.environ.get('EDITORIAL_AGGRESSIVE_FILL', '0') or '0'))
|
||||
|
||||
yaml_catalog = load_catalog_yaml(verbose)
|
||||
all_themes: Set[str] = set(analytics['theme_tags']) | {t.display_name for t in yaml_catalog.values()}
|
||||
|
|
@ -219,14 +603,58 @@ def build_catalog(limit: int, verbose: bool) -> Dict[str, Any]:
|
|||
all_themes = apply_normalization(all_themes, normalization_map)
|
||||
curated_baseline = derive_synergies_for_tags(all_themes)
|
||||
|
||||
# --- Synergy pairs fallback (external curated pairs) ---
|
||||
synergy_pairs_path = ROOT / 'config' / 'themes' / 'synergy_pairs.yml'
|
||||
synergy_pairs: Dict[str, List[str]] = {}
|
||||
if yaml is not None and synergy_pairs_path.exists(): # pragma: no cover (I/O)
|
||||
try:
|
||||
raw_pairs = yaml.safe_load(synergy_pairs_path.read_text(encoding='utf-8')) or {}
|
||||
sp = raw_pairs.get('synergy_pairs', {}) if isinstance(raw_pairs, dict) else {}
|
||||
if isinstance(sp, dict):
|
||||
for k, v in sp.items():
|
||||
if isinstance(k, str) and isinstance(v, list):
|
||||
cleaned = [str(x) for x in v if isinstance(x, str) and x]
|
||||
if cleaned:
|
||||
synergy_pairs[k] = cleaned[:8] # safety cap
|
||||
except Exception as _e: # pragma: no cover
|
||||
if verbose:
|
||||
print(f"[build_theme_catalog] Failed loading synergy_pairs.yml: {_e}", file=sys.stderr)
|
||||
# Apply normalization to synergy pair keys if needed
|
||||
if normalization_map and synergy_pairs:
|
||||
normalized_pairs: Dict[str, List[str]] = {}
|
||||
for k, lst in synergy_pairs.items():
|
||||
nk = normalization_map.get(k, k)
|
||||
normed_list = []
|
||||
seen = set()
|
||||
for s in lst:
|
||||
s2 = normalization_map.get(s, s)
|
||||
if s2 not in seen:
|
||||
normed_list.append(s2)
|
||||
seen.add(s2)
|
||||
if nk not in normalized_pairs:
|
||||
normalized_pairs[nk] = normed_list
|
||||
synergy_pairs = normalized_pairs
|
||||
|
||||
entries: List[Dict[str, Any]] = []
|
||||
processed = 0
|
||||
for theme in sorted(all_themes):
|
||||
sorted_themes = sorted(all_themes)
|
||||
if seed_env: # Optional shuffle for testing ordering stability (then re-sort deterministically by name removed)
|
||||
# Keep original alphabetical for stable UX; deterministic seed only affects downstream random choices.
|
||||
pass
|
||||
for theme in sorted_themes:
|
||||
if limit and processed >= limit:
|
||||
break
|
||||
processed += 1
|
||||
y = yaml_catalog.get(theme)
|
||||
curated_list = list(y.curated_synergies) if y and y.curated_synergies else curated_baseline.get(theme, [])
|
||||
curated_list = []
|
||||
if y and y.curated_synergies:
|
||||
curated_list = list(y.curated_synergies)
|
||||
else:
|
||||
# Baseline heuristics
|
||||
curated_list = curated_baseline.get(theme, [])
|
||||
# If still empty, attempt synergy_pairs fallback
|
||||
if (not curated_list) and theme in synergy_pairs:
|
||||
curated_list = list(synergy_pairs.get(theme, []))
|
||||
enforced_list: List[str] = []
|
||||
if y and y.enforced_synergies:
|
||||
for s in y.enforced_synergies:
|
||||
|
|
@ -240,6 +668,20 @@ def build_catalog(limit: int, verbose: bool) -> Dict[str, Any]:
|
|||
if not inferred_list and y and y.inferred_synergies:
|
||||
inferred_list = [s for s in y.inferred_synergies if s not in curated_list and s not in enforced_list]
|
||||
|
||||
# Aggressive fill mode: if after merge we would have <3 synergies (excluding curated/enforced), attempt to borrow
|
||||
# from global top co-occurrences even if below normal thresholds. This is opt-in for ultra sparse themes.
|
||||
if aggressive_fill and len(curated_list) + len(enforced_list) < 2 and len(inferred_list) < 2:
|
||||
anchor = theme
|
||||
co_map = analytics['co_map']
|
||||
if anchor in co_map:
|
||||
candidates = cooccurrence_scores_for(anchor, analytics['co_map'], analytics['tag_counts'], analytics['total_rows'])
|
||||
for other, score, co_count in candidates:
|
||||
if other in curated_list or other in enforced_list or other == anchor or other in inferred_list:
|
||||
continue
|
||||
inferred_list.append(other)
|
||||
if len(inferred_list) >= 4:
|
||||
break
|
||||
|
||||
if normalization_map:
|
||||
def _norm(seq: List[str]) -> List[str]:
|
||||
seen = set()
|
||||
|
|
@ -315,9 +757,44 @@ def build_catalog(limit: int, verbose: bool) -> Dict[str, Any]:
|
|||
# Pass through synergy_commanders if already curated (script will populate going forward)
|
||||
if hasattr(y, 'synergy_commanders') and getattr(y, 'synergy_commanders'):
|
||||
entry['synergy_commanders'] = [c for c in getattr(y, 'synergy_commanders') if isinstance(c, str)][:12]
|
||||
if hasattr(y, 'popularity_bucket') and getattr(y, 'popularity_bucket'):
|
||||
entry['popularity_bucket'] = getattr(y, 'popularity_bucket')
|
||||
if hasattr(y, 'editorial_quality') and getattr(y, 'editorial_quality'):
|
||||
entry['editorial_quality'] = getattr(y, 'editorial_quality')
|
||||
# Derive popularity bucket if absent using total frequency across colors
|
||||
if 'popularity_bucket' not in entry:
|
||||
total_freq = 0
|
||||
for c in analytics['frequencies'].keys():
|
||||
try:
|
||||
total_freq += int(analytics['frequencies'].get(c, {}).get(theme, 0))
|
||||
except Exception:
|
||||
pass
|
||||
# Heuristic boundaries (tunable via env override)
|
||||
b_env = os.environ.get('EDITORIAL_POP_BOUNDARIES') # e.g. "50,120,250,600"
|
||||
if b_env:
|
||||
try:
|
||||
parts = [int(x.strip()) for x in b_env.split(',') if x.strip()]
|
||||
if len(parts) == 4:
|
||||
boundaries = parts
|
||||
else:
|
||||
boundaries = [40, 100, 220, 500]
|
||||
except Exception:
|
||||
boundaries = [40, 100, 220, 500]
|
||||
else:
|
||||
boundaries = [40, 100, 220, 500]
|
||||
entry['popularity_bucket'] = _derive_popularity_bucket(total_freq, boundaries)
|
||||
# Description: respect curated YAML description if provided; else auto-generate.
|
||||
if y and hasattr(y, 'description') and getattr(y, 'description'):
|
||||
entry['description'] = getattr(y, 'description')
|
||||
else:
|
||||
try:
|
||||
entry['description'] = _auto_description(theme, entry.get('synergies', []))
|
||||
except Exception:
|
||||
pass
|
||||
entries.append(entry)
|
||||
|
||||
provenance = {
|
||||
# Renamed from 'provenance' to 'metadata_info' (migration phase)
|
||||
metadata_info = {
|
||||
'mode': 'merge',
|
||||
'generated_at': time.strftime('%Y-%m-%dT%H:%M:%S'),
|
||||
'curated_yaml_files': len(yaml_catalog),
|
||||
|
|
@ -325,20 +802,96 @@ def build_catalog(limit: int, verbose: bool) -> Dict[str, Any]:
|
|||
'inference': 'pmi',
|
||||
'version': 'phase-b-merge-v1'
|
||||
}
|
||||
# Optional popularity analytics export for Phase D metrics collection
|
||||
if os.environ.get('EDITORIAL_POP_EXPORT'):
|
||||
try:
|
||||
bucket_counts: Dict[str, int] = {}
|
||||
for t in entries:
|
||||
b = t.get('popularity_bucket', 'Unknown')
|
||||
bucket_counts[b] = bucket_counts.get(b, 0) + 1
|
||||
export = {
|
||||
'generated_at': metadata_info['generated_at'],
|
||||
'bucket_counts': bucket_counts,
|
||||
'total_themes': len(entries),
|
||||
}
|
||||
metrics_path = OUTPUT_JSON.parent / 'theme_popularity_metrics.json'
|
||||
with open(metrics_path, 'w', encoding='utf-8') as mf:
|
||||
json.dump(export, mf, indent=2)
|
||||
except Exception as _e: # pragma: no cover
|
||||
if verbose:
|
||||
print(f"[build_theme_catalog] Failed popularity metrics export: {_e}", file=sys.stderr)
|
||||
return {
|
||||
'themes': entries,
|
||||
'frequencies_by_base_color': analytics['frequencies'],
|
||||
'generated_from': 'merge (analytics + curated YAML + whitelist)',
|
||||
'provenance': provenance,
|
||||
'generated_from': 'merge (analytics + curated YAML + whitelist)',
|
||||
'metadata_info': metadata_info,
|
||||
'yaml_catalog': yaml_catalog, # include for optional backfill step
|
||||
# Lightweight analytics for downstream tests/reports (not written unless explicitly requested)
|
||||
'description_fallback_summary': _compute_fallback_summary(entries, analytics['frequencies']) if os.environ.get('EDITORIAL_INCLUDE_FALLBACK_SUMMARY') else None,
|
||||
}
|
||||
|
||||
|
||||
def _compute_fallback_summary(entries: List[Dict[str, Any]], freqs: Dict[str, Dict[str, int]]) -> Dict[str, Any]:
|
||||
"""Compute statistics about generic fallback descriptions.
|
||||
|
||||
A description is considered a generic fallback if it begins with one of the
|
||||
standard generic stems produced by _auto_description:
|
||||
- "Builds around "
|
||||
Tribal phrasing ("Focuses on getting a high number of ...") is NOT treated
|
||||
as generic; it conveys archetype specificity.
|
||||
"""
|
||||
def total_freq(theme: str) -> int:
|
||||
s = 0
|
||||
for c in freqs.keys():
|
||||
try:
|
||||
s += int(freqs.get(c, {}).get(theme, 0))
|
||||
except Exception:
|
||||
pass
|
||||
return s
|
||||
|
||||
generic: List[Dict[str, Any]] = []
|
||||
generic_with_synergies = 0
|
||||
generic_plain = 0
|
||||
for e in entries:
|
||||
desc = (e.get('description') or '').strip()
|
||||
if not desc.startswith('Builds around'):
|
||||
continue
|
||||
# Distinguish forms
|
||||
if desc.startswith('Builds around the '):
|
||||
generic_plain += 1
|
||||
else:
|
||||
generic_with_synergies += 1
|
||||
theme = e.get('theme')
|
||||
generic.append({
|
||||
'theme': theme,
|
||||
'popularity_bucket': e.get('popularity_bucket'),
|
||||
'synergy_count': len(e.get('synergies') or []),
|
||||
'total_frequency': total_freq(theme),
|
||||
'description': desc,
|
||||
})
|
||||
|
||||
generic.sort(key=lambda x: (-x['total_frequency'], x['theme']))
|
||||
return {
|
||||
'total_themes': len(entries),
|
||||
'generic_total': len(generic),
|
||||
'generic_with_synergies': generic_with_synergies,
|
||||
'generic_plain': generic_plain,
|
||||
'generic_pct': round(100.0 * len(generic) / max(1, len(entries)), 2),
|
||||
'top_generic_by_frequency': generic[:50], # cap for brevity
|
||||
}
|
||||
|
||||
|
||||
|
||||
def main(): # pragma: no cover
|
||||
parser = argparse.ArgumentParser(description='Build merged theme catalog (Phase B)')
|
||||
parser.add_argument('--limit', type=int, default=0)
|
||||
parser.add_argument('--verbose', action='store_true')
|
||||
parser.add_argument('--dry-run', action='store_true')
|
||||
parser.add_argument('--schema', action='store_true', help='Print JSON Schema for catalog and exit')
|
||||
parser.add_argument('--allow-limit-write', action='store_true', help='Allow writing theme_list.json when --limit > 0 (safety guard)')
|
||||
parser.add_argument('--backfill-yaml', action='store_true', help='Write auto-generated description & popularity_bucket back into YAML files (fills missing only)')
|
||||
parser.add_argument('--force-backfill-yaml', action='store_true', help='Force overwrite existing description/popularity_bucket in YAML when backfilling')
|
||||
parser.add_argument('--output', type=str, default=str(OUTPUT_JSON), help='Output path for theme_list.json (tests can override)')
|
||||
args = parser.parse_args()
|
||||
if args.schema:
|
||||
# Lazy import to avoid circular dependency: replicate minimal schema inline from models file if present
|
||||
|
|
@ -352,11 +905,92 @@ def main(): # pragma: no cover
|
|||
return
|
||||
data = build_catalog(limit=args.limit, verbose=args.verbose)
|
||||
if args.dry_run:
|
||||
print(json.dumps({'theme_count': len(data['themes']), 'provenance': data['provenance']}, indent=2))
|
||||
print(json.dumps({'theme_count': len(data['themes']), 'metadata_info': data['metadata_info']}, indent=2))
|
||||
else:
|
||||
os.makedirs(OUTPUT_JSON.parent, exist_ok=True)
|
||||
with open(OUTPUT_JSON, 'w', encoding='utf-8') as f:
|
||||
json.dump(data, f, indent=2, ensure_ascii=False)
|
||||
out_path = Path(args.output).resolve()
|
||||
target_is_default = out_path == OUTPUT_JSON
|
||||
if target_is_default and args.limit and not args.allow_limit_write:
|
||||
print(f"Refusing to overwrite {OUTPUT_JSON.name} with truncated list (limit={args.limit}). Use --allow-limit-write to force or omit --limit.", file=sys.stderr)
|
||||
return
|
||||
os.makedirs(out_path.parent, exist_ok=True)
|
||||
with open(out_path, 'w', encoding='utf-8') as f:
|
||||
json.dump({k: v for k, v in data.items() if k != 'yaml_catalog'}, f, indent=2, ensure_ascii=False)
|
||||
|
||||
# KPI fallback summary history (append JSONL) if computed
|
||||
if data.get('description_fallback_summary'):
|
||||
try:
|
||||
history_path = OUTPUT_JSON.parent / 'description_fallback_history.jsonl'
|
||||
record = {
|
||||
'timestamp': time.strftime('%Y-%m-%dT%H:%M:%S'),
|
||||
**(data['description_fallback_summary'] or {})
|
||||
}
|
||||
with open(history_path, 'a', encoding='utf-8') as hf:
|
||||
hf.write(json.dumps(record) + '\n')
|
||||
except Exception as _e: # pragma: no cover
|
||||
print(f"[build_theme_catalog] Failed writing KPI history: {_e}", file=sys.stderr)
|
||||
|
||||
# Optional YAML backfill step (CLI flag or env EDITORIAL_BACKFILL_YAML=1)
|
||||
do_backfill_env = bool(int(os.environ.get('EDITORIAL_BACKFILL_YAML', '0') or '0'))
|
||||
if (args.backfill_yaml or do_backfill_env) and target_is_default:
|
||||
# Safeguard: if catalog dir missing, attempt to auto-export Phase A YAML first
|
||||
if not CATALOG_DIR.exists(): # pragma: no cover (environmental)
|
||||
try:
|
||||
from scripts.export_themes_to_yaml import main as export_main # type: ignore
|
||||
export_main(['--force']) # type: ignore[arg-type]
|
||||
except Exception as _e:
|
||||
print(f"[build_theme_catalog] WARNING: catalog dir missing and auto export failed: {_e}", file=sys.stderr)
|
||||
if yaml is None:
|
||||
print('[build_theme_catalog] PyYAML not available; skipping YAML backfill', file=sys.stderr)
|
||||
else:
|
||||
force = args.force_backfill_yaml
|
||||
updated = 0
|
||||
for entry in data['themes']:
|
||||
theme_name = entry.get('theme')
|
||||
ty = data['yaml_catalog'].get(theme_name) if isinstance(data.get('yaml_catalog'), dict) else None
|
||||
if not ty or not getattr(ty, '_path', None):
|
||||
continue
|
||||
try:
|
||||
raw = yaml.safe_load(ty._path.read_text(encoding='utf-8')) or {}
|
||||
except Exception:
|
||||
continue
|
||||
changed = False
|
||||
# Metadata info stamping (formerly 'provenance')
|
||||
meta_block = raw.get('metadata_info') if isinstance(raw.get('metadata_info'), dict) else {}
|
||||
# Legacy migration: if no metadata_info but legacy provenance present, adopt it
|
||||
if not meta_block and isinstance(raw.get('provenance'), dict):
|
||||
meta_block = raw.get('provenance') # type: ignore
|
||||
changed = True
|
||||
if force or not meta_block.get('last_backfill'):
|
||||
meta_block['last_backfill'] = time.strftime('%Y-%m-%dT%H:%M:%S')
|
||||
meta_block['script'] = 'build_theme_catalog.py'
|
||||
meta_block['version'] = 'phase-b-merge-v1'
|
||||
raw['metadata_info'] = meta_block
|
||||
if 'provenance' in raw:
|
||||
del raw['provenance']
|
||||
changed = True
|
||||
# Backfill description
|
||||
if force or not raw.get('description'):
|
||||
if entry.get('description'):
|
||||
raw['description'] = entry['description']
|
||||
changed = True
|
||||
# Backfill popularity_bucket (always reflect derived unless pinned and not forcing?)
|
||||
if force or not raw.get('popularity_bucket'):
|
||||
if entry.get('popularity_bucket'):
|
||||
raw['popularity_bucket'] = entry['popularity_bucket']
|
||||
changed = True
|
||||
# Backfill editorial_quality if forcing and present in catalog entry but absent in YAML
|
||||
if force and entry.get('editorial_quality') and not raw.get('editorial_quality'):
|
||||
raw['editorial_quality'] = entry['editorial_quality']
|
||||
changed = True
|
||||
if changed:
|
||||
try:
|
||||
with open(ty._path, 'w', encoding='utf-8') as yf:
|
||||
yaml.safe_dump(raw, yf, sort_keys=False, allow_unicode=True)
|
||||
updated += 1
|
||||
except Exception as _e: # pragma: no cover
|
||||
print(f"[build_theme_catalog] Failed writing back {ty._path.name}: {_e}", file=sys.stderr)
|
||||
if updated and args.verbose:
|
||||
print(f"[build_theme_catalog] Backfilled metadata into {updated} YAML files", file=sys.stderr)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue