Mirror of https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-16 23:50:12 +01:00
feat(editorial): Phase D synergy commander enrichment, augmentation, lint & docs

Adds Phase D editorial tooling: synergy-based commander selection with a 3/2/1 pattern, duplicate filtering, annotated synergy_commanders, promotion to a minimum number of examples, and augmentation heuristics (e.g. Counters Matter/Proliferate injection). Includes new scripts (generate_theme_editorial_suggestions, lint, validate, catalog build/apply), updates the orchestrator & web routes, expands the CI workflow, and documents usage & non-determinism policies. Updates lint rules, type definitions, and docker configs.
This commit is contained in:
parent 16261bbf09
commit f2a76d2ffc
35 changed files with 2818 additions and 509 deletions
code/scripts/apply_next_theme_editorial.py (new file, 79 lines)
@@ -0,0 +1,79 @@
"""Apply example_cards / example_commanders to the next theme missing them.
|
||||
|
||||
Usage:
|
||||
python code/scripts/apply_next_theme_editorial.py
|
||||
|
||||
Repeating invocation will fill themes one at a time (skips deprecated alias placeholders).
|
||||
Options:
|
||||
--force overwrite existing lists for that theme
|
||||
--top / --top-commanders size knobs forwarded to suggestion generator
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import yaml # type: ignore
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog'
|
||||
|
||||
|
||||
def find_next_missing():
|
||||
for path in sorted(CATALOG_DIR.glob('*.yml')):
|
||||
try:
|
||||
data = yaml.safe_load(path.read_text(encoding='utf-8'))
|
||||
except Exception:
|
||||
continue
|
||||
if not isinstance(data, dict):
|
||||
continue
|
||||
notes = data.get('notes', '')
|
||||
if isinstance(notes, str) and 'Deprecated alias file' in notes:
|
||||
continue
|
||||
# Completion rule: a theme is considered "missing" only if a key itself is absent.
|
||||
# We intentionally allow empty lists (e.g., obscure themes with no clear commanders)
|
||||
# so we don't get stuck repeatedly selecting the same file.
|
||||
if ('example_cards' not in data) or ('example_commanders' not in data):
|
||||
return data.get('display_name'), path.name
|
||||
return None, None
|
||||
|
||||
|
||||
def main(): # pragma: no cover
|
||||
ap = argparse.ArgumentParser(description='Apply editorial examples to next missing theme')
|
||||
ap.add_argument('--force', action='store_true')
|
||||
ap.add_argument('--top', type=int, default=8)
|
||||
ap.add_argument('--top-commanders', type=int, default=5)
|
||||
args = ap.parse_args()
|
||||
theme, fname = find_next_missing()
|
||||
if not theme:
|
||||
print('All themes already have example_cards & example_commanders (or no YAML).')
|
||||
return
|
||||
print(f"Next missing theme: {theme} ({fname})")
|
||||
cmd = [
|
||||
sys.executable,
|
||||
str(ROOT / 'code' / 'scripts' / 'generate_theme_editorial_suggestions.py'),
|
||||
'--themes', theme,
|
||||
'--apply', '--limit-yaml', '1',
|
||||
'--top', str(args.top), '--top-commanders', str(args.top_commanders)
|
||||
]
|
||||
if args.force:
|
||||
cmd.append('--force')
|
||||
print('Running:', ' '.join(cmd))
|
||||
subprocess.run(cmd, check=False)
|
||||
# Post-pass: if we managed to add example_cards but no commanders were inferred, stamp an empty list
|
||||
# so subsequent runs proceed to the next theme instead of re-processing this one forever.
|
||||
if fname:
|
||||
target = CATALOG_DIR / fname
|
||||
try:
|
||||
data = yaml.safe_load(target.read_text(encoding='utf-8'))
|
||||
if isinstance(data, dict) and 'example_cards' in data and 'example_commanders' not in data:
|
||||
data['example_commanders'] = []
|
||||
target.write_text(yaml.safe_dump(data, sort_keys=False, allow_unicode=True), encoding='utf-8')
|
||||
print(f"[post] added empty example_commanders list to {fname} (no suggestions available)")
|
||||
except Exception as e: # pragma: no cover
|
||||
print(f"[post-warn] failed to add placeholder commanders for {fname}: {e}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
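
The completion rule above keys on key absence, not list emptiness; a minimal sketch with hypothetical theme docs:

done = {'display_name': 'Obscure Theme', 'example_cards': [], 'example_commanders': []}
missing = {'display_name': 'Landfall', 'example_cards': ['Lotus Cobra']}
for doc in (done, missing):
    needs_fill = ('example_cards' not in doc) or ('example_commanders' not in doc)
    print(doc['display_name'], '->', 'needs fill' if needs_fill else 'complete')
# Obscure Theme -> complete    (empty lists still count as present)
# Landfall -> needs fill       (example_commanders key absent)
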
code/scripts/build_theme_catalog.py (new file, 367 lines)
@@ -0,0 +1,367 @@
"""Phase B: Merge curated YAML catalog with regenerated analytics to build theme_list.json.
|
||||
|
||||
See roadmap Phase B goals. This script unifies generation:
|
||||
- Discovers themes (constants + tagger + CSV dynamic tags)
|
||||
- Applies whitelist governance (normalization, pruning, always_include)
|
||||
- Recomputes frequencies & PMI co-occurrence for inference
|
||||
- Loads curated YAML files (Phase A outputs) for editorial overrides
|
||||
- Merges curated, enforced, and inferred synergies with precedence
|
||||
- Applies synergy cap without truncating curated or enforced entries
|
||||
- Emits theme_list.json with provenance block
|
||||
|
||||
Opt-in via env THEME_CATALOG_MODE=merge (or build/phaseb). Or run manually:
|
||||
python code/scripts/build_theme_catalog.py --verbose
|
||||
|
||||
This is intentionally side-effect only (writes JSON). Unit tests for Phase C will
|
||||
add schema validation; for now we focus on deterministic, stable output.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from collections import Counter
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
try: # Optional
|
||||
import yaml # type: ignore
|
||||
except Exception: # pragma: no cover
|
||||
yaml = None
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
CODE_ROOT = ROOT / 'code'
|
||||
if str(CODE_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(CODE_ROOT))
|
||||
|
||||
from scripts.extract_themes import ( # type: ignore
|
||||
BASE_COLORS,
|
||||
collect_theme_tags_from_constants,
|
||||
collect_theme_tags_from_tagger_source,
|
||||
gather_theme_tag_rows,
|
||||
tally_tag_frequencies_by_base_color,
|
||||
compute_cooccurrence,
|
||||
cooccurrence_scores_for,
|
||||
derive_synergies_for_tags,
|
||||
apply_normalization,
|
||||
load_whitelist_config,
|
||||
should_keep_theme,
|
||||
)
|
||||
|
||||
CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog'
|
||||
OUTPUT_JSON = ROOT / 'config' / 'themes' / 'theme_list.json'
|
||||
|
||||
|
||||
@dataclass
|
||||
class ThemeYAML:
|
||||
id: str
|
||||
display_name: str
|
||||
curated_synergies: List[str]
|
||||
enforced_synergies: List[str]
|
||||
inferred_synergies: List[str]
|
||||
synergies: List[str]
|
||||
primary_color: Optional[str] = None
|
||||
secondary_color: Optional[str] = None
|
||||
notes: str = ''
|
||||
|
||||
|
||||
def _log(msg: str, verbose: bool): # pragma: no cover
|
||||
if verbose:
|
||||
print(f"[build_theme_catalog] {msg}", file=sys.stderr)
|
||||
|
||||
|
||||
def load_catalog_yaml(verbose: bool) -> Dict[str, ThemeYAML]:
|
||||
out: Dict[str, ThemeYAML] = {}
|
||||
if not CATALOG_DIR.exists() or yaml is None:
|
||||
return out
|
||||
for path in sorted(CATALOG_DIR.glob('*.yml')):
|
||||
try:
|
||||
data = yaml.safe_load(path.read_text(encoding='utf-8'))
|
||||
except Exception:
|
||||
_log(f"Failed reading {path.name}", verbose)
|
||||
continue
|
||||
if not isinstance(data, dict):
|
||||
continue
|
||||
# Skip deprecated alias placeholder files (marked in notes)
|
||||
try:
|
||||
notes_field = data.get('notes')
|
||||
if isinstance(notes_field, str) and 'Deprecated alias file' in notes_field:
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
ty = ThemeYAML(
|
||||
id=str(data.get('id') or ''),
|
||||
display_name=str(data.get('display_name') or ''),
|
||||
curated_synergies=list(data.get('curated_synergies') or []),
|
||||
enforced_synergies=list(data.get('enforced_synergies') or []),
|
||||
inferred_synergies=list(data.get('inferred_synergies') or []),
|
||||
synergies=list(data.get('synergies') or []),
|
||||
primary_color=data.get('primary_color'),
|
||||
secondary_color=data.get('secondary_color'),
|
||||
notes=str(data.get('notes') or ''),
|
||||
)
|
||||
except Exception:
|
||||
continue
|
||||
if not ty.display_name:
|
||||
continue
|
||||
out[ty.display_name] = ty
|
||||
return out
|
||||
|
||||
|
||||
def regenerate_analytics(verbose: bool):
|
||||
theme_tags: Set[str] = set()
|
||||
theme_tags |= collect_theme_tags_from_constants()
|
||||
theme_tags |= collect_theme_tags_from_tagger_source()
|
||||
try:
|
||||
csv_rows = gather_theme_tag_rows()
|
||||
for row_tags in csv_rows:
|
||||
for t in row_tags:
|
||||
if isinstance(t, str) and t:
|
||||
theme_tags.add(t)
|
||||
except Exception:
|
||||
csv_rows = []
|
||||
|
||||
whitelist = load_whitelist_config()
|
||||
normalization_map: Dict[str, str] = whitelist.get('normalization', {}) if isinstance(whitelist.get('normalization'), dict) else {}
|
||||
exclusions: Set[str] = set(whitelist.get('exclusions', []) or [])
|
||||
protected_prefixes: List[str] = list(whitelist.get('protected_prefixes', []) or [])
|
||||
protected_suffixes: List[str] = list(whitelist.get('protected_suffixes', []) or [])
|
||||
min_overrides: Dict[str, int] = whitelist.get('min_frequency_overrides', {}) or {}
|
||||
|
||||
if normalization_map:
|
||||
theme_tags = apply_normalization(theme_tags, normalization_map)
|
||||
blacklist = {"Draw Triggers"}
|
||||
theme_tags = {t for t in theme_tags if t and t not in blacklist and t not in exclusions}
|
||||
|
||||
try:
|
||||
frequencies = tally_tag_frequencies_by_base_color()
|
||||
except Exception:
|
||||
frequencies = {}
|
||||
|
||||
if frequencies:
|
||||
def total_count(t: str) -> int:
|
||||
s = 0
|
||||
for c in BASE_COLORS.keys():
|
||||
try:
|
||||
s += int(frequencies.get(c, {}).get(t, 0))
|
||||
except Exception:
|
||||
pass
|
||||
return s
|
||||
kept: Set[str] = set()
|
||||
for t in list(theme_tags):
|
||||
if should_keep_theme(t, total_count(t), whitelist, protected_prefixes, protected_suffixes, min_overrides):
|
||||
kept.add(t)
|
||||
for extra in whitelist.get('always_include', []) or []:
|
||||
kept.add(str(extra))
|
||||
theme_tags = kept
|
||||
|
||||
try:
|
||||
rows = csv_rows if csv_rows else gather_theme_tag_rows()
|
||||
co_map, tag_counts, total_rows = compute_cooccurrence(rows)
|
||||
except Exception:
|
||||
co_map, tag_counts, total_rows = {}, Counter(), 0
|
||||
|
||||
return dict(theme_tags=theme_tags, frequencies=frequencies, co_map=co_map, tag_counts=tag_counts, total_rows=total_rows, whitelist=whitelist)
|
||||
|
||||
|
||||
def _primary_secondary(theme: str, freqs: Dict[str, Dict[str, int]]):
|
||||
if not freqs:
|
||||
return None, None
|
||||
items: List[Tuple[str, int]] = []
|
||||
for color in BASE_COLORS.keys():
|
||||
try:
|
||||
items.append((color, int(freqs.get(color, {}).get(theme, 0))))
|
||||
except Exception:
|
||||
items.append((color, 0))
|
||||
items.sort(key=lambda x: (-x[1], x[0]))
|
||||
if not items or items[0][1] <= 0:
|
||||
return None, None
|
||||
title = {'white': 'White', 'blue': 'Blue', 'black': 'Black', 'red': 'Red', 'green': 'Green'}
|
||||
primary = title[items[0][0]]
|
||||
secondary = None
|
||||
for c, n in items[1:]:
|
||||
if n > 0:
|
||||
secondary = title[c]
|
||||
break
|
||||
return primary, secondary
|
||||
|
||||
|
||||
def infer_synergies(anchor: str, curated: List[str], enforced: List[str], analytics: dict, pmi_min: float = 0.0, co_min: int = 5) -> List[str]:
|
||||
if anchor not in analytics['co_map'] or analytics['total_rows'] <= 0:
|
||||
return []
|
||||
scored = cooccurrence_scores_for(anchor, analytics['co_map'], analytics['tag_counts'], analytics['total_rows'])
|
||||
out: List[str] = []
|
||||
for other, score, co_count in scored:
|
||||
if score <= pmi_min or co_count < co_min:
|
||||
continue
|
||||
if other == anchor or other in curated or other in enforced or other in out:
|
||||
continue
|
||||
out.append(other)
|
||||
if len(out) >= 12:
|
||||
break
|
||||
return out
|
||||
|
||||
|
||||
def build_catalog(limit: int, verbose: bool) -> Dict[str, Any]:
|
||||
analytics = regenerate_analytics(verbose)
|
||||
whitelist = analytics['whitelist']
|
||||
synergy_cap = int(whitelist.get('synergy_cap', 0) or 0)
|
||||
normalization_map: Dict[str, str] = whitelist.get('normalization', {}) if isinstance(whitelist.get('normalization'), dict) else {}
|
||||
enforced_cfg: Dict[str, List[str]] = whitelist.get('enforced_synergies', {}) or {}
|
||||
|
||||
yaml_catalog = load_catalog_yaml(verbose)
|
||||
all_themes: Set[str] = set(analytics['theme_tags']) | {t.display_name for t in yaml_catalog.values()}
|
||||
if normalization_map:
|
||||
all_themes = apply_normalization(all_themes, normalization_map)
|
||||
curated_baseline = derive_synergies_for_tags(all_themes)
|
||||
|
||||
entries: List[Dict[str, Any]] = []
|
||||
processed = 0
|
||||
for theme in sorted(all_themes):
|
||||
if limit and processed >= limit:
|
||||
break
|
||||
processed += 1
|
||||
y = yaml_catalog.get(theme)
|
||||
curated_list = list(y.curated_synergies) if y and y.curated_synergies else curated_baseline.get(theme, [])
|
||||
enforced_list: List[str] = []
|
||||
if y and y.enforced_synergies:
|
||||
for s in y.enforced_synergies:
|
||||
if s not in enforced_list:
|
||||
enforced_list.append(s)
|
||||
if theme in enforced_cfg:
|
||||
for s in enforced_cfg.get(theme, []):
|
||||
if s not in enforced_list:
|
||||
enforced_list.append(s)
|
||||
inferred_list = infer_synergies(theme, curated_list, enforced_list, analytics)
|
||||
if not inferred_list and y and y.inferred_synergies:
|
||||
inferred_list = [s for s in y.inferred_synergies if s not in curated_list and s not in enforced_list]
|
||||
|
||||
if normalization_map:
|
||||
def _norm(seq: List[str]) -> List[str]:
|
||||
seen = set()
|
||||
out = []
|
||||
for s in seq:
|
||||
s2 = normalization_map.get(s, s)
|
||||
if s2 not in seen:
|
||||
out.append(s2)
|
||||
seen.add(s2)
|
||||
return out
|
||||
curated_list = _norm(curated_list)
|
||||
enforced_list = _norm(enforced_list)
|
||||
inferred_list = _norm(inferred_list)
|
||||
|
||||
merged: List[str] = []
|
||||
for bucket in (curated_list, enforced_list, inferred_list):
|
||||
for s in bucket:
|
||||
if s == theme:
|
||||
continue
|
||||
if s not in merged:
|
||||
merged.append(s)
|
||||
|
||||
# Noise suppression: remove ubiquitous Legends/Historics links except for their mutual pairing.
|
||||
# Rationale: Every legendary permanent is tagged with both themes (Historics also covers artifacts/enchantments),
|
||||
# creating low-signal "synergies" that crowd out more meaningful relationships. Requirement:
|
||||
# - For any theme other than the two themselves, strip both "Legends Matter" and "Historics Matter".
|
||||
# - For "Legends Matter", allow "Historics Matter" to remain (and vice-versa).
|
||||
special_noise = {"Legends Matter", "Historics Matter"}
|
||||
if theme not in special_noise:
|
||||
if any(s in special_noise for s in merged):
|
||||
merged = [s for s in merged if s not in special_noise]
|
||||
# If theme is one of the special ones, keep the other if present (no action needed beyond above filter logic).
|
||||
|
||||
if synergy_cap > 0 and len(merged) > synergy_cap:
|
||||
ce_len = len(curated_list) + len([s for s in enforced_list if s not in curated_list])
|
||||
if ce_len < synergy_cap:
|
||||
allowed_inferred = synergy_cap - ce_len
|
||||
ce_part = merged[:ce_len]
|
||||
inferred_tail = [s for s in merged[ce_len:ce_len+allowed_inferred]]
|
||||
merged = ce_part + inferred_tail
|
||||
# else: keep all (soft exceed)
|
||||
|
||||
if y and (y.primary_color or y.secondary_color):
|
||||
primary, secondary = y.primary_color, y.secondary_color
|
||||
else:
|
||||
primary, secondary = _primary_secondary(theme, analytics['frequencies'])
|
||||
|
||||
entry = {'theme': theme, 'synergies': merged}
|
||||
if primary:
|
||||
entry['primary_color'] = primary
|
||||
if secondary:
|
||||
entry['secondary_color'] = secondary
|
||||
# Phase D: carry forward optional editorial metadata if present in YAML
|
||||
if y:
|
||||
if getattr(y, 'example_commanders', None):
|
||||
entry['example_commanders'] = [c for c in y.example_commanders if isinstance(c, str)][:12]
|
||||
if getattr(y, 'example_cards', None):
|
||||
# Limit to 20 for safety (UI may further cap)
|
||||
dedup_cards = []
|
||||
seen_cards = set()
|
||||
for c in y.example_cards:
|
||||
if isinstance(c, str) and c and c not in seen_cards:
|
||||
dedup_cards.append(c)
|
||||
seen_cards.add(c)
|
||||
if len(dedup_cards) >= 20:
|
||||
break
|
||||
if dedup_cards:
|
||||
entry['example_cards'] = dedup_cards
|
||||
if getattr(y, 'deck_archetype', None):
|
||||
entry['deck_archetype'] = y.deck_archetype
|
||||
if getattr(y, 'popularity_hint', None):
|
||||
entry['popularity_hint'] = y.popularity_hint
|
||||
# Pass through synergy_commanders if already curated (script will populate going forward)
|
||||
if hasattr(y, 'synergy_commanders') and getattr(y, 'synergy_commanders'):
|
||||
entry['synergy_commanders'] = [c for c in getattr(y, 'synergy_commanders') if isinstance(c, str)][:12]
|
||||
entries.append(entry)
|
||||
|
||||
provenance = {
|
||||
'mode': 'merge',
|
||||
'generated_at': time.strftime('%Y-%m-%dT%H:%M:%S'),
|
||||
'curated_yaml_files': len(yaml_catalog),
|
||||
'synergy_cap': synergy_cap,
|
||||
'inference': 'pmi',
|
||||
'version': 'phase-b-merge-v1'
|
||||
}
|
||||
return {
|
||||
'themes': entries,
|
||||
'frequencies_by_base_color': analytics['frequencies'],
|
||||
'generated_from': 'merge (analytics + curated YAML + whitelist)',
|
||||
'provenance': provenance,
|
||||
}
|
||||
|
||||
|
||||
def main(): # pragma: no cover
|
||||
parser = argparse.ArgumentParser(description='Build merged theme catalog (Phase B)')
|
||||
parser.add_argument('--limit', type=int, default=0)
|
||||
parser.add_argument('--verbose', action='store_true')
|
||||
parser.add_argument('--dry-run', action='store_true')
|
||||
parser.add_argument('--schema', action='store_true', help='Print JSON Schema for catalog and exit')
|
||||
args = parser.parse_args()
|
||||
if args.schema:
|
||||
# Lazy import to avoid circular dependency: replicate minimal schema inline from models file if present
|
||||
try:
|
||||
from type_definitions_theme_catalog import ThemeCatalog # type: ignore
|
||||
import json as _json
|
||||
print(_json.dumps(ThemeCatalog.model_json_schema(), indent=2))
|
||||
return
|
||||
except Exception as _e: # pragma: no cover
|
||||
print(f"Failed to load schema models: {_e}")
|
||||
return
|
||||
data = build_catalog(limit=args.limit, verbose=args.verbose)
|
||||
if args.dry_run:
|
||||
print(json.dumps({'theme_count': len(data['themes']), 'provenance': data['provenance']}, indent=2))
|
||||
else:
|
||||
os.makedirs(OUTPUT_JSON.parent, exist_ok=True)
|
||||
with open(OUTPUT_JSON, 'w', encoding='utf-8') as f:
|
||||
json.dump(data, f, indent=2, ensure_ascii=False)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
main()
|
||||
except Exception as e: # broad guard for orchestrator fallback
|
||||
print(f"ERROR: build_theme_catalog failed: {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
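
cooccurrence_scores_for is imported from extract_themes and not shown in this diff; below is a minimal PMI scoring sketch of what it plausibly computes, given the (co_map, tag_counts, total_rows) shapes used above. This is an assumption for illustration, not the repository's implementation:

import math
from collections import Counter
from typing import Dict, List, Tuple

def pmi_scores_sketch(anchor: str, co_map: Dict[str, Counter], tag_counts: Counter, total_rows: int) -> List[Tuple[str, float, int]]:
    # PMI(a, b) = log( p(a, b) / (p(a) * p(b)) ), with probabilities estimated from row counts.
    out: List[Tuple[str, float, int]] = []
    for other, co_count in co_map.get(anchor, Counter()).items():
        p_ab = co_count / total_rows
        p_a = tag_counts[anchor] / total_rows
        p_b = tag_counts[other] / total_rows
        if p_ab > 0 and p_a > 0 and p_b > 0:
            out.append((other, math.log(p_ab / (p_a * p_b)), co_count))
    out.sort(key=lambda x: -x[1])  # highest-association tags first
    return out
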
code/scripts/export_themes_to_yaml.py (new file, 150 lines)
@@ -0,0 +1,150 @@
"""Phase A: Export existing generated theme_list.json into per-theme YAML files.
|
||||
|
||||
Generates one YAML file per theme under config/themes/catalog/<slug>.yml
|
||||
|
||||
Slug rules:
|
||||
- Lowercase
|
||||
- Alphanumerics kept
|
||||
- Spaces and consecutive separators -> single hyphen
|
||||
- '+' replaced with 'plus'
|
||||
- '/' replaced with '-'
|
||||
- Other punctuation removed
|
||||
- Collapse multiple hyphens
|
||||
|
||||
YAML schema (initial minimal):
|
||||
id: <slug>
|
||||
display_name: <theme>
|
||||
curated_synergies: [ ... ] # (only curated portion, best-effort guess)
|
||||
enforced_synergies: [ ... ] # (if present in whitelist enforced_synergies or auto-inferred cluster)
|
||||
primary_color: Optional TitleCase
|
||||
secondary_color: Optional TitleCase
|
||||
notes: '' # placeholder for editorial additions
|
||||
|
||||
We treat current synergy list (capped) as partially curated; we attempt to recover curated vs inferred by re-running
|
||||
`derive_synergies_for_tags` from extract_themes (imported) to see which curated anchors apply.
|
||||
|
||||
Safety: Does NOT overwrite an existing file unless --force provided.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Set
|
||||
|
||||
import yaml # type: ignore
|
||||
|
||||
# Reuse logic from extract_themes by importing derive_synergies_for_tags
|
||||
import sys
|
||||
SCRIPT_ROOT = Path(__file__).resolve().parent
|
||||
CODE_ROOT = SCRIPT_ROOT.parent
|
||||
if str(CODE_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(CODE_ROOT))
|
||||
from scripts.extract_themes import derive_synergies_for_tags # type: ignore
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
THEME_JSON = ROOT / 'config' / 'themes' / 'theme_list.json'
|
||||
CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog'
|
||||
WHITELIST_YML = ROOT / 'config' / 'themes' / 'theme_whitelist.yml'
|
||||
|
||||
|
||||
def load_theme_json() -> Dict:
|
||||
if not THEME_JSON.exists():
|
||||
raise SystemExit(f"theme_list.json not found at {THEME_JSON}. Run extract_themes.py first.")
|
||||
return json.loads(THEME_JSON.read_text(encoding='utf-8'))
|
||||
|
||||
|
||||
def load_whitelist() -> Dict:
|
||||
if not WHITELIST_YML.exists():
|
||||
return {}
|
||||
try:
|
||||
return yaml.safe_load(WHITELIST_YML.read_text(encoding='utf-8')) or {}
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
def slugify(name: str) -> str:
|
||||
s = name.strip().lower()
|
||||
s = s.replace('+', 'plus')
|
||||
s = s.replace('/', '-')
|
||||
# Replace spaces & underscores with hyphen
|
||||
s = re.sub(r'[\s_]+', '-', s)
|
||||
# Remove disallowed chars (keep alnum and hyphen)
|
||||
s = re.sub(r'[^a-z0-9-]', '', s)
|
||||
# Collapse multiple hyphens
|
||||
s = re.sub(r'-{2,}', '-', s)
|
||||
return s.strip('-')
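
# Illustrative worked examples (hypothetical, derived from the rules above; not part of the committed file):
#   slugify('+1/+1 Counters')          -> 'plus1-plus1-counters'   ('+' -> 'plus', '/' -> '-', space -> '-')
#   slugify('Artifacts Matter')        -> 'artifacts-matter'
#   slugify('Aristocrats (Sacrifice)') -> 'aristocrats-sacrifice'  (punctuation dropped, hyphens collapsed)
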

def recover_curated_synergies(all_themes: Set[str], theme: str) -> List[str]:
    # Recompute curated mapping and return the curated list if present
    curated_map = derive_synergies_for_tags(all_themes)
    return curated_map.get(theme, [])


def main():
    parser = argparse.ArgumentParser(description='Export per-theme YAML catalog files (Phase A).')
    parser.add_argument('--force', action='store_true', help='Overwrite existing YAML files if present.')
    parser.add_argument('--limit', type=int, default=0, help='Limit export to first N themes (debug).')
    args = parser.parse_args()

    data = load_theme_json()
    themes = data.get('themes', [])
    whitelist = load_whitelist()
    enforced_cfg = whitelist.get('enforced_synergies', {}) if isinstance(whitelist.get('enforced_synergies', {}), dict) else {}

    all_theme_names: Set[str] = {t.get('theme') for t in themes if isinstance(t, dict) and t.get('theme')}

    CATALOG_DIR.mkdir(parents=True, exist_ok=True)

    exported = 0
    for entry in themes:
        theme_name = entry.get('theme')
        if not theme_name:
            continue
        if args.limit and exported >= args.limit:
            break
        slug = slugify(theme_name)
        path = CATALOG_DIR / f'{slug}.yml'
        if path.exists() and not args.force:
            continue
        synergy_list = entry.get('synergies', []) or []
        # Attempt to separate curated portion (only for themes in curated mapping)
        curated_synergies = recover_curated_synergies(all_theme_names, theme_name)
        enforced_synergies = enforced_cfg.get(theme_name, [])
        # Keep order: curated -> enforced -> inferred. synergy_list already reflects that ordering from generation.
        # Filter curated to those present in current synergy_list to avoid stale entries.
        curated_synergies = [s for s in curated_synergies if s in synergy_list]
        # Remove enforced from curated to avoid duplication across buckets
        curated_synergies_clean = [s for s in curated_synergies if s not in enforced_synergies]
        # Inferred = remaining items in synergy_list not in curated or enforced
        curated_set = set(curated_synergies_clean)
        enforced_set = set(enforced_synergies)
        inferred_synergies = [s for s in synergy_list if s not in curated_set and s not in enforced_set]

        doc = {
            'id': slug,
            'display_name': theme_name,
            'synergies': synergy_list,  # full capped list (ordered)
            'curated_synergies': curated_synergies_clean,
            'enforced_synergies': enforced_synergies,
            'inferred_synergies': inferred_synergies,
            'primary_color': entry.get('primary_color'),
            'secondary_color': entry.get('secondary_color'),
            'notes': ''
        }
        # Drop None color keys for cleanliness
        if doc['primary_color'] is None:
            doc.pop('primary_color')
        if doc.get('secondary_color') is None:
            doc.pop('secondary_color')
        with path.open('w', encoding='utf-8') as f:
            yaml.safe_dump(doc, f, sort_keys=False, allow_unicode=True)
        exported += 1

    print(f"Exported {exported} theme YAML files to {CATALOG_DIR}")


if __name__ == '__main__':
    main()
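
A small worked example of the curated/enforced/inferred partition in main() above, using hypothetical lists:

synergy_list = ['Ramp', 'Lands Matter', 'Token Creation', 'Landfall']  # full capped list from theme_list.json
curated = ['Lands Matter', 'Ramp']   # recovered via derive_synergies_for_tags
enforced = ['Lands Matter']          # from theme_whitelist.yml enforced_synergies
curated_clean = [s for s in curated if s not in enforced]              # ['Ramp']
inferred = [s for s in synergy_list if s not in set(curated_clean) | set(enforced)]
print(inferred)  # ['Token Creation', 'Landfall']
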
code/scripts/extract_themes.py (modified)
@@ -221,12 +221,11 @@ def derive_synergies_for_tags(tags: Set[str]) -> Dict[str, List[str]]:
         ("Noncreature Spells", ["Spellslinger", "Prowess"]),
         ("Prowess", ["Spellslinger", "Noncreature Spells"]),
         # Artifacts / Enchantments
-        ("Artifacts Matter", ["Treasure Token", "Equipment", "Vehicles", "Improvise"]),
+        ("Artifacts Matter", ["Treasure Token", "Equipment Matters", "Vehicles", "Improvise"]),
         ("Enchantments Matter", ["Auras", "Constellation", "Card Draw"]),
         ("Auras", ["Constellation", "Voltron", "Enchantments Matter"]),
-        ("Equipment", ["Voltron", "Double Strike", "Warriors Matter"]),
         ("Treasure Token", ["Sacrifice Matters", "Artifacts Matter", "Ramp"]),
-        ("Vehicles", ["Artifacts Matter", "Equipment"]),
+        ("Vehicles", ["Artifacts Matter", "Crew", "Vehicles"]),
         # Counters / Proliferate
         ("Counters Matter", ["Proliferate", "+1/+1 Counters", "Adapt", "Outlast"]),
         ("+1/+1 Counters", ["Proliferate", "Counters Matter", "Adapt", "Evolve"]),
@@ -237,7 +236,7 @@ def derive_synergies_for_tags(tags: Set[str]) -> Dict[str, List[str]]:
         ("Landfall", ["Lands Matter", "Ramp", "Token Creation"]),
         ("Domain", ["Lands Matter", "Ramp"]),
         # Combat / Voltron
-        ("Voltron", ["Equipment", "Auras", "Double Strike"]),
+        ("Voltron", ["Equipment Matters", "Auras", "Double Strike"]),
         # Card flow
         ("Card Draw", ["Loot", "Wheels", "Replacement Draw", "Unconditional Draw", "Conditional Draw"]),
         ("Loot", ["Card Draw", "Discard Matters", "Reanimate"]),
code/scripts/generate_theme_editorial_suggestions.py (new file, 432 lines)
@@ -0,0 +1,432 @@
"""Generate editorial metadata suggestions for theme YAML files (Phase D helper).
|
||||
|
||||
Features:
|
||||
- Scans color CSV files (skips monolithic cards.csv unless --include-master)
|
||||
- Collects top-N (lowest EDHREC rank) cards per theme based on themeTags column
|
||||
- Optionally derives commander suggestions from commander_cards.csv (if present)
|
||||
- Provides dry-run output (default) or can patch YAML files that lack example_cards / example_commanders
|
||||
- Prints streaming progress so the user sees real-time status
|
||||
|
||||
Usage (dry run):
|
||||
python code/scripts/generate_theme_editorial_suggestions.py --themes "Landfall,Reanimate" --top 8
|
||||
|
||||
Write back missing fields (only if not already present):
|
||||
python code/scripts/generate_theme_editorial_suggestions.py --apply --limit-yaml 500
|
||||
|
||||
Safety:
|
||||
- Existing example_cards / example_commanders are never overwritten unless --force is passed
|
||||
- Writes are limited by --limit-yaml (default 0 means unlimited) to avoid massive churn accidentally
|
||||
|
||||
Heuristics:
|
||||
- Deduplicate card names per theme
|
||||
- Filter out names with extremely poor rank (> 60000) by default (configurable)
|
||||
- For commander suggestions, prefer legendary creatures/planeswalkers in commander_cards.csv whose themeTags includes the theme
|
||||
- Fallback commander suggestions: take top legendary cards from color CSVs tagged with the theme
|
||||
- synergy_commanders: derive from top 3 synergies of each theme (3 from top, 2 from second, 1 from third)
|
||||
- Promotion: if fewer than --min-examples example_commanders exist after normal suggestion, promote synergy_commanders (in order) into example_commanders, annotating with " - Synergy (<synergy name>)"
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import ast
|
||||
import csv
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple, Set
|
||||
import sys
|
||||
|
||||
try: # optional dependency safety
|
||||
import yaml # type: ignore
|
||||
except Exception:
|
||||
yaml = None
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
CSV_DIR = ROOT / 'csv_files'
|
||||
CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog'
|
||||
|
||||
COLOR_CSV_GLOB = '*_cards.csv'
|
||||
MASTER_FILE = 'cards.csv'
|
||||
COMMANDER_FILE = 'commander_cards.csv'
|
||||
|
||||
|
||||
@dataclass
|
||||
class ThemeSuggestion:
|
||||
cards: List[str]
|
||||
commanders: List[str]
|
||||
synergy_commanders: List[str]
|
||||
|
||||
|
||||
def _parse_theme_tags(raw: str) -> List[str]:
|
||||
if not raw:
|
||||
return []
|
||||
raw = raw.strip()
|
||||
if not raw or raw == '[]':
|
||||
return []
|
||||
try:
|
||||
# themeTags stored like "['Landfall', 'Ramp']" – use literal_eval safely
|
||||
val = ast.literal_eval(raw)
|
||||
if isinstance(val, list):
|
||||
return [str(x) for x in val if isinstance(x, str)]
|
||||
except Exception:
|
||||
pass
|
||||
# Fallback naive parse
|
||||
return [t.strip().strip("'\"") for t in raw.strip('[]').split(',') if t.strip()]
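
# Illustrative behavior (hypothetical inputs, not part of the committed file):
#   _parse_theme_tags("['Landfall', 'Ramp']") -> ['Landfall', 'Ramp']   (ast.literal_eval path)
#   _parse_theme_tags("[Landfall, Ramp]")     -> ['Landfall', 'Ramp']   (fallback naive parse)
#   _parse_theme_tags('[]')                   -> []
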

def scan_color_csvs(include_master: bool, max_rank: float, progress_every: int) -> Tuple[Dict[str, List[Tuple[float, str]]], Dict[str, List[Tuple[float, str]]]]:
    theme_hits: Dict[str, List[Tuple[float, str]]] = {}
    legendary_hits: Dict[str, List[Tuple[float, str]]] = {}
    files: List[Path] = []
    for fp in sorted(CSV_DIR.glob(COLOR_CSV_GLOB)):
        name = fp.name
        if name == MASTER_FILE and not include_master:
            continue
        if name == COMMANDER_FILE:
            continue
        # skip testdata
        if 'testdata' in str(fp):
            continue
        files.append(fp)
    total_files = len(files)
    processed = 0
    for fp in files:
        processed += 1
        try:
            with fp.open(encoding='utf-8', newline='') as f:
                reader = csv.DictReader(f)
                line_idx = 0
                for row in reader:
                    line_idx += 1
                    if progress_every and line_idx % progress_every == 0:
                        print(f"[scan] {fp.name} line {line_idx}", file=sys.stderr, flush=True)
                    tags_raw = row.get('themeTags') or ''
                    if not tags_raw:
                        continue
                    try:
                        rank = float(row.get('edhrecRank') or 999999)
                    except Exception:
                        rank = 999999
                    if rank > max_rank:
                        continue
                    tags = _parse_theme_tags(tags_raw)
                    name = row.get('name') or ''
                    if not name:
                        continue
                    is_legendary = False
                    try:
                        typ = row.get('type') or ''
                        if isinstance(typ, str) and 'Legendary' in typ.split():
                            is_legendary = True
                    except Exception:
                        pass
                    for t in tags:
                        if not t:
                            continue
                        theme_hits.setdefault(t, []).append((rank, name))
                        if is_legendary:
                            legendary_hits.setdefault(t, []).append((rank, name))
        except Exception as e:  # pragma: no cover
            print(f"[warn] failed reading {fp.name}: {e}", file=sys.stderr)
        print(f"[scan] completed {fp.name} ({processed}/{total_files})", file=sys.stderr, flush=True)
    # Trim each bucket to reasonable size (keep best ranks)
    for mapping, cap in ((theme_hits, 120), (legendary_hits, 80)):
        for t, lst in mapping.items():
            lst.sort(key=lambda x: x[0])
            if len(lst) > cap:
                del lst[cap:]
    return theme_hits, legendary_hits


def scan_commander_csv(max_rank: float) -> Dict[str, List[Tuple[float, str]]]:
    path = CSV_DIR / COMMANDER_FILE
    out: Dict[str, List[Tuple[float, str]]] = {}
    if not path.exists():
        return out
    try:
        with path.open(encoding='utf-8', newline='') as f:
            reader = csv.DictReader(f)
            for row in reader:
                tags_raw = row.get('themeTags') or ''
                if not tags_raw:
                    continue
                tags = _parse_theme_tags(tags_raw)
                try:
                    rank = float(row.get('edhrecRank') or 999999)
                except Exception:
                    rank = 999999
                if rank > max_rank:
                    continue
                name = row.get('name') or ''
                if not name:
                    continue
                for t in tags:
                    if not t:
                        continue
                    out.setdefault(t, []).append((rank, name))
    except Exception as e:  # pragma: no cover
        print(f"[warn] failed reading {COMMANDER_FILE}: {e}", file=sys.stderr)
    for t, lst in out.items():
        lst.sort(key=lambda x: x[0])
        if len(lst) > 60:
            del lst[60:]
    return out


def load_yaml_theme(path: Path) -> dict:
    try:
        return yaml.safe_load(path.read_text(encoding='utf-8')) if yaml else {}
    except Exception:
        return {}


def write_yaml_theme(path: Path, data: dict):
    txt = yaml.safe_dump(data, sort_keys=False, allow_unicode=True)
    path.write_text(txt, encoding='utf-8')


def build_suggestions(theme_hits: Dict[str, List[Tuple[float, str]]], commander_hits: Dict[str, List[Tuple[float, str]]], top: int, top_commanders: int, *, synergy_top=(3, 2, 1), min_examples: int = 5) -> Dict[str, ThemeSuggestion]:
    suggestions: Dict[str, ThemeSuggestion] = {}
    all_themes: Set[str] = set(theme_hits.keys()) | set(commander_hits.keys())
    for t in sorted(all_themes):
        card_names: List[str] = []
        if t in theme_hits:
            for rank, name in theme_hits[t][: top * 3]:  # oversample then dedup
                if name not in card_names:
                    card_names.append(name)
                if len(card_names) >= top:
                    break
        commander_names: List[str] = []
        if t in commander_hits:
            for rank, name in commander_hits[t][: top_commanders * 2]:
                if name not in commander_names:
                    commander_names.append(name)
                if len(commander_names) >= top_commanders:
                    break
        # Placeholder synergy_commanders; will be filled later after we know synergies per theme from YAML
        suggestions[t] = ThemeSuggestion(cards=card_names, commanders=commander_names, synergy_commanders=[])
    return suggestions


def _derive_synergy_commanders(base_theme: str, data: dict, all_yaml: Dict[str, dict], commander_hits: Dict[str, List[Tuple[float, str]]], legendary_hits: Dict[str, List[Tuple[float, str]]], synergy_top=(3, 2, 1)) -> List[Tuple[str, str]]:
    """Pick synergy commanders with their originating synergy label.

    Returns list of (commander_name, synergy_theme) preserving order of (top synergy, second, third) and internal ranking.
    """
    synergies = data.get('synergies') or []
    if not isinstance(synergies, list):
        return []
    pattern = list(synergy_top)
    out: List[Tuple[str, str]] = []
    for idx, count in enumerate(pattern):
        if idx >= len(synergies):
            break
        s_name = synergies[idx]
        bucket = commander_hits.get(s_name) or []
        taken = 0
        for _, cname in bucket:
            if all(cname != existing for existing, _ in out):
                out.append((cname, s_name))
                taken += 1
                if taken >= count:
                    break
        if taken < count:
            # fallback to legendary card hits tagged with that synergy
            fallback_bucket = legendary_hits.get(s_name) or []
            for _, cname in fallback_bucket:
                if all(cname != existing for existing, _ in out):
                    out.append((cname, s_name))
                    taken += 1
                    if taken >= count:
                        break
    return out
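
# Illustration of the (3, 2, 1) pattern (hypothetical data, not part of the committed file):
# for a theme whose top three synergies are ['Landfall', 'Ramp', 'Lands Matter'], the result
# is up to 3 commanders sourced from 'Landfall', 2 from 'Ramp', and 1 from 'Lands Matter',
# each returned as (commander_name, synergy_theme); legendary_hits fills in whenever
# commander_cards.csv has too few entries for a synergy.
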

def _augment_synergies(data: dict, base_theme: str) -> bool:
    """Heuristically augment the 'synergies' list when it's sparse.

    Rules:
    - If synergies length >= 3, leave as-is.
    - Start with existing synergies then append curated/enforced/inferred (in that order) if missing.
    - For any theme whose display_name contains 'Counter' add 'Counters Matter' and 'Proliferate'.
    Returns True if modified.
    """
    synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else []
    if not isinstance(synergies, list):
        return False
    original = list(synergies)
    if len(synergies) < 3:
        for key in ('curated_synergies', 'enforced_synergies', 'inferred_synergies'):
            lst = data.get(key)
            if isinstance(lst, list):
                for s in lst:
                    if isinstance(s, str) and s and s not in synergies:
                        synergies.append(s)
    name = data.get('display_name') or base_theme
    if isinstance(name, str) and 'counter' in name.lower():
        for extra in ('Counters Matter', 'Proliferate'):
            if extra not in synergies:
                synergies.append(extra)
    # Deduplicate preserving order
    seen = set()
    deduped = []
    for s in synergies:
        if s not in seen:
            deduped.append(s)
            seen.add(s)
    if deduped != synergies:
        synergies = deduped
    if synergies != original:
        data['synergies'] = synergies
        return True
    return False


def apply_to_yaml(suggestions: Dict[str, ThemeSuggestion], *, limit_yaml: int, force: bool, themes_filter: Set[str], commander_hits: Dict[str, List[Tuple[float, str]]], legendary_hits: Dict[str, List[Tuple[float, str]]], synergy_top=(3, 2, 1), min_examples: int = 5, augment_synergies: bool = False):
    updated = 0
    # Preload all YAML for synergy lookups (avoid repeated disk IO inside loop)
    all_yaml_cache: Dict[str, dict] = {}
    for p in CATALOG_DIR.glob('*.yml'):
        try:
            all_yaml_cache[p.name] = load_yaml_theme(p)
        except Exception:
            pass
    for path in sorted(CATALOG_DIR.glob('*.yml')):
        data = load_yaml_theme(path)
        if not isinstance(data, dict):
            continue
        display = data.get('display_name')
        if not isinstance(display, str) or not display:
            continue
        if themes_filter and display not in themes_filter:
            continue
        sug = suggestions.get(display)
        if not sug:
            continue
        changed = False
        # Optional synergy augmentation prior to commander derivation
        if augment_synergies and _augment_synergies(data, display):
            changed = True
        # Derive synergy_commanders before promotion logic
        synergy_cmds = _derive_synergy_commanders(display, data, all_yaml_cache, commander_hits, legendary_hits, synergy_top=synergy_top)
        # Annotate synergy_commanders with their synergy source for transparency
        synergy_cmd_names = [f"{c} - Synergy ({src})" for c, src in synergy_cmds]
        if (force or not data.get('example_cards')) and sug.cards:
            data['example_cards'] = sug.cards
            changed = True
        existing_examples: List[str] = list(data.get('example_commanders') or []) if isinstance(data.get('example_commanders'), list) else []
        if force or not existing_examples:
            if sug.commanders:
                data['example_commanders'] = list(sug.commanders)
                existing_examples = data['example_commanders']
                changed = True
        # (Attachment of synergy_commanders moved to after promotion so we can filter duplicates with example_commanders)
        # Re-annotate existing example_commanders if they use old base-theme annotation pattern
        if existing_examples and synergy_cmds:
            # Detect old pattern: ends with base theme name inside parentheses
            needs_reannotate = False
            old_suffix = f" - Synergy ({display})"
            for ex in existing_examples:
                if ex.endswith(old_suffix):
                    needs_reannotate = True
                    break
            if needs_reannotate:
                # Build mapping from commander name to synergy source
                source_map = {name: src for name, src in synergy_cmds}
                new_examples: List[str] = []
                for ex in existing_examples:
                    if ' - Synergy (' in ex:
                        base_name = ex.split(' - Synergy ')[0]
                        if base_name in source_map:
                            new_examples.append(f"{base_name} - Synergy ({source_map[base_name]})")
                            continue
                    new_examples.append(ex)
                if new_examples != existing_examples:
                    data['example_commanders'] = new_examples
                    existing_examples = new_examples
                    changed = True
        # Promotion: ensure at least min_examples in example_commanders by moving from synergy list (without duplicates)
        if (len(existing_examples) < min_examples) and synergy_cmd_names:
            needed = min_examples - len(existing_examples)
            promoted = []
            for cname, source_synergy in synergy_cmds:
                # Avoid duplicate even with annotation
                if not any(cname == base.split(' - Synergy ')[0] for base in existing_examples):
                    annotated = f"{cname} - Synergy ({source_synergy})"
                    existing_examples.append(annotated)
                    promoted.append(cname)
                    needed -= 1
                    if needed <= 0:
                        break
            if promoted:
                data['example_commanders'] = existing_examples
                changed = True
        # After any potential promotions / re-annotations, attach synergy_commanders excluding any commanders already present in example_commanders
        existing_base_names = {ex.split(' - Synergy ')[0] for ex in (data.get('example_commanders') or []) if isinstance(ex, str)}
        filtered_synergy_cmd_names = []
        for entry in synergy_cmd_names:
            base = entry.split(' - Synergy ')[0]
            if base not in existing_base_names:
                filtered_synergy_cmd_names.append(entry)
        prior_synergy_cmds = data.get('synergy_commanders') if isinstance(data.get('synergy_commanders'), list) else []
        if prior_synergy_cmds != filtered_synergy_cmd_names:
            if filtered_synergy_cmd_names or force or prior_synergy_cmds:
                data['synergy_commanders'] = filtered_synergy_cmd_names
                changed = True

        if changed:
            write_yaml_theme(path, data)
            updated += 1
            print(f"[apply] updated {path.name}")
            if limit_yaml and updated >= limit_yaml:
                print(f"[apply] reached limit {limit_yaml}; stopping")
                break
    return updated


def main():  # pragma: no cover
    parser = argparse.ArgumentParser(description='Generate example_cards / example_commanders suggestions for theme YAML')
    parser.add_argument('--themes', type=str, help='Comma-separated subset of display names to restrict')
    parser.add_argument('--top', type=int, default=8, help='Target number of example_cards suggestions')
    parser.add_argument('--top-commanders', type=int, default=5, help='Target number of example_commanders suggestions')
    parser.add_argument('--max-rank', type=float, default=60000, help='Skip cards with EDHREC rank above this threshold')
    parser.add_argument('--include-master', action='store_true', help='Include large cards.csv in scan (slower)')
    parser.add_argument('--progress-every', type=int, default=0, help='Emit a progress line every N rows per file')
    parser.add_argument('--apply', action='store_true', help='Write missing fields into YAML files')
    parser.add_argument('--limit-yaml', type=int, default=0, help='Limit number of YAML files modified (0 = unlimited)')
    parser.add_argument('--force', action='store_true', help='Overwrite existing example lists')
    parser.add_argument('--min-examples', type=int, default=5, help='Minimum desired example_commanders; promote from synergy_commanders if short')
    parser.add_argument('--augment-synergies', action='store_true', help='Heuristically augment sparse synergies list before deriving synergy_commanders')
    args = parser.parse_args()

    themes_filter: Set[str] = set()
    if args.themes:
        themes_filter = {t.strip() for t in args.themes.split(',') if t.strip()}

    print('[info] scanning CSVs...', file=sys.stderr)
    theme_hits, legendary_hits = scan_color_csvs(args.include_master, args.max_rank, args.progress_every)
    print('[info] scanning commander CSV...', file=sys.stderr)
    commander_hits = scan_commander_csv(args.max_rank)
    print('[info] building suggestions...', file=sys.stderr)
    suggestions = build_suggestions(theme_hits, commander_hits, args.top, args.top_commanders, min_examples=args.min_examples)

    if not args.apply:
        # Dry run: print JSON-like summary for filtered subset (or first 25 themes)
        to_show = sorted(themes_filter) if themes_filter else list(sorted(suggestions.keys())[:25])
        for t in to_show:
            s = suggestions.get(t)
            if not s:
                continue
            print(f"\n=== {t} ===")
            print('example_cards:', ', '.join(s.cards) or '(none)')
            print('example_commanders:', ', '.join(s.commanders) or '(none)')
            print('synergy_commanders: (computed at apply time)')
        print('\n[info] dry-run complete (use --apply to write)')
        return

    if yaml is None:
        print('ERROR: PyYAML not installed; cannot apply changes.', file=sys.stderr)
        sys.exit(1)
    updated = apply_to_yaml(suggestions, limit_yaml=args.limit_yaml, force=args.force, themes_filter=themes_filter, commander_hits=commander_hits, legendary_hits=legendary_hits, synergy_top=(3, 2, 1), min_examples=args.min_examples, augment_synergies=args.augment_synergies)
    print(f'[info] updated {updated} YAML files')


if __name__ == '__main__':  # pragma: no cover
    main()
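
The " - Synergy (<name>)" annotation used above is a plain string suffix; a minimal standalone sketch mirroring the f-string and split logic from apply_to_yaml:

def annotate(commander: str, synergy: str) -> str:
    return f"{commander} - Synergy ({synergy})"

def base_name(entry: str) -> str:
    return entry.split(' - Synergy ')[0]

entry = annotate('Omnath, Locus of Rage', 'Landfall')
print(entry)             # Omnath, Locus of Rage - Synergy (Landfall)
print(base_name(entry))  # Omnath, Locus of Rage
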
code/scripts/lint_theme_editorial.py (new file, 149 lines)
@@ -0,0 +1,149 @@
"""Phase D: Lint editorial metadata for theme YAML files.
|
||||
|
||||
Checks (non-fatal unless --strict):
|
||||
- example_commanders/example_cards length & uniqueness
|
||||
- deck_archetype membership in allowed set (warn if unknown)
|
||||
- Cornerstone themes have at least one example commander & card
|
||||
|
||||
Exit codes:
|
||||
0: No errors (warnings may still print)
|
||||
1: Structural / fatal errors (in strict mode or malformed YAML)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import List, Set
|
||||
import re
|
||||
|
||||
import sys
|
||||
|
||||
try:
|
||||
import yaml # type: ignore
|
||||
except Exception: # pragma: no cover
|
||||
yaml = None
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog'
|
||||
|
||||
ALLOWED_ARCHETYPES: Set[str] = {
|
||||
'Lands', 'Graveyard', 'Planeswalkers', 'Tokens', 'Counters', 'Spells', 'Artifacts', 'Enchantments', 'Politics'
|
||||
}
|
||||
|
||||
CORNERSTONE: Set[str] = {
|
||||
'Landfall', 'Reanimate', 'Superfriends', 'Tokens Matter', '+1/+1 Counters'
|
||||
}
|
||||
|
||||
|
||||
def lint(strict: bool) -> int:
|
||||
if yaml is None:
|
||||
print('YAML support not available (PyYAML missing); skipping lint.')
|
||||
return 0
|
||||
if not CATALOG_DIR.exists():
|
||||
print('Catalog directory missing; nothing to lint.')
|
||||
return 0
|
||||
errors: List[str] = []
|
||||
warnings: List[str] = []
|
||||
cornerstone_present: Set[str] = set()
|
||||
seen_display: Set[str] = set()
|
||||
ann_re = re.compile(r" - Synergy \(([^)]+)\)$")
|
||||
for path in sorted(CATALOG_DIR.glob('*.yml')):
|
||||
try:
|
||||
data = yaml.safe_load(path.read_text(encoding='utf-8'))
|
||||
except Exception as e:
|
||||
errors.append(f"Failed to parse {path.name}: {e}")
|
||||
continue
|
||||
if not isinstance(data, dict):
|
||||
errors.append(f"YAML not mapping: {path.name}")
|
||||
continue
|
||||
name = str(data.get('display_name') or '').strip()
|
||||
if not name:
|
||||
continue
|
||||
# Skip deprecated alias placeholder files
|
||||
notes_field = data.get('notes')
|
||||
if isinstance(notes_field, str) and 'Deprecated alias file' in notes_field:
|
||||
continue
|
||||
if name in seen_display:
|
||||
# Already processed a canonical file for this display name; skip duplicates (aliases)
|
||||
continue
|
||||
seen_display.add(name)
|
||||
ex_cmd = data.get('example_commanders') or []
|
||||
ex_cards = data.get('example_cards') or []
|
||||
synergy_cmds = data.get('synergy_commanders') if isinstance(data.get('synergy_commanders'), list) else []
|
||||
theme_synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else []
|
||||
if not isinstance(ex_cmd, list):
|
||||
errors.append(f"example_commanders not list in {path.name}")
|
||||
ex_cmd = []
|
||||
if not isinstance(ex_cards, list):
|
||||
errors.append(f"example_cards not list in {path.name}")
|
||||
ex_cards = []
|
||||
# Length caps
|
||||
if len(ex_cmd) > 12:
|
||||
warnings.append(f"{name}: example_commanders trimmed to 12 (found {len(ex_cmd)})")
|
||||
if len(ex_cards) > 20:
|
||||
warnings.append(f"{name}: example_cards length {len(ex_cards)} > 20 (consider trimming)")
|
||||
if synergy_cmds and len(synergy_cmds) > 6:
|
||||
warnings.append(f"{name}: synergy_commanders length {len(synergy_cmds)} > 6 (3/2/1 pattern expected)")
|
||||
if ex_cmd and len(ex_cmd) < 5:
|
||||
warnings.append(f"{name}: example_commanders only {len(ex_cmd)} (<5 minimum target)")
|
||||
if not synergy_cmds and any(' - Synergy (' in c for c in ex_cmd):
|
||||
# If synergy_commanders intentionally filtered out because all synergy picks were promoted, skip warning.
|
||||
# Heuristic: if at least 5 examples and every annotated example has unique base name, treat as satisfied.
|
||||
base_names = {c.split(' - Synergy ')[0] for c in ex_cmd if ' - Synergy (' in c}
|
||||
if not (len(ex_cmd) >= 5 and len(base_names) >= 1):
|
||||
warnings.append(f"{name}: has synergy-annotated example_commanders but missing synergy_commanders list")
|
||||
# Uniqueness
|
||||
if len(set(ex_cmd)) != len(ex_cmd):
|
||||
warnings.append(f"{name}: duplicate entries in example_commanders")
|
||||
if len(set(ex_cards)) != len(ex_cards):
|
||||
warnings.append(f"{name}: duplicate entries in example_cards")
|
||||
if synergy_cmds:
|
||||
base_synergy_names = [c.split(' - Synergy ')[0] for c in synergy_cmds]
|
||||
if len(set(base_synergy_names)) != len(base_synergy_names):
|
||||
warnings.append(f"{name}: duplicate entries in synergy_commanders (base names)")
|
||||
|
||||
# Annotation validation: each annotated example should reference a synergy in theme synergies
|
||||
for c in ex_cmd:
|
||||
if ' - Synergy (' in c:
|
||||
m = ann_re.search(c)
|
||||
if m:
|
||||
syn = m.group(1).strip()
|
||||
if syn and syn not in theme_synergies:
|
||||
warnings.append(f"{name}: example commander annotation synergy '{syn}' not in theme synergies list")
|
||||
# Cornerstone coverage
|
||||
if name in CORNERSTONE:
|
||||
if not ex_cmd:
|
||||
warnings.append(f"Cornerstone theme {name} missing example_commanders")
|
||||
if not ex_cards:
|
||||
warnings.append(f"Cornerstone theme {name} missing example_cards")
|
||||
else:
|
||||
cornerstone_present.add(name)
|
||||
# Archetype
|
||||
arch = data.get('deck_archetype')
|
||||
if arch and arch not in ALLOWED_ARCHETYPES:
|
||||
warnings.append(f"{name}: deck_archetype '{arch}' not in allowed set {sorted(ALLOWED_ARCHETYPES)}")
|
||||
# Summaries
|
||||
if warnings:
|
||||
print('LINT WARNINGS:')
|
||||
for w in warnings:
|
||||
print(f" - {w}")
|
||||
if errors:
|
||||
print('LINT ERRORS:')
|
||||
for e in errors:
|
||||
print(f" - {e}")
|
||||
if errors and strict:
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
def main(): # pragma: no cover
|
||||
parser = argparse.ArgumentParser(description='Lint editorial metadata for theme YAML files (Phase D)')
|
||||
parser.add_argument('--strict', action='store_true', help='Treat errors as fatal (non-zero exit)')
|
||||
args = parser.parse_args()
|
||||
rc = lint(args.strict)
|
||||
if rc != 0:
|
||||
sys.exit(rc)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
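
A quick standalone check of the annotation regex used by the linter above:

import re

ann_re = re.compile(r" - Synergy \(([^)]+)\)$")
m = ann_re.search("Omnath, Locus of Rage - Synergy (Landfall)")
print(m.group(1) if m else None)  # Landfall
assert ann_re.search("Omnath, Locus of Rage") is None  # unannotated names don't match
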
code/scripts/validate_theme_catalog.py (new file, 260 lines)
@@ -0,0 +1,260 @@
"""Validation script for theme catalog (Phase C groundwork).
|
||||
|
||||
Performs:
|
||||
- Pydantic model validation
|
||||
- Duplicate theme detection
|
||||
- Enforced synergies presence check (from whitelist)
|
||||
- Normalization idempotency check (optional --rebuild-pass)
|
||||
- Synergy cap enforcement (allowing soft exceed when curated+enforced exceed cap)
|
||||
- JSON Schema export (--schema / --schema-out)
|
||||
|
||||
Exit codes:
|
||||
0 success
|
||||
1 validation errors (structural)
|
||||
2 policy errors (duplicates, missing enforced synergies, cap violations)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Set
|
||||
|
||||
try:
|
||||
import yaml # type: ignore
|
||||
except Exception:
|
||||
yaml = None
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
CODE_ROOT = ROOT / 'code'
|
||||
if str(CODE_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(CODE_ROOT))
|
||||
|
||||
from type_definitions_theme_catalog import ThemeCatalog, ThemeYAMLFile # type: ignore
|
||||
from scripts.extract_themes import load_whitelist_config # type: ignore
|
||||
from scripts.build_theme_catalog import build_catalog # type: ignore
|
||||
|
||||
CATALOG_JSON = ROOT / 'config' / 'themes' / 'theme_list.json'
|
||||
|
||||
|
||||
def load_catalog_file() -> Dict:
|
||||
if not CATALOG_JSON.exists():
|
||||
raise SystemExit(f"Catalog JSON missing: {CATALOG_JSON}")
|
||||
return json.loads(CATALOG_JSON.read_text(encoding='utf-8'))
|
||||
|
||||
|
||||
def validate_catalog(data: Dict, *, whitelist: Dict, allow_soft_exceed: bool = True) -> List[str]:
    errors: List[str] = []
    # If provenance is missing (legacy extraction output), inject a synthetic one so
    # the subsequent checks can proceed.
    if 'provenance' not in data:
        data['provenance'] = {
            'mode': 'legacy-extraction',
            'generated_at': 'unknown',
            'curated_yaml_files': 0,
            'synergy_cap': int(whitelist.get('synergy_cap', 0) or 0),
            'inference': 'unknown',
            'version': 'pre-merge-fallback'
        }
    if 'generated_from' not in data:
        data['generated_from'] = 'legacy (tagger + constants)'
    try:
        catalog = ThemeCatalog(**data)
    except Exception as e:  # structural validation
        errors.append(f"Pydantic validation failed: {e}")
        return errors

    # Duplicate detection
    seen: Set[str] = set()
    dups: Set[str] = set()
    for t in catalog.themes:
        if t.theme in seen:
            dups.add(t.theme)
        seen.add(t.theme)
    if dups:
        errors.append(f"Duplicate theme entries detected: {sorted(dups)}")

    enforced_cfg: Dict[str, List[str]] = whitelist.get('enforced_synergies', {}) or {}
    synergy_cap = int(whitelist.get('synergy_cap', 0) or 0)

    # Fast index
    theme_map = {t.theme: t for t in catalog.themes}

    # Enforced presence & cap checks
    for anchor, required in enforced_cfg.items():
        if anchor not in theme_map:
            continue  # pruning may allow non-always_include anchors to drop
        syn = theme_map[anchor].synergies
        missing = [r for r in required if r not in syn]
        if missing:
            errors.append(f"Anchor '{anchor}' missing enforced synergies: {missing}")
        if synergy_cap and len(syn) > synergy_cap:
            if not allow_soft_exceed:
                errors.append(f"Anchor '{anchor}' exceeds synergy cap ({len(syn)}>{synergy_cap})")

    # Cap check for all themes. Curated contributions cannot be reconstructed from the
    # merged catalog, so the soft-exceed heuristic (all enforced synergies present and
    # the enforced list alone reaching the cap) stays deliberately permissive: over-cap
    # themes outside that case are detected but not reported as errors for now.
    if synergy_cap:
        for t in catalog.themes:
            if len(t.synergies) > synergy_cap:
                enforced = set(enforced_cfg.get(t.theme, []))
                if not (allow_soft_exceed and enforced and enforced.issubset(set(t.synergies)) and len(enforced) >= synergy_cap):
                    pass  # Intentionally permissive until curated provenance is available.

    return errors

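# Example of a policy error this produces (theme names illustrative): with a whitelist
# entry like
#   enforced_synergies: {"Counters Matter": ["Proliferate"]}
# a catalog theme 'Counters Matter' whose synergies omit 'Proliferate' is reported as:
#   "Anchor 'Counters Matter' missing enforced synergies: ['Proliferate']"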
def validate_yaml_files(*, whitelist: Dict, strict_alias: bool = False) -> List[str]:
    """Validate individual YAML catalog files.

    strict_alias: if True, treat the presence of a deprecated alias (a key in the
    normalization map) as a hard error instead of a soft, ignored transitional state.
    """
    errors: List[str] = []
    catalog_dir = ROOT / 'config' / 'themes' / 'catalog'
    if not catalog_dir.exists():
        return errors
    seen_ids: Set[str] = set()
    normalization_map: Dict[str, str] = whitelist.get('normalization', {}) if isinstance(whitelist.get('normalization'), dict) else {}
    always_include = set(whitelist.get('always_include', []) or [])
    present_always: Set[str] = set()
    for path in sorted(catalog_dir.glob('*.yml')):
        try:
            raw = yaml.safe_load(path.read_text(encoding='utf-8')) if yaml else None
        except Exception:
            errors.append(f"Failed to parse YAML: {path.name}")
            continue
        if not isinstance(raw, dict):
            errors.append(f"YAML not a mapping: {path.name}")
            continue
        try:
            obj = ThemeYAMLFile(**raw)
        except Exception as e:
            errors.append(f"YAML schema violation {path.name}: {e}")
            continue
        # Duplicate id detection
        if obj.id in seen_ids:
            errors.append(f"Duplicate YAML id: {obj.id}")
        seen_ids.add(obj.id)
        # Normalization alias check: display_name should already be normalized if it
        # appears as a key in the normalization map.
        if normalization_map and obj.display_name in normalization_map:
            if strict_alias:
                errors.append(f"Alias display_name present in strict mode: {obj.display_name} ({path.name})")
            # else: soft-ignore during the transitional period
        if obj.display_name in always_include:
            present_always.add(obj.display_name)
    missing_always = always_include - present_always
    if missing_always:
        # Reported as an error for now even though such themes may only exist in
        # analytics; revisit once YAML coverage stabilizes.
        errors.append(f"always_include themes missing YAML files: {sorted(missing_always)}")
    return errors

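# For reference, a minimal per-file YAML satisfying the fields exercised above
# (id, display_name) might look like this (values illustrative; ThemeYAMLFile may
# require additional fields):
#   id: counters-matter
#   display_name: Counters Matter
#   synergies:
#     - Proliferate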
def main():  # pragma: no cover
    parser = argparse.ArgumentParser(description='Validate theme catalog (Phase C)')
    parser.add_argument('--schema', action='store_true', help='Print JSON Schema for catalog and exit')
    parser.add_argument('--schema-out', type=str, help='Write JSON Schema to file path')
    parser.add_argument('--rebuild-pass', action='store_true', help='Rebuild catalog in-memory and ensure stable equality vs file')
    parser.add_argument('--fail-soft-exceed', action='store_true', help='Treat synergy list length > cap as an error even for soft exceed')
    parser.add_argument('--yaml-schema', action='store_true', help='Print JSON Schema for per-file ThemeYAML and exit')
    parser.add_argument('--strict-alias', action='store_true', help='Fail if any YAML uses an alias name slated for normalization')
    args = parser.parse_args()

    if args.schema:
        schema = ThemeCatalog.model_json_schema()
        if args.schema_out:
            Path(args.schema_out).write_text(json.dumps(schema, indent=2), encoding='utf-8')
        else:
            print(json.dumps(schema, indent=2))
        return
    if args.yaml_schema:
        schema = ThemeYAMLFile.model_json_schema()
        if args.schema_out:
            Path(args.schema_out).write_text(json.dumps(schema, indent=2), encoding='utf-8')
        else:
            print(json.dumps(schema, indent=2))
        return
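    # For example (assuming a writable path), `--schema --schema-out theme_catalog.schema.json`
    # writes the catalog schema to disk instead of stdout; `--yaml-schema` does the
    # same for the per-file model.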
    whitelist = load_whitelist_config()
    data = load_catalog_file()
    errors = validate_catalog(data, whitelist=whitelist, allow_soft_exceed=not args.fail_soft_exceed)
    errors.extend(validate_yaml_files(whitelist=whitelist, strict_alias=args.strict_alias))
    if args.rebuild_pass:
        rebuilt = build_catalog(limit=0, verbose=False)
        # Compare canonical dict dumps (theme ordering is deterministic: sorted by theme name in the build script)
        normalization_map: Dict[str, str] = whitelist.get('normalization', {}) if isinstance(whitelist.get('normalization'), dict) else {}

        def _canon(theme_list):
            canon: Dict[str, Dict] = {}
            for t in theme_list:
                name = t.get('theme')
                if not isinstance(name, str):
                    continue
                name_canon = normalization_map.get(name, name)
                sy = t.get('synergies', [])
                if not isinstance(sy, list):
                    sy_sorted = []
                else:
                    # Apply normalization inside synergies too
                    sy_norm = [normalization_map.get(s, s) for s in sy if isinstance(s, str)]
                    sy_sorted = sorted(set(sy_norm))
                entry = {
                    'theme': name_canon,
                    'synergies': sy_sorted,
                }
                # Keep first (curated/enforced precedence differences ignored for alias collapse)
                canon.setdefault(name_canon, entry)
            # Return list sorted by canonical name
            return [canon[k] for k in sorted(canon.keys())]
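        # e.g. with normalization {"+1/+1 Counters": "Counters Matter"} (illustrative),
        # entries for '+1/+1 Counters' and 'Counters Matter' collapse to a single
        # 'Counters Matter' entry whose synergies are normalized, de-duplicated, and sorted.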
        file_dump = json.dumps(_canon(data.get('themes', [])), sort_keys=True)
        rebuilt_dump = json.dumps(_canon(rebuilt.get('themes', [])), sort_keys=True)
        if file_dump != rebuilt_dump:
            # Provide lightweight diff diagnostics (sample missing/extra themes, the
            # first synergy mismatch, or a short unified-diff snippet as a fallback)
            try:
                import difflib
                file_list = json.loads(file_dump)
                reb_list = json.loads(rebuilt_dump)
                file_names = [t['theme'] for t in file_list]
                reb_names = [t['theme'] for t in reb_list]
                missing_in_reb = sorted(set(file_names) - set(reb_names))[:5]
                extra_in_reb = sorted(set(reb_names) - set(file_names))[:5]
                # Find the first theme with differing synergies
                synergy_mismatch = None
                for f in file_list:
                    for r in reb_list:
                        if f['theme'] == r['theme'] and f['synergies'] != r['synergies']:
                            synergy_mismatch = (f['theme'], f['synergies'][:10], r['synergies'][:10])
                            break
                    if synergy_mismatch:
                        break
                diff_note_parts = []
                if missing_in_reb:
                    diff_note_parts.append(f"missing:{missing_in_reb}")
                if extra_in_reb:
                    diff_note_parts.append(f"extra:{extra_in_reb}")
                if synergy_mismatch:
                    diff_note_parts.append(f"synergy_mismatch:{synergy_mismatch}")
                if not diff_note_parts:
                    # Fallback: a short unified-diff snippet
                    for line in difflib.unified_diff(file_dump.splitlines(), rebuilt_dump.splitlines(), n=1):
                        diff_note_parts.append(line)
                        if len(diff_note_parts) > 10:
                            break
                errors.append('Normalization / rebuild pass produced differing theme list output ' + ' | '.join(diff_note_parts))
            except Exception:
                errors.append('Normalization / rebuild pass produced differing theme list output (diff unavailable)')

    if errors:
        print('VALIDATION FAILED:')
        for e in errors:
            print(f" - {e}")
        sys.exit(2)
    print('Theme catalog validation passed.')

if __name__ == '__main__':
    main()