feat(tagging+archetypes): add Pillowfort/Politics/Midrange/Toolbox tagging and unify archetype presence skip logic

2025-12-17 08:00:13 +01:00 · 2025-09-19 11:53:52 -07:00 · 2025-09-19 11:53:52 -07:00 · 6d6243d6be
commit 6d6243d6be
parent f2a76d2ffc
47 changed files with 21133 additions and 839 deletions
--- a/code/tests/test_archetype_theme_presence.py
+++ b/code/tests/test_archetype_theme_presence.py
@ -0,0 +1,44 @@
+"""Ensure each enumerated deck archetype has at least one theme YAML with matching deck_archetype.
+Only the deck_archetype field is inspected here; display_name entries are not validated by this module.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+import yaml  # type: ignore
+import pytest
+
+ROOT = Path(__file__).resolve().parents[2]
+CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog'
+
+ARCHETYPE_MIN = 1  # minimum number of theme YAML files required per archetype
+
+# Mirror of ALLOWED_DECK_ARCHETYPES (keep in sync or import if packaging adjusted)
+ALLOWED = {
+    'Graveyard', 'Tokens', 'Counters', 'Spells', 'Artifacts', 'Enchantments', 'Lands', 'Politics', 'Combo',
+    'Aggro', 'Control', 'Midrange', 'Stax', 'Ramp', 'Toolbox'
+}
+
+
+def test_each_archetype_present():
+    """Validate at least one theme YAML declares each deck_archetype.
+
+    Skips gracefully when the generated theme catalog is not available in the
+    current environment (e.g., minimal install without generated YAML assets).
+    """
+    yaml_files = list(CATALOG_DIR.glob('*.yml'))
+    found = {a: 0 for a in ALLOWED}
+
+    for p in yaml_files:
+        data = yaml.safe_load(p.read_text(encoding='utf-8'))
+        if not isinstance(data, dict):
+            continue
+        arch = data.get('deck_archetype')
+        # Check the type first: an unhashable value (e.g. a YAML list) would make
+        # the dict membership test raise TypeError instead of simply being ignored.
+        if isinstance(arch, str) and arch in found:
+            found[arch] += 1
+
+    # Unified skip: either no files OR zero assignments discovered.
+    if (not yaml_files) or all(c == 0 for c in found.values()):
+        pytest.skip("Theme catalog not present; skipping archetype presence check.")
+
+    # Sorted so the failure message is stable across runs (ALLOWED is an unordered set).
+    missing = sorted(a for a, c in found.items() if c < ARCHETYPE_MIN)
+    assert not missing, f"Archetypes lacking themed representation: {missing}"
--- a/code/tests/test_description_mapping_validation.py
+++ b/code/tests/test_description_mapping_validation.py
@ -0,0 +1,37 @@
+import subprocess
+import sys
+import json
+import os
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[2]
+SCRIPT = ROOT / 'code' / 'scripts' / 'build_theme_catalog.py'
+VALIDATE = ROOT / 'code' / 'scripts' / 'validate_description_mapping.py'
+TEMP_OUT = ROOT / 'config' / 'themes' / 'theme_list_mapping_test.json'
+
+
+def test_description_mapping_validator_runs():
+    """Run the mapping validator script; expect exit code 0 and 'Mapping OK' in its output."""
+    proc = subprocess.run([sys.executable, str(VALIDATE)], capture_output=True, text=True)
+    combined_output = proc.stdout + proc.stderr
+    assert proc.returncode == 0, proc.stderr or proc.stdout
+    assert 'Mapping OK' in combined_output
+
+
+def test_mapping_applies_to_catalog():
+    """Build the catalog to a scratch path and spot-check one mapped description.
+
+    The scratch output is written under config/themes, so it is removed in a
+    ``finally`` block; otherwise a failing assertion would leave it behind.
+    """
+    env = os.environ.copy()
+    env['EDITORIAL_INCLUDE_FALLBACK_SUMMARY'] = '1'
+    try:
+        # Build catalog to alternate path
+        res = subprocess.run(
+            [sys.executable, str(SCRIPT), '--output', str(TEMP_OUT)],
+            capture_output=True, text=True, env=env,
+        )
+        assert res.returncode == 0, res.stderr
+        data = json.loads(TEMP_OUT.read_text(encoding='utf-8'))
+        themes = data.get('themes', [])
+        assert themes, 'No themes generated'
+        # Pick a theme that should clearly match a mapping rule (e.g., contains "Treasure")
+        mapped = [t for t in themes if 'Treasure' in t.get('theme', '')]
+        if mapped:
+            desc = mapped[0].get('description', '')
+            assert 'Treasure tokens' in desc or 'Treasure token' in desc
+    finally:
+        # Best-effort cleanup: a missing or locked file must not mask the test result.
+        try:
+            TEMP_OUT.unlink()
+        except OSError:
+            pass
--- a/code/tests/test_editorial_governance_phase_d_closeout.py
+++ b/code/tests/test_editorial_governance_phase_d_closeout.py
@ -0,0 +1,142 @@
+"""Phase D Close-Out Governance Tests
+
+These tests enforce remaining non-UI editorial guarantees before Phase E.
+
+Coverage:
+ - Deterministic build under EDITORIAL_SEED (structure equality ignoring metadata_info timestamps)
+ - KPI history JSONL integrity (monotonic timestamps, schema fields, ratio consistency)
+ - metadata_info block coverage across YAML catalog (>=95%)
+ - synergy_commanders do not duplicate (base) example_commanders
+ - Mapping trigger specialization guard: any theme name matching a description mapping trigger
+   must NOT retain a generic fallback description ("Builds around ..."). Tribal phrasing beginning
+   with "Focuses on getting" is allowed.
+"""
+from __future__ import annotations
+
+import json
+import os
+import re
+from pathlib import Path
+from datetime import datetime
+from typing import Dict, Any, List, Set
+
+
+ROOT = Path(__file__).resolve().parents[2]
+THEMES_DIR = ROOT / 'config' / 'themes'
+CATALOG_JSON = THEMES_DIR / 'theme_list.json'
+CATALOG_DIR = THEMES_DIR / 'catalog'
+HISTORY = THEMES_DIR / 'description_fallback_history.jsonl'
+MAPPING = THEMES_DIR / 'description_mapping.yml'
+
+
+def _load_catalog() -> Dict[str, Any]:
+    """Parse theme_list.json and return it after checking it holds a list under 'themes'."""
+    catalog = json.loads(CATALOG_JSON.read_text(encoding='utf-8'))
+    has_theme_list = 'themes' in catalog and isinstance(catalog['themes'], list)
+    assert has_theme_list
+    return catalog
+
+
+def test_deterministic_build_under_seed():
+    """Two in-process builds under the same EDITORIAL_SEED must produce equal catalogs.
+
+    The seed is written to ``os.environ`` for the builds and the previous value is
+    restored afterwards so it does not leak into tests that run later in the session.
+    """
+    previous_seed = os.environ.get('EDITORIAL_SEED')
+    os.environ['EDITORIAL_SEED'] = '999'
+    try:
+        # Import build after setting seed env
+        from scripts.build_theme_catalog import build_catalog  # type: ignore
+        first = build_catalog(limit=0, verbose=False)
+        second = build_catalog(limit=0, verbose=False)
+    finally:
+        if previous_seed is None:
+            os.environ.pop('EDITORIAL_SEED', None)
+        else:
+            os.environ['EDITORIAL_SEED'] = previous_seed
+    # Drop volatile metadata_info/timestamp fields before comparison
+    for d in (first, second):
+        d.pop('metadata_info', None)
+        d.pop('yaml_catalog', None)
+    assert first == second, "Catalog build not deterministic under identical EDITORIAL_SEED"
+
+
+def test_kpi_history_integrity():
+    """Check each KPI history record for required fields, ordered timestamps and a consistent generic_pct."""
+    assert HISTORY.exists(), "KPI history file missing"
+    stripped = (raw.strip() for raw in HISTORY.read_text(encoding='utf-8').splitlines())
+    lines = [entry for entry in stripped if entry]
+    assert lines, "KPI history empty"
+    required_fields = (
+        'timestamp', 'total_themes', 'generic_total', 'generic_with_synergies', 'generic_plain', 'generic_pct',
+    )
+    last_seen: datetime | None = None
+    for entry in lines:
+        record = json.loads(entry)
+        for field in required_fields:
+            assert field in record, f"History record missing field {field}"
+        # Timestamp parse & monotonic (allow equal for rapid successive builds)
+        stamp = datetime.fromisoformat(record['timestamp'])
+        assert last_seen is None or stamp >= last_seen, "History timestamps not monotonic non-decreasing"
+        last_seen = stamp
+        # The denominator is clamped to 1 so a zero total cannot divide by zero.
+        denominator = max(1, int(record['total_themes']))
+        expected_pct = 100.0 * int(record['generic_total']) / denominator
+        # Allow small rounding drift
+        drift = abs(expected_pct - float(record['generic_pct']))
+        assert drift <= 0.2, "generic_pct inconsistent with totals"
+
+
+def test_metadata_info_block_coverage():
+    """Require >=95% of eligible catalog YAML files to carry a populated metadata_info block.
+
+    Non-mapping documents, deprecated alias placeholders and files without a
+    display_name are excluded. A block counts when it is a dict (under
+    'metadata_info' or legacy 'provenance') with truthy 'last_backfill' and 'script'.
+    """
+    import yaml  # type: ignore
+    assert CATALOG_DIR.exists(), "Catalog YAML directory missing"
+    eligible = 0
+    stamped = 0
+    for yaml_path in CATALOG_DIR.glob('*.yml'):
+        doc = yaml.safe_load(yaml_path.read_text(encoding='utf-8'))
+        if not isinstance(doc, dict):
+            continue
+        notes = doc.get('notes')
+        is_deprecated_alias = isinstance(notes, str) and 'Deprecated alias file' in notes
+        # Skip deprecated alias placeholders and entries without a display name
+        if is_deprecated_alias or not doc.get('display_name'):
+            continue
+        eligible += 1
+        meta = doc.get('metadata_info') or doc.get('provenance')
+        if isinstance(meta, dict) and meta.get('last_backfill') and meta.get('script'):
+            stamped += 1
+    assert eligible > 0, "No YAML files discovered for provenance check"
+    coverage = stamped / eligible
+    assert coverage >= 0.95, f"metadata_info coverage below threshold: {coverage:.2%} (wanted >=95%)"
+
+
+def test_synergy_commanders_exclusion_of_examples():
+    """Fail if a synergy commander repeats an example commander once any ' - Synergy (...)' suffix is stripped."""
+    import yaml  # type: ignore
+    suffix_re = re.compile(r" - Synergy \(.*\)$")
+    violations: List[str] = []
+    for yaml_path in CATALOG_DIR.glob('*.yml'):
+        doc = yaml.safe_load(yaml_path.read_text(encoding='utf-8'))
+        if not isinstance(doc, dict) or not doc.get('display_name'):
+            continue
+        examples = doc.get('example_commanders') or []
+        synergy = doc.get('synergy_commanders') or []
+        if not isinstance(examples, list) or not isinstance(synergy, list):
+            continue
+        # Compare on the bare commander name, with the annotation suffix removed on both sides.
+        example_bases = {suffix_re.sub('', name) for name in examples if isinstance(name, str)}
+        for name in synergy:
+            if not isinstance(name, str):
+                continue
+            base = suffix_re.sub('', name)
+            if base in example_bases:
+                violations.append(f"{doc.get('display_name')}: '{name}' duplicates example '{base}'")
+    assert not violations, 'synergy_commanders contain duplicates of example_commanders: ' + '; '.join(violations)
+
+
+def test_mapping_trigger_specialization_guard():
+    """Themes whose name contains a mapping trigger must not keep a 'Builds around' description."""
+    import yaml  # type: ignore
+    assert MAPPING.exists(), "description_mapping.yml missing"
+    rules = yaml.safe_load(MAPPING.read_text(encoding='utf-8')) or []
+    # Collect lower-cased trigger strings from every rule that carries a 'triggers' list.
+    triggers: Set[str] = {
+        trig.lower()
+        for rule in rules
+        if isinstance(rule, dict) and isinstance(rule.get('triggers'), list)
+        for trig in rule['triggers']
+        if isinstance(trig, str) and trig.strip()
+    }
+    generic_themes: List[str] = []
+    for entry in _load_catalog()['themes']:
+        theme = str(entry.get('theme') or '')
+        desc = str(entry.get('description') or '')
+        if not (theme and desc):
+            continue
+        # Generic detection: Starts with 'Builds around' (tribal phrasing allowed as non-generic)
+        is_generic = desc.startswith('Builds around')
+        theme_lower = theme.lower()
+        if is_generic and any(trig in theme_lower for trig in triggers):
+            generic_themes.append(theme)
+    assert not generic_themes, (
+        'Themes matched by description mapping triggers still have generic fallback descriptions: ' + ', '.join(sorted(generic_themes))
+    )
--- a/code/tests/test_synergy_pairs_and_metadata_info.py
+++ b/code/tests/test_synergy_pairs_and_metadata_info.py
@ -0,0 +1,49 @@
+import json
+import os
+from pathlib import Path
+import subprocess
+
+ROOT = Path(__file__).resolve().parents[2]
+SCRIPT = ROOT / 'code' / 'scripts' / 'build_theme_catalog.py'
+CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog'
+
+
+def run(cmd, env=None):
+    """Run ``cmd`` with cwd=ROOT and code/ on PYTHONPATH; return ``(stdout, stderr)``.
+
+    ``env`` entries, when given, are layered over the inherited environment last.
+    Raises AssertionError containing the captured output if the command exits non-zero.
+    """
+    merged_env = os.environ.copy()
+    code_dir = str(ROOT / 'code')
+    current_pp = merged_env.get('PYTHONPATH', '')
+    # Ensure code/ is on PYTHONPATH for script relative imports
+    if code_dir not in current_pp.split(os.pathsep):
+        merged_env['PYTHONPATH'] = os.pathsep.join([current_pp, code_dir]) if current_pp else code_dir
+    if env:
+        merged_env.update(env)
+    proc = subprocess.run(cmd, cwd=ROOT, env=merged_env, capture_output=True, text=True)
+    if proc.returncode != 0:
+        raise AssertionError(f"Command failed: {' '.join(cmd)}\nstdout:\n{proc.stdout}\nstderr:\n{proc.stderr}")
+    return proc.stdout, proc.stderr
+
+
+def test_synergy_pairs_fallback_and_metadata_info(tmp_path):
+    """Check that a seed theme ends up with synergies and that forced backfill stamps metadata.
+
+    Builds the catalog to ``tmp_path``, picks the first theme from a fixed pool that
+    is present, asserts it has synergies, then runs a forced YAML backfill and, if
+    the theme's YAML file exists, asserts it contains a metadata_info (or legacy
+    provenance) key. Skips when none of the pool themes is in the output.
+    """
+    import sys  # local import: this module does not import sys at top level
+    out_path = tmp_path / 'theme_list.json'
+    # sys.executable keeps the child on the interpreter running the tests; a bare
+    # 'python' depends on PATH and may be absent or point at a different install.
+    run([sys.executable, str(SCRIPT), '--output', str(out_path)], env={'EDITORIAL_SEED': '42'})
+    data = json.loads(out_path.read_text(encoding='utf-8'))
+    themes = {t['theme']: t for t in data['themes']}
+    search_pool = (
+        'Treasure','Tokens','Proliferate','Aristocrats','Sacrifice','Landfall','Graveyard','Reanimate'
+    )
+    candidate = next((name for name in search_pool if name in themes), None)
+    if not candidate:  # environment variability safeguard
+        import pytest
+        pytest.skip('No synergy pair seed theme present in catalog output')
+    candidate_entry = themes[candidate]
+    assert candidate_entry.get('synergies'), f"{candidate} has no synergies; fallback failed"
+    run([sys.executable, str(SCRIPT), '--force-backfill-yaml', '--backfill-yaml'], env={'EDITORIAL_INCLUDE_FALLBACK_SUMMARY': '1'})
+    yaml_path = CATALOG_DIR / f"{candidate.lower().replace(' ', '-')}.yml"
+    if yaml_path.exists():
+        raw = yaml_path.read_text(encoding='utf-8').splitlines()
+        has_meta = any(line.strip().startswith(('metadata_info:','provenance:')) for line in raw)
+        assert has_meta, 'metadata_info block missing after forced backfill'
--- a/code/tests/test_synergy_pairs_and_provenance.py
+++ b/code/tests/test_synergy_pairs_and_provenance.py
@ -0,0 +1,59 @@
+import json
+import os
+from pathlib import Path
+import subprocess
+
+ROOT = Path(__file__).resolve().parents[2]
+SCRIPT = ROOT / 'code' / 'scripts' / 'build_theme_catalog.py'
+CATALOG_DIR = ROOT / 'config' / 'themes' / 'catalog'
+
+
+def run(cmd, env=None):
+    """Execute ``cmd`` with cwd=ROOT and hand back its ``(stdout, stderr)`` text.
+
+    The child environment is a copy of ``os.environ`` with ``ROOT / 'code'`` added to
+    PYTHONPATH when missing, then overlaid with ``env``. A non-zero exit status
+    raises AssertionError carrying both captured streams.
+    """
+    child_env = dict(os.environ)
+    code_path = str(ROOT / 'code')
+    pythonpath = child_env.get('PYTHONPATH', '')
+    # Ensure code/ is on PYTHONPATH for script relative imports
+    if code_path not in pythonpath.split(os.pathsep):
+        if pythonpath:
+            child_env['PYTHONPATH'] = pythonpath + os.pathsep + code_path
+        else:
+            child_env['PYTHONPATH'] = code_path
+    if env:
+        child_env.update(env)
+    completed = subprocess.run(cmd, cwd=ROOT, env=child_env, capture_output=True, text=True)
+    if completed.returncode == 0:
+        return completed.stdout, completed.stderr
+    raise AssertionError(f"Command failed: {' '.join(cmd)}\nstdout:\n{completed.stdout}\nstderr:\n{completed.stderr}")
+
+
+def test_synergy_pairs_fallback_and_metadata_info(tmp_path):
+    """Check that a seed theme ends up with synergies and that forced backfill stamps metadata.
+
+    Builds the catalog to ``tmp_path``, picks the first theme from a fixed pool that
+    is present, asserts it has synergies, then runs a forced YAML backfill and, if
+    the theme's YAML file exists, asserts it contains a metadata_info (or legacy
+    provenance) key. Skips when none of the pool themes is in the output.
+    """
+    import sys  # local import: this module does not import sys at top level
+    out_path = tmp_path / 'theme_list.json'
+    # sys.executable keeps the child on the interpreter running the tests; a bare
+    # 'python' depends on PATH and may be absent or point at a different install.
+    run([sys.executable, str(SCRIPT), '--output', str(out_path)], env={'EDITORIAL_SEED': '42'})
+    data = json.loads(out_path.read_text(encoding='utf-8'))
+    themes = {t['theme']: t for t in data['themes']}
+    # Pick one known from synergy_pairs.yml (e.g., 'Treasure', 'Tokens', 'Proliferate')
+    search_pool = (
+        'Treasure','Tokens','Proliferate','Aristocrats','Sacrifice','Landfall','Graveyard','Reanimate'
+    )
+    candidate = next((name for name in search_pool if name in themes), None)
+    if not candidate:  # If still none, skip test rather than fail (environmental variability)
+        import pytest
+        pytest.skip('No synergy pair seed theme present in catalog output')
+    candidate_entry = themes[candidate]
+    # Must have at least one synergy (fallback or curated)
+    assert candidate_entry.get('synergies'), f"{candidate} has no synergies; fallback failed"
+    # Force backfill (real JSON path triggers backfill) with environment to ensure provenance stamping
+    run([sys.executable, str(SCRIPT), '--force-backfill-yaml', '--backfill-yaml'], env={'EDITORIAL_INCLUDE_FALLBACK_SUMMARY': '1'})
+    # Locate YAML and verify metadata_info (or legacy provenance) inserted
+    yaml_path = CATALOG_DIR / f"{candidate.lower().replace(' ', '-')}.yml"
+    if yaml_path.exists():
+        raw = yaml_path.read_text(encoding='utf-8').splitlines()
+        # Both lines below must stay inside the guard: 'raw' is only bound when the
+        # file exists, so checking it outside would raise NameError for a missing YAML.
+        has_meta = any(line.strip().startswith(('metadata_info:','provenance:')) for line in raw)
+        assert has_meta, 'metadata_info block missing after forced backfill'
--- a/code/tests/test_theme_catalog_generation.py
+++ b/code/tests/test_theme_catalog_generation.py
@ -0,0 +1,62 @@
+import json
+import os
+from pathlib import Path
+import subprocess
+
+ROOT = Path(__file__).resolve().parents[2]
+SCRIPT = ROOT / 'code' / 'scripts' / 'build_theme_catalog.py'
+
+
+def run(cmd, env=None):
+    """Run ``cmd`` with cwd=ROOT; return ``(stdout, stderr)`` or raise AssertionError on non-zero exit.
+
+    ``env`` (optional mapping) is merged over a copy of the current process environment.
+    """
+    child_env = {**os.environ, **(env or {})}
+    completed = subprocess.run(cmd, cwd=ROOT, env=child_env, capture_output=True, text=True)
+    if completed.returncode == 0:
+        return completed.stdout, completed.stderr
+    raise AssertionError(f"Command failed: {' '.join(cmd)}\nstdout:\n{completed.stdout}\nstderr:\n{completed.stderr}")
+
+
+def test_deterministic_seed(tmp_path):
+    """Two limited builds with the same EDITORIAL_SEED must emit themes in the same order."""
+    import sys  # local import: this module does not import sys at top level
+    out1 = tmp_path / 'theme_list1.json'
+    out2 = tmp_path / 'theme_list2.json'
+    # sys.executable keeps the child on the interpreter running the tests; a bare
+    # 'python' depends on PATH and may be absent or point at a different install.
+    cmd_base = [sys.executable, str(SCRIPT), '--output']
+    # Use a limit to keep runtime fast and deterministic small subset (allowed by guard since different output path)
+    cmd1 = cmd_base + [str(out1), '--limit', '50']
+    cmd2 = cmd_base + [str(out2), '--limit', '50']
+    run(cmd1, env={'EDITORIAL_SEED': '123'})
+    run(cmd2, env={'EDITORIAL_SEED': '123'})
+    data1 = json.loads(out1.read_text(encoding='utf-8'))
+    data2 = json.loads(out2.read_text(encoding='utf-8'))
+    # Theme order in JSON output should match for same seed + limit
+    names1 = [t['theme'] for t in data1['themes']]
+    names2 = [t['theme'] for t in data2['themes']]
+    assert names1 == names2
+
+
+def test_popularity_boundaries_override(tmp_path):
+    """With EDITORIAL_POP_BOUNDARIES overridden, every emitted popularity_bucket must still be a known label."""
+    import sys  # local import: this module does not import sys at top level
+    out_path = tmp_path / 'theme_list.json'
+    # sys.executable (not a bare 'python') so the child uses the interpreter running the tests.
+    run([sys.executable, str(SCRIPT), '--output', str(out_path), '--limit', '80'], env={'EDITORIAL_POP_BOUNDARIES': '1,2,3,4'})
+    data = json.loads(out_path.read_text(encoding='utf-8'))
+    # With extremely low boundaries most themes in small slice will be Very Common
+    buckets = {t['popularity_bucket'] for t in data['themes']}
+    assert buckets <= {'Very Common', 'Common', 'Uncommon', 'Niche', 'Rare'}
+
+
+def test_no_yaml_backfill_on_alt_output(tmp_path):
+    """A build with an alternate --output and --backfill-yaml must leave a sampled source YAML untouched."""
+    import sys  # local import: this module does not import sys at top level
+    # Run with alternate output and --backfill-yaml; should not modify source YAMLs
+    catalog_dir = ROOT / 'config' / 'themes' / 'catalog'
+    # Sorted so the sampled file is the same on every run; the default keeps an
+    # empty catalog from surfacing as a bare StopIteration.
+    sample = next(iter(sorted(catalog_dir.glob('*.yml'))), None)
+    assert sample is not None, 'No catalog YAML files found to sample'
+    before = sample.read_text(encoding='utf-8')
+    out_path = tmp_path / 'tl.json'
+    # sys.executable (not a bare 'python') so the child uses the interpreter running the tests.
+    run([sys.executable, str(SCRIPT), '--output', str(out_path), '--limit', '10', '--backfill-yaml'])
+    after = sample.read_text(encoding='utf-8')
+    assert before == after, 'YAML was modified when using alternate output path'
+
+
+def test_catalog_schema_contains_descriptions(tmp_path):
+    """Every theme in a limited build must have a 'description' key with a non-empty value."""
+    import sys  # local import: this module does not import sys at top level
+    out_path = tmp_path / 'theme_list.json'
+    # sys.executable (not a bare 'python') so the child uses the interpreter running the tests.
+    run([sys.executable, str(SCRIPT), '--output', str(out_path), '--limit', '40'])
+    data = json.loads(out_path.read_text(encoding='utf-8'))
+    assert all('description' in t for t in data['themes'])
+    assert all(t['description'] for t in data['themes'])
--- a/code/tests/test_theme_catalog_validation_phase_c.py
+++ b/code/tests/test_theme_catalog_validation_phase_c.py
@ -86,7 +86,7 @@ def test_strict_alias_mode_passes_current_state():
 def test_synergy_cap_global():
    ensure_catalog()
    data = json.loads(CATALOG.read_text(encoding='utf-8'))
-    cap = data.get('provenance', {}).get('synergy_cap') or 0
+    cap = (data.get('metadata_info') or {}).get('synergy_cap') or 0
    if not cap:
        return
    for entry in data.get('themes', [])[:200]:  # sample subset for speed
--- a/code/tests/test_theme_description_fallback_regression.py
+++ b/code/tests/test_theme_description_fallback_regression.py
@ -0,0 +1,33 @@
+import json
+import os
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[2]
+SCRIPT = ROOT / 'code' / 'scripts' / 'build_theme_catalog.py'
+OUTPUT = ROOT / 'config' / 'themes' / 'theme_list_test_regression.json'
+
+
+def test_generic_description_regression():
+    """Guard the count and share of generic fallback descriptions against regression.
+
+    Builds the catalog to a scratch file under config/themes with the fallback
+    summary enabled. The scratch file is removed in ``finally`` so that a failing
+    assertion does not leave it behind.
+    """
+    import subprocess
+    import sys
+    # Run build with summary enabled directed to temp output
+    env = os.environ.copy()
+    env['EDITORIAL_INCLUDE_FALLBACK_SUMMARY'] = '1'
+    # Avoid writing real catalog file; just produce alternate output
+    cmd = [sys.executable, str(SCRIPT), '--output', str(OUTPUT)]
+    try:
+        res = subprocess.run(cmd, capture_output=True, text=True, env=env)
+        assert res.returncode == 0, res.stderr
+        data = json.loads(OUTPUT.read_text(encoding='utf-8'))
+        summary = data.get('description_fallback_summary') or {}
+        # Guardrails tightened (second wave). Prior baseline: ~357 generic (309 + 48).
+        # New ceiling: <= 365 total generic and <52% share. Future passes should lower further.
+        assert summary.get('generic_total', 0) <= 365, summary
+        assert summary.get('generic_pct', 100.0) < 52.0, summary
+        # Basic shape checks
+        assert 'top_generic_by_frequency' in summary
+        assert isinstance(summary['top_generic_by_frequency'], list)
+    finally:
+        # Best-effort cleanup: a missing or locked file must not mask the test result.
+        try:
+            OUTPUT.unlink()
+        except OSError:
+            pass
--- a/code/tests/test_theme_editorial_min_examples_enforced.py
+++ b/code/tests/test_theme_editorial_min_examples_enforced.py
@ -0,0 +1,33 @@
+"""Enforcement Test: Minimum example_commanders threshold.
+
+This test asserts that when enforcement flag is active (env EDITORIAL_MIN_EXAMPLES_ENFORCE=1)
+no theme present in the merged catalog falls below the configured minimum (default 5).
+
+Rationale: Guards against regressions where a future edit drops curated coverage
+below the policy threshold after Phase D close-out.
+"""
+from __future__ import annotations
+
+import os
+from pathlib import Path
+import json
+
+ROOT = Path(__file__).resolve().parents[2]
+CATALOG = ROOT / 'config' / 'themes' / 'theme_list.json'
+
+
+def test_all_themes_meet_minimum_examples():
+    """Every non-alias theme in theme_list.json must list at least the minimum example_commanders.
+
+    The minimum is read from EDITORIAL_MIN_EXAMPLES (default 5). The enforcement flag
+    is set in ``os.environ`` for the duration of the check and its previous value is
+    restored afterwards so it does not leak into tests that run later in the session.
+    """
+    previous_flag = os.environ.get('EDITORIAL_MIN_EXAMPLES_ENFORCE')
+    os.environ['EDITORIAL_MIN_EXAMPLES_ENFORCE'] = '1'
+    try:
+        min_required = int(os.environ.get('EDITORIAL_MIN_EXAMPLES', '5'))
+        assert CATALOG.exists(), 'theme_list.json missing (run build script before tests)'
+        data = json.loads(CATALOG.read_text(encoding='utf-8'))
+        assert 'themes' in data
+        short = []
+        for entry in data['themes']:
+            # Skip synthetic / alias entries if any (identified by metadata_info.alias_of later if introduced)
+            if entry.get('alias_of'):
+                continue
+            examples = entry.get('example_commanders') or []
+            if len(examples) < min_required:
+                short.append(f"{entry.get('theme')}: {len(examples)} < {min_required}")
+        assert not short, 'Themes below minimum examples: ' + ', '.join(short)
+    finally:
+        if previous_flag is None:
+            os.environ.pop('EDITORIAL_MIN_EXAMPLES_ENFORCE', None)
+        else:
+            os.environ['EDITORIAL_MIN_EXAMPLES_ENFORCE'] = previous_flag
--- a/code/tests/test_theme_merge_phase_b.py
+++ b/code/tests/test_theme_merge_phase_b.py
@ -23,16 +23,16 @@ def load_catalog():
    return data, themes


-def test_phase_b_merge_provenance_and_precedence():
+def test_phase_b_merge_metadata_info_and_precedence():
    run_builder()
    data, themes = load_catalog()

-    # Provenance block required
-    prov = data.get('provenance')
-    assert isinstance(prov, dict), 'Provenance block missing'
-    assert prov.get('mode') == 'merge', 'Provenance mode should be merge'
-    assert 'generated_at' in prov, 'generated_at missing in provenance'
-    assert 'curated_yaml_files' in prov, 'curated_yaml_files missing in provenance'
+    # metadata_info block required (legacy 'provenance' accepted transiently)
+    meta = data.get('metadata_info') or data.get('provenance')
+    assert isinstance(meta, dict), 'metadata_info block missing'
+    assert meta.get('mode') == 'merge', 'metadata_info mode should be merge'
+    assert 'generated_at' in meta, 'generated_at missing in metadata_info'
+    assert 'curated_yaml_files' in meta, 'curated_yaml_files missing in metadata_info'

    # Sample anchors to verify curated/enforced precedence not truncated under cap
    # Choose +1/+1 Counters (curated + enforced) and Reanimate (curated + enforced)
@ -50,7 +50,7 @@ def test_phase_b_merge_provenance_and_precedence():
            assert 'Enter the Battlefield' in syn, 'Curated synergy lost due to capping'

    # Ensure cap respected (soft exceed allowed only if curated+enforced exceed cap)
-    cap = data.get('provenance', {}).get('synergy_cap') or 0
+    cap = (data.get('metadata_info') or {}).get('synergy_cap') or 0
    if cap:
        for t, entry in list(themes.items())[:50]:  # sample first 50 for speed
            if len(entry['synergies']) > cap: