feat(random): finalize multi-theme telemetry and polish

- document random theme exclusions, perf guard tooling, and roadmap completion
- tighten random reroll UX: strict theme persistence, throttle handling, export parity, diagnostics updates
- add regression coverage for telemetry counters, multi-theme flows, and locked rerolls; refresh README and notes

Tests: pytest -q (fast random + telemetry suites)
2026-03-19 11:46:30 +01:00 · 2025-09-26 18:15:52 -07:00 · 2025-09-26 18:15:52 -07:00 · 49f1f8b2eb
commit 49f1f8b2eb
parent 73685f22c8
28 changed files with 4888 additions and 251 deletions
--- a/code/scripts/report_random_theme_pool.py
+++ b/code/scripts/report_random_theme_pool.py
@ -0,0 +1,193 @@
+"""Summarize the curated random theme pool and exclusion rules.
+
+Usage examples:
+
+    python -m code.scripts.report_random_theme_pool --format markdown
+    python -m code.scripts.report_random_theme_pool --output logs/random_theme_pool.json
+
+The script refreshes the commander catalog, rebuilds the curated random
+pool using the same heuristics as Random Mode auto-fill, and prints a
+summary (JSON by default).
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+from typing import Any, Dict, List
+
+# Put the directory one level above this script's folder on sys.path before
+# the late import below (presumably so `deck_builder` resolves when the
+# script is launched directly rather than as part of a package -- confirm).
+PROJECT_ROOT = Path(__file__).resolve().parents[1]
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.append(str(PROJECT_ROOT))
+
+from deck_builder.random_entrypoint import (  # type: ignore  # noqa: E402
+    _build_random_theme_pool,
+    _ensure_theme_tag_cache,
+    _load_commanders_df,
+    _OVERREPRESENTED_SHARE_THRESHOLD,
+)
+
+
+def build_report(refresh: bool = False) -> Dict[str, Any]:
+    """Build the structured random-theme-pool report.
+
+    Loads the commander frame, runs it through the theme tag cache helper,
+    and summarizes the pool returned by ``_build_random_theme_pool``.
+
+    Args:
+        refresh: When true, a failure while preparing the theme tag cache is
+            raised to the caller. When false the step is best-effort: the
+            failure is reported on stderr and the report is built from the
+            frame as loaded.
+
+    Returns:
+        A dict with ``allowed_tokens`` (sorted), ``allowed_count``,
+        ``metadata`` (with ``excluded_detail`` removed) and
+        ``excluded_detail``.
+    """
+    df = _load_commanders_df()
+    try:
+        df = _ensure_theme_tag_cache(df)
+    except Exception as exc:
+        if refresh:
+            raise
+        # Best-effort path: keep going, but do not hide the failure. stderr is
+        # used so JSON/markdown printed on stdout stays machine-readable.
+        print(
+            f"warning: theme tag cache step failed ({exc!r}); continuing without it",
+            file=sys.stderr,
+        )
+    allowed, metadata = _build_random_theme_pool(df, include_details=True)
+    # The per-token detail is reported as its own top-level key.
+    detail = metadata.pop("excluded_detail", {})
+    return {
+        "allowed_tokens": sorted(allowed),
+        "allowed_count": len(allowed),
+        "metadata": metadata,
+        "excluded_detail": detail,
+    }
+
+
+def _csv(values: Any) -> str:
+    """Join *values* with ", ", stringifying each item; ``None`` yields ""."""
+    return ", ".join(str(value) for value in (values or []))
+
+
+def _csv_limited(values: Any, limit: int) -> str:
+    """Join the first *limit* values and append " … (+N)" when N were omitted."""
+    items = [str(value) for value in (values or [])]
+    omitted = len(items) - limit
+    tail = f" … (+{omitted})" if omitted > 0 else ""
+    return f"{', '.join(items[:limit])}{tail}"
+
+
+def format_markdown(report: Dict[str, Any], *, limit: int = 20) -> str:
+    """Render a report dict (as produced by ``build_report``) as Markdown.
+
+    Sections are emitted only when the corresponding data is present:
+    exclusion rules, excluded counts by reason, sample tokens per reason,
+    per-token exclusion detail, applied manual exclusions, and manual
+    exclusion categories.
+
+    Args:
+        report: Report dict; ``metadata`` / ``excluded_detail`` and any nested
+            key may be missing or ``None``.
+        limit: Maximum number of tokens (or detail rows) shown per list before
+            the remainder is summarized as ``… (+N)``.
+
+    Returns:
+        The Markdown document as a single newline-joined string.
+    """
+    lines: List[str] = []
+    # ``or {}`` also covers keys that are present but explicitly null.
+    meta = report.get("metadata") or {}
+    rules = meta.get("rules") or {}
+    manual_groups = rules.get("manual_exclusions") or []
+    lines.append("# Curated Random Theme Pool")
+    lines.append("")
+    lines.append(f"- Allowed tokens: **{report.get('allowed_count', 0)}**")
+    total_commander_count = meta.get("total_commander_count")
+    if total_commander_count is not None:
+        lines.append(f"- Commander entries analyzed: **{total_commander_count}**")
+    coverage = meta.get("coverage_ratio")
+    if coverage is not None:
+        pct = round(float(coverage) * 100.0, 2)
+        lines.append(f"- Coverage: **{pct}%** of catalog tokens")
+    if rules:
+        # Fall back to the module constant only when the report lacks a value.
+        thresh = rules.get("overrepresented_share_threshold")
+        if thresh is None:
+            thresh = _OVERREPRESENTED_SHARE_THRESHOLD
+        thresh_pct = round(float(thresh) * 100.0, 2)
+        lines.append("- Exclusion rules:")
+        lines.append("  - Minimum commander coverage: 5 unique commanders")
+        lines.append(f"  - Kindred filter keywords: {_csv(rules.get('kindred_keywords'))}")
+        lines.append(f"  - Global theme keywords: {_csv(rules.get('excluded_keywords'))}")
+        pattern_str = _csv(rules.get("excluded_patterns"))
+        if pattern_str:
+            lines.append(f"  - Global theme patterns: {pattern_str}")
+        lines.append(f"  - Over-represented threshold: ≥ {thresh_pct}% of commanders")
+        manual_src = rules.get("manual_exclusions_source")
+        if manual_src or manual_groups:
+            lines.append(f"  - Manual exclusion config: {manual_src or 'config/random_theme_exclusions.yml'}")
+        if manual_groups:
+            lines.append(f"  - Manual categories: {len(manual_groups)} tracked groups")
+    counts = meta.get("excluded_counts") or {}
+    if counts:
+        lines.append("")
+        lines.append("## Excluded tokens by reason")
+        lines.append("Reason | Count")
+        lines.append("------ | -----")
+        for reason, count in sorted(counts.items(), key=lambda item: item[0]):
+            lines.append(f"{reason} | {count}")
+    samples = meta.get("excluded_samples") or {}
+    if samples:
+        lines.append("")
+        lines.append("## Sample tokens per exclusion reason")
+        for reason, tokens in sorted(samples.items(), key=lambda item: item[0]):
+            lines.append(f"- **{reason}**: {_csv_limited(tokens, limit)}")
+    detail = report.get("excluded_detail") or {}
+    if detail:
+        lines.append("")
+        lines.append("## Detailed exclusions (first few)")
+        # Sort on the token only so the reason lists are never compared.
+        for token, reasons in sorted(detail.items(), key=lambda item: item[0])[:limit]:
+            lines.append(f"- {token}: {_csv(reasons)}")
+        if len(detail) > limit:
+            lines.append(f"… (+{len(detail) - limit} more tokens)")
+    manual_detail = meta.get("manual_exclusion_detail") or {}
+    if manual_detail:
+        lines.append("")
+        lines.append("## Manual exclusions applied")
+        for token, info in sorted(manual_detail.items(), key=lambda item: item[0]):
+            if not isinstance(info, dict):
+                # Tolerate malformed entries: show the bare token.
+                info = {}
+            display = info.get("display", token)
+            category = info.get("category")
+            summary = info.get("summary")
+            notes = info.get("notes")
+            descriptors: List[str] = []
+            if category:
+                descriptors.append(f"category={category}")
+            if summary:
+                descriptors.append(str(summary))
+            if notes:
+                descriptors.append(str(notes))
+            suffix = f" — {'; '.join(descriptors)}" if descriptors else ""
+            lines.append(f"- {display}{suffix}")
+
+    if manual_groups:
+        lines.append("")
+        lines.append("## Manual exclusion categories")
+        for group in manual_groups:
+            if not isinstance(group, dict):
+                continue
+            category = group.get("category", "manual")
+            summary = group.get("summary")
+            tokens = group.get("tokens") or []
+            notes = group.get("notes")
+            lines.append(f"- **{category}** — {summary or 'no summary provided'}")
+            if notes:
+                lines.append(f"  - Notes: {notes}")
+            if tokens:
+                lines.append(f"  - Tokens: {_csv_limited(tokens, limit)}")
+
+    return "\n".join(lines)
+
+
+def write_output(path: Path, payload: Dict[str, Any]) -> None:
+    """Write *payload* to *path* as indented, key-sorted JSON plus a newline.
+
+    Missing parent directories are created. The payload is serialized before
+    the file is touched, so a payload that cannot be encoded raises without
+    truncating or partially overwriting an existing file.
+
+    Args:
+        path: Destination file.
+        payload: JSON-serializable mapping.
+
+    Raises:
+        TypeError: If *payload* contains a value ``json`` cannot encode.
+    """
+    text = json.dumps(payload, indent=2, sort_keys=True) + "\n"
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(text, encoding="utf-8")
+
+
+def write_manual_exclusions(path: Path, report: Dict[str, Any]) -> None:
+    """Persist the manual-exclusion portion of *report* as JSON at *path*.
+
+    The written object has three keys: ``source`` and ``categories`` taken
+    from the report's rules, and ``tokens`` taken from the report metadata's
+    ``manual_exclusion_detail``.
+    """
+    metadata = report.get("metadata", {}) or {}
+    rule_cfg = metadata.get("rules", {}) or {}
+    write_output(
+        path,
+        {
+            "source": rule_cfg.get("manual_exclusions_source"),
+            "categories": rule_cfg.get("manual_exclusions", []),
+            "tokens": metadata.get("manual_exclusion_detail", {}) or {},
+        },
+    )
+
+
+def main(argv: List[str] | None = None) -> int:
+    """CLI entry point: build the report, optionally persist it, then print it.
+
+    Args:
+        argv: Arguments to parse; ``None`` lets argparse read ``sys.argv``.
+
+    Returns:
+        Exit status, always 0 (argparse exits by itself on invalid arguments).
+    """
+    parser = argparse.ArgumentParser(description="Report the curated random theme pool heuristics")
+    # A tuple rather than a set keeps the order of the choices stable in
+    # --help and in error messages regardless of string hash randomization.
+    parser.add_argument("--format", choices=("json", "markdown"), default="json", help="Output format (default: json)")
+    parser.add_argument("--output", type=Path, help="Optional path to write the structured report (JSON regardless of --format)")
+    parser.add_argument("--limit", type=int, default=20, help="Max sample tokens per reason when printing markdown (default: 20)")
+    parser.add_argument("--refresh", action="store_true", help="Bypass caches when rebuilding commander stats")
+    parser.add_argument("--write-exclusions", type=Path, help="Optional path for writing manual exclusion tokens + metadata (JSON)")
+    args = parser.parse_args(argv)
+
+    report = build_report(refresh=args.refresh)
+
+    if args.output:
+        write_output(args.output, report)
+
+    if args.write_exclusions:
+        write_manual_exclusions(args.write_exclusions, report)
+
+    if args.format == "markdown":
+        # Clamp so a zero or negative --limit still shows at least one item.
+        print(format_markdown(report, limit=max(1, args.limit)))
+    else:
+        print(json.dumps(report, indent=2, sort_keys=True))
+
+    return 0
+
+
+if __name__ == "__main__":  # pragma: no cover
+    raise SystemExit(main())