mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-16 15:40:12 +01:00
193 lines
7.9 KiB
Python
193 lines
7.9 KiB
Python
"""Summarize the curated random theme pool and exclusion rules.
|
|
|
|
Usage examples:
|
|
|
|
python -m code.scripts.report_random_theme_pool --format markdown
|
|
python -m code.scripts.report_random_theme_pool --output logs/random_theme_pool.json
|
|
|
|
The script refreshes the commander catalog, rebuilds the curated random
|
|
pool using the same heuristics as Random Mode auto-fill, and prints a
|
|
summary (JSON by default).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List
|
|
|
|
# Parent of the directory that contains this file. It is appended to
# ``sys.path`` (once) before the ``deck_builder`` import that follows;
# presumably that import relies on this entry when the script is launched
# directly rather than through the package -- confirm before removing.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))
|
|
|
|
from deck_builder.random_entrypoint import ( # noqa: E402
|
|
_build_random_theme_pool,
|
|
_ensure_theme_tag_cache,
|
|
_load_commanders_df,
|
|
_OVERREPRESENTED_SHARE_THRESHOLD,
|
|
)
|
|
|
|
|
|
def build_report(refresh: bool = False) -> Dict[str, Any]:
    """Build the structured report for the curated random theme pool.

    Loads the commander data via ``_load_commanders_df``, passes it through
    ``_ensure_theme_tag_cache`` and then calls ``_build_random_theme_pool``
    with ``include_details=True``.

    Args:
        refresh: Controls how a failure of ``_ensure_theme_tag_cache`` is
            handled. When True the exception propagates to the caller. When
            False the step is best-effort: the failure is reported on stderr
            and the report is built from the data as loaded. Note that this
            flag does not change which helper is called.

    Returns:
        A dict with four keys: ``allowed_tokens`` (sorted), ``allowed_count``,
        ``metadata`` (whatever the pool builder returned, minus the
        ``excluded_detail`` entry) and ``excluded_detail`` (that entry, or an
        empty dict when absent).
    """
    df = _load_commanders_df()
    try:
        df = _ensure_theme_tag_cache(df)
    except Exception as exc:
        if refresh:
            raise
        # Best-effort path: keep going, but say so instead of hiding the
        # failure. stderr keeps the JSON/markdown on stdout clean.
        print(
            f"warning: theme tag cache step failed ({exc!r}); continuing without it",
            file=sys.stderr,
        )

    allowed, metadata = _build_random_theme_pool(df, include_details=True)
    # Per-token detail is surfaced as its own top-level key of the report.
    detail = metadata.pop("excluded_detail", {})
    return {
        "allowed_tokens": sorted(allowed),
        "allowed_count": len(allowed),
        "metadata": metadata,
        "excluded_detail": detail,
    }
|
|
|
|
|
|
def _join_limited(tokens: Any, limit: int) -> str:
    """Join at most *limit* tokens with ", " and append " … (+N)" for the rest."""
    items = [str(token) for token in (tokens or [])]
    overflow = len(items) - limit
    joined = ", ".join(items[:limit])
    return f"{joined} … (+{overflow})" if overflow > 0 else joined


def format_markdown(report: Dict[str, Any], *, limit: int = 20) -> str:
    """Render *report* (as produced by ``build_report``) as Markdown text.

    Sections are emitted only when the corresponding data is present:
    headline counts, exclusion rules, excluded-token counts per reason,
    sample tokens per reason, per-token exclusion detail, manual exclusions
    applied, and manual exclusion categories.

    Args:
        report: Mapping read through the keys ``allowed_count``,
            ``metadata`` and ``excluded_detail``. ``metadata`` and its
            ``rules`` entry may be missing or ``None``; both are then treated
            as empty.
        limit: Maximum number of tokens listed per sample line / category,
            and maximum number of entries in the detailed-exclusions section.

    Returns:
        The Markdown document as a single string, lines joined with ``\\n``
        and no trailing newline.
    """
    lines: List[str] = []
    # ``or {}`` (rather than a .get default) also covers an explicit None.
    meta = report.get("metadata") or {}
    rules = meta.get("rules") or {}
    manual_groups = rules.get("manual_exclusions") or []

    # --- Headline figures -------------------------------------------------
    lines.append("# Curated Random Theme Pool")
    lines.append("")
    lines.append(f"- Allowed tokens: **{report.get('allowed_count', 0)}**")
    total_commander_count = meta.get("total_commander_count")
    if total_commander_count is not None:
        lines.append(f"- Commander entries analyzed: **{total_commander_count}**")
    coverage = meta.get("coverage_ratio")
    if coverage is not None:
        pct = round(float(coverage) * 100.0, 2)
        lines.append(f"- Coverage: **{pct}%** of catalog tokens")

    # --- Exclusion rules --------------------------------------------------
    # Sub-bullets are indented two spaces per level so that Markdown
    # renderers nest them under their parent item.
    if rules:
        thresh = rules.get("overrepresented_share_threshold")
        if thresh is None:
            # Fall back to the module-level default imported by this file.
            thresh = _OVERREPRESENTED_SHARE_THRESHOLD
        thresh_pct = round(float(thresh) * 100.0, 2)
        kindred = ", ".join(rules.get("kindred_keywords") or [])
        excluded_keywords = ", ".join(rules.get("excluded_keywords") or [])
        pattern_str = ", ".join(rules.get("excluded_patterns") or [])

        lines.append("- Exclusion rules:")
        lines.append("  - Minimum commander coverage: 5 unique commanders")
        lines.append(f"  - Kindred filter keywords: {kindred}")
        lines.append(f"  - Global theme keywords: {excluded_keywords}")
        if pattern_str:
            lines.append(f"  - Global theme patterns: {pattern_str}")
        lines.append(f"  - Over-represented threshold: ≥ {thresh_pct}% of commanders")

        manual_src = rules.get("manual_exclusions_source")
        if manual_src or manual_groups:
            lines.append(
                f"  - Manual exclusion config: {manual_src or 'config/random_theme_exclusions.yml'}"
            )
            if manual_groups:
                lines.append(f"    - Manual categories: {len(manual_groups)} tracked groups")

    # --- Excluded tokens by reason ---------------------------------------
    counts = meta.get("excluded_counts") or {}
    if counts:
        lines.append("")
        lines.append("## Excluded tokens by reason")
        lines.append("Reason | Count")
        lines.append("------ | -----")
        for reason in sorted(counts):
            lines.append(f"{reason} | {counts[reason]}")

    # --- Sample tokens per reason ----------------------------------------
    samples = meta.get("excluded_samples") or {}
    if samples:
        lines.append("")
        lines.append("## Sample tokens per exclusion reason")
        for reason in sorted(samples):
            lines.append(f"- **{reason}**: {_join_limited(samples[reason], limit)}")

    # --- Per-token detail (truncated to ``limit`` entries) ----------------
    detail = report.get("excluded_detail") or {}
    if detail:
        lines.append("")
        lines.append("## Detailed exclusions (first few)")
        for token in sorted(detail)[:limit]:
            reasons = ", ".join(str(reason) for reason in detail[token])
            lines.append(f"- {token}: {reasons}")
        if len(detail) > limit:
            lines.append(f"… (+{len(detail) - limit} more tokens)")

    # --- Manual exclusions that were applied ------------------------------
    manual_detail = meta.get("manual_exclusion_detail") or {}
    if manual_detail:
        lines.append("")
        lines.append("## Manual exclusions applied")
        for token in sorted(manual_detail):
            info = manual_detail[token]
            if not isinstance(info, dict):
                # Same tolerance as the category loop below: a malformed
                # entry is still listed, just without descriptors.
                lines.append(f"- {token}")
                continue
            display = info.get("display", token)
            category = info.get("category")
            descriptors: List[str] = []
            if category:
                descriptors.append(f"category={category}")
            descriptors.extend(
                text for text in (info.get("summary"), info.get("notes")) if text
            )
            suffix = f" — {'; '.join(descriptors)}" if descriptors else ""
            lines.append(f"- {display}{suffix}")

    # --- Manual exclusion categories -------------------------------------
    if manual_groups:
        lines.append("")
        lines.append("## Manual exclusion categories")
        for group in manual_groups:
            if not isinstance(group, dict):
                continue
            category = group.get("category", "manual")
            summary = group.get("summary")
            notes = group.get("notes")
            tokens = group.get("tokens") or []
            lines.append(f"- **{category}** — {summary or 'no summary provided'}")
            if notes:
                lines.append(f"  - Notes: {notes}")
            if tokens:
                lines.append(f"  - Tokens: {_join_limited(tokens, limit)}")

    return "\n".join(lines)
|
|
|
|
|
|
def write_output(path: Path, payload: Dict[str, Any]) -> None:
    """Write *payload* to *path* as indented, key-sorted JSON.

    Missing parent directories are created. The file is written as UTF-8
    and ends with a single newline.

    Args:
        path: Destination file; an existing file is overwritten.
        payload: JSON-serialisable mapping to store.

    Raises:
        TypeError: If *payload* contains a value ``json`` cannot serialise.
            The destination is left untouched in that case.
    """
    # Serialise before touching the filesystem: opening with "w" truncates
    # immediately, so encoding inside the ``with`` block would destroy an
    # existing report whenever the payload turns out to be unserialisable.
    text = json.dumps(payload, indent=2, sort_keys=True)
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(text)
        handle.write("\n")
|
|
|
|
|
|
def write_manual_exclusions(path: Path, report: Dict[str, Any]) -> None:
    """Write the manual-exclusion part of *report* to *path* as JSON.

    The written object has three keys taken from ``report["metadata"]``:
    ``source`` (``rules.manual_exclusions_source``), ``categories``
    (``rules.manual_exclusions``) and ``tokens``
    (``manual_exclusion_detail``). Missing or empty ``metadata``, ``rules``
    and detail entries are treated as empty dicts. Writing is delegated to
    ``write_output``.
    """
    metadata = report.get("metadata", {}) or {}
    rule_config = metadata.get("rules", {}) or {}
    write_output(
        path,
        {
            "source": rule_config.get("manual_exclusions_source"),
            "categories": rule_config.get("manual_exclusions", []),
            "tokens": metadata.get("manual_exclusion_detail", {}) or {},
        },
    )
|
|
|
|
|
|
def main(argv: List[str] | None = None) -> int:
|
|
parser = argparse.ArgumentParser(description="Report the curated random theme pool heuristics")
|
|
parser.add_argument("--format", choices={"json", "markdown"}, default="json", help="Output format (default: json)")
|
|
parser.add_argument("--output", type=Path, help="Optional path to write the structured report (JSON regardless of --format)")
|
|
parser.add_argument("--limit", type=int, default=20, help="Max sample tokens per reason when printing markdown (default: 20)")
|
|
parser.add_argument("--refresh", action="store_true", help="Bypass caches when rebuilding commander stats")
|
|
parser.add_argument("--write-exclusions", type=Path, help="Optional path for writing manual exclusion tokens + metadata (JSON)")
|
|
args = parser.parse_args(argv)
|
|
|
|
report = build_report(refresh=args.refresh)
|
|
|
|
if args.output:
|
|
write_output(args.output, report)
|
|
|
|
if args.write_exclusions:
|
|
write_manual_exclusions(args.write_exclusions, report)
|
|
|
|
if args.format == "markdown":
|
|
print(format_markdown(report, limit=max(1, args.limit)))
|
|
else:
|
|
print(json.dumps(report, indent=2, sort_keys=True))
|
|
|
|
return 0
|
|
|
|
|
|
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
|