mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-16 23:50:12 +01:00
feat: add supplemental theme catalog tooling, additional theme selection, and custom theme selection
This commit is contained in:
parent
3a1b011dbc
commit
9428e09cef
39 changed files with 3643 additions and 198 deletions
281
code/scripts/generate_theme_catalog.py
Normal file
281
code/scripts/generate_theme_catalog.py
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
"""Generate a normalized theme catalog CSV from card datasets.
|
||||
|
||||
Outputs `theme_catalog.csv` with deterministic ordering, a reproducible version hash,
|
||||
and per-source occurrence counts so supplemental theme workflows can reuse the catalog.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import ast
|
||||
import csv
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from collections import Counter, defaultdict
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Optional, Sequence
|
||||
|
||||
# Directory two levels above this script's resolved location (the repository root
# when the script lives at code/scripts/).
ROOT = Path(__file__).resolve().parents[2]
CODE_ROOT = ROOT / "code"
# Put the code directory at the front of sys.path (only once) before the settings
# import below is attempted.
if str(CODE_ROOT) not in sys.path:
    sys.path.insert(0, str(CODE_ROOT))

# Prefer the CSV directory declared in the project settings; if that import fails
# for any reason, fall back to the literal directory name "csv_files".
try:
    from code.settings import CSV_DIRECTORY as DEFAULT_CSV_DIRECTORY  # type: ignore
except Exception:  # pragma: no cover - fallback for adhoc execution
    DEFAULT_CSV_DIRECTORY = "csv_files"

# Default destination of the generated catalog.
DEFAULT_OUTPUT_PATH = ROOT / "config" / "themes" / "theme_catalog.csv"
# Start of the comment line written as the first line of the catalog file.
HEADER_COMMENT_PREFIX = "# theme_catalog"
|
||||
|
||||
|
||||
@dataclass(slots=True)
class CatalogRow:
    """One theme entry of the generated catalog (a single CSV data row)."""

    theme: str  # display spelling chosen for the theme
    source_count: int  # commander_count + card_count
    commander_count: int  # commander CSV rows that carry the theme
    card_count: int  # card CSV rows that carry the theme
    last_generated_at: str  # timestamp string of the generation run
    version: str  # 12-character hash derived from the catalog's theme names
|
||||
|
||||
|
||||
@dataclass(slots=True)
class CatalogBuildResult:
    """Summary returned by ``build_theme_catalog`` after the catalog is written."""

    rows: List[CatalogRow]  # catalog rows in the order they were written
    generated_at: str  # timestamp string shared by every row
    version: str  # version hash shared by every row
    output_path: Path  # resolved path of the written catalog file
|
||||
|
||||
|
||||
def normalize_theme_display(raw: str) -> str:
    """Return *raw* without surrounding whitespace and with inner runs collapsed to one space."""
    words = raw.strip().split()
    return " ".join(words)
|
||||
|
||||
|
||||
def canonical_key(raw: str) -> str:
    """Return the case-insensitive grouping key for a theme name.

    The name is normalised exactly like its display form and then case-folded,
    so spellings that differ only in case or spacing share one key.
    """
    display_form = normalize_theme_display(raw)
    return display_form.casefold()
|
||||
|
||||
|
||||
def parse_theme_tags(value: object) -> List[str]:
    """Extract theme tag strings from a raw ``themeTags`` value.

    Accepts ``None``, an already-parsed list, or a string holding a JSON array,
    a Python list literal, or a single bare tag.

    Args:
        value: Raw cell value.

    Returns:
        The non-blank string items in their original order (items are not
        trimmed). A string that does not parse as a list is returned, stripped,
        as a one-element list. ``None``, blank strings and unsupported types
        yield ``[]``.
    """

    def keep_strings(items: list) -> List[str]:
        # Non-string and blank entries are dropped; surviving tags are left untouched.
        return [str(item) for item in items if isinstance(item, str) and item.strip()]

    if isinstance(value, list):
        return keep_strings(value)
    if not isinstance(value, str):
        return []

    candidate = value.strip()
    if not candidate:
        return []

    # JSON is tried first (themeTags are often stored as JSON arrays), then Python
    # literal syntax. Every exception these parsers are known to raise on malformed
    # text is treated as "not a list" so one odd cell cannot abort a whole run:
    # json.loads may raise a plain ValueError (integer digit limit) or RecursionError,
    # and ast.literal_eval may also raise TypeError, MemoryError or RecursionError.
    parsers = (
        (json.loads, (ValueError, RecursionError)),
        (ast.literal_eval, (ValueError, SyntaxError, TypeError, MemoryError, RecursionError)),
    )
    for parse, expected_errors in parsers:
        try:
            parsed = parse(candidate)
        except expected_errors:
            continue
        if isinstance(parsed, list):
            return keep_strings(parsed)

    # Anything else is treated as a single bare tag.
    return [candidate]
|
||||
|
||||
|
||||
def _load_theme_counts(csv_path: Path, theme_variants: Dict[str, set[str]]) -> Counter[str]:
    """Count, per theme, how many rows of one CSV file carry that theme.

    Args:
        csv_path: CSV file to scan. A path that is not an existing regular file,
            or a file without a ``themeTags`` column, produces an empty counter.
        theme_variants: Mapping from canonical theme key to the display spellings
            seen so far. It is updated in place; a plain ``dict`` works as well
            as a ``defaultdict``.

    Returns:
        A counter keyed by canonical theme key. A theme is counted at most once
        per row, even when the row lists it several times.
    """
    counts: Counter[str] = Counter()
    # is_file() also rejects directories, which exists() would let through to open().
    if not csv_path.is_file():
        return counts

    # utf-8-sig drops a leading byte-order mark so the first column name stays clean.
    with csv_path.open("r", encoding="utf-8-sig", newline="") as handle:
        reader = csv.DictReader(handle)
        if not reader.fieldnames or "themeTags" not in reader.fieldnames:
            return counts
        for row in reader:
            seen_in_row: set[str] = set()
            for tag in parse_theme_tags(row.get("themeTags")):
                display = normalize_theme_display(tag)
                if not display:
                    continue
                key = canonical_key(display)
                if key in seen_in_row:
                    # Same theme repeated within this row: neither counted nor recorded again.
                    continue
                seen_in_row.add(key)
                counts[key] += 1
                # setdefault keeps this working when the caller passes a regular dict.
                theme_variants.setdefault(key, set()).add(display)
    return counts
|
||||
|
||||
|
||||
def _select_display_name(options: Sequence[str]) -> str:
    """Choose the spelling used to display a theme from its observed variants.

    Preference, in order: spellings that are not entirely upper-case, spellings
    equal to their ``str.title()`` form, case-folded alphabetical order, and
    finally the raw string itself. Returns ``""`` when *options* is empty.
    """
    if not options:
        return ""

    def preference(name: str) -> tuple[int, int, str, str]:
        # Lower tuples win: 0 marks the preferred outcome of each check.
        shouting = 1 if name == name.upper() else 0
        not_title = 0 if name == name.title() else 1
        return shouting, not_title, name.casefold(), name

    best = min(options, key=preference)
    return best
|
||||
|
||||
|
||||
def _derive_generated_at(now: Optional[datetime] = None) -> str:
    """Format the generation timestamp as an ISO-8601 string without microseconds.

    Uses the current UTC time when *now* is not supplied. A ``+00:00`` offset in
    the formatted text is written as ``Z``; any other offset (or none, for a
    naive datetime) is left as ``isoformat()`` produced it.
    """
    moment = now if now else datetime.now(timezone.utc)
    stamp = moment.replace(microsecond=0).isoformat()
    return stamp.replace("+00:00", "Z")
|
||||
|
||||
|
||||
def _compute_version_hash(theme_names: Iterable[str]) -> str:
    """Return a 12-character fingerprint of a collection of theme names.

    The names are sorted before hashing, so the result does not depend on the
    order in which they are supplied.
    """
    ordered = sorted(theme_names)
    digest = hashlib.sha256()
    digest.update("\n".join(ordered).encode("utf-8"))
    return digest.hexdigest()[:12]
|
||||
|
||||
|
||||
def _write_catalog_csv(
    output_path: Path,
    rows: Sequence[CatalogRow],
    version_hash: str,
    generated_at_iso: str,
) -> None:
    """Write the summary comment line, the column header and one CSV line per row."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("w", encoding="utf-8", newline="") as handle:
        # The first line is a comment summarising the run; the CSV header follows it.
        handle.write(
            f"{HEADER_COMMENT_PREFIX} version={version_hash} "
            f"generated_at={generated_at_iso} total_themes={len(rows)}\n"
        )
        writer = csv.writer(handle)
        writer.writerow([
            "theme",
            "source_count",
            "commander_count",
            "card_count",
            "last_generated_at",
            "version",
        ])
        writer.writerows(
            [
                row.theme,
                row.source_count,
                row.commander_count,
                row.card_count,
                row.last_generated_at,
                row.version,
            ]
            for row in rows
        )


def build_theme_catalog(
    csv_directory: Path,
    output_path: Path,
    *,
    generated_at: Optional[datetime] = None,
    commander_filename: str = "commander_cards.csv",
    cards_filename: str = "cards.csv",
    logs_directory: Optional[Path] = None,
) -> CatalogBuildResult:
    """Scan the card CSVs for theme tags and write the normalized theme catalog.

    Args:
        csv_directory: Directory holding the card CSV files.
        output_path: Destination of the catalog CSV; parent directories are created.
        generated_at: Timestamp recorded in the catalog; defaults to the current
            UTC time.
        commander_filename: Name of the commander CSV inside *csv_directory*.
        cards_filename: Name of the combined card CSV. When it does not exist,
            every ``*_cards.csv`` file except the commander file is scanned instead.
        logs_directory: Optional directory that receives a copy of the written
            catalog under the same file name.

    Returns:
        The rows written (sorted case-insensitively by theme), the timestamp,
        the version hash and the resolved output path.

    Raises:
        RuntimeError: If no theme tag is found. The check runs before anything
            is written, so an existing catalog is left untouched on failure.
    """
    csv_directory = csv_directory.resolve()
    output_path = output_path.resolve()

    # Canonical key -> every display spelling seen; filled in by _load_theme_counts.
    theme_variants: Dict[str, set[str]] = defaultdict(set)

    commander_counts = _load_theme_counts(csv_directory / commander_filename, theme_variants)

    card_counts: Counter[str] = Counter()
    cards_path = csv_directory / cards_filename
    if cards_path.exists():
        card_counts = _load_theme_counts(cards_path, theme_variants)
    else:
        # Fallback: scan all *_cards.csv except commander
        for candidate in csv_directory.glob("*_cards.csv"):
            if candidate.name == commander_filename:
                continue
            card_counts += _load_theme_counts(candidate, theme_variants)

    keys = sorted(set(card_counts) | set(commander_counts))
    generated_at_iso = _derive_generated_at(generated_at)
    # Variants are sorted first so the chosen spelling does not depend on set order.
    display_names = [_select_display_name(sorted(theme_variants[key])) for key in keys]
    version_hash = _compute_version_hash(display_names)

    rows: List[CatalogRow] = []
    for key, display in zip(keys, display_names):
        if not display:
            continue
        card_count = int(card_counts.get(key, 0))
        commander_count = int(commander_counts.get(key, 0))
        rows.append(
            CatalogRow(
                theme=display,
                source_count=card_count + commander_count,
                commander_count=commander_count,
                card_count=card_count,
                last_generated_at=generated_at_iso,
                version=version_hash,
            )
        )
    rows.sort(key=lambda row: (row.theme.casefold(), row.theme))

    # Fail before touching the filesystem: an empty result must not overwrite a
    # previously generated catalog (or its mirrored copy) with a header-only file.
    if not rows:
        raise RuntimeError(
            "No theme tags found while generating theme catalog; ensure card CSVs contain a themeTags column."
        )

    _write_catalog_csv(output_path, rows, version_hash, generated_at_iso)

    if logs_directory is not None:
        logs_directory = logs_directory.resolve()
        logs_directory.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(output_path, logs_directory / output_path.name)

    return CatalogBuildResult(
        rows=rows,
        generated_at=generated_at_iso,
        version=version_hash,
        output_path=output_path,
    )
|
||||
|
||||
|
||||
def _resolve_csv_directory(value: Optional[str]) -> Path:
    """Pick the directory that holds the card CSV files.

    Precedence: a non-empty *value*, then a non-empty ``CSV_FILES_DIR``
    environment variable, then ``ROOT / DEFAULT_CSV_DIRECTORY``.
    """
    chosen = value or os.environ.get("CSV_FILES_DIR")
    return Path(chosen) if chosen else ROOT / DEFAULT_CSV_DIRECTORY
|
||||
|
||||
|
||||
def main(argv: Optional[Sequence[str]] = None) -> CatalogBuildResult:
    """Command-line entry point: parse options, build the catalog, report on stderr.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The result object produced by ``build_theme_catalog``.
    """
    parser = argparse.ArgumentParser(description="Generate a normalized theme catalog CSV.")
    # Every option is a filesystem path: (flag, destination attribute, default, help text).
    path_options = (
        (
            "--csv-dir",
            "csv_dir",
            None,
            "Directory containing card CSV files (defaults to CSV_FILES_DIR or settings.CSV_DIRECTORY)",
        ),
        (
            "--output",
            "output",
            DEFAULT_OUTPUT_PATH,
            "Destination CSV path (defaults to config/themes/theme_catalog.csv)",
        ),
        (
            "--logs-dir",
            "logs_dir",
            None,
            "Optional directory to mirror the generated catalog for diffing (e.g., logs/generated)",
        ),
    )
    for flag, dest, default, help_text in path_options:
        parser.add_argument(flag, dest=dest, type=Path, default=default, help=help_text)
    args = parser.parse_args(argv)

    # The resolver takes an optional string, so the parsed Path is converted back.
    requested_dir = str(args.csv_dir) if args.csv_dir else None
    csv_dir = _resolve_csv_directory(requested_dir)

    result = build_theme_catalog(
        csv_directory=csv_dir,
        output_path=args.output,
        logs_directory=args.logs_dir,
    )
    summary = f"Generated {len(result.rows)} themes -> {result.output_path} (version={result.version})"
    print(summary, file=sys.stderr)
    return result
|
||||
|
||||
|
||||
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":  # pragma: no cover - CLI entrypoint
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue