# Mirror of https://github.com/mwisnowski/mtg_python_deckbuilder.git
# synced 2025-12-16 23:50:12 +01:00 (file: 159 lines, 5.4 KiB, Python)
"""Generate `background_cards.csv` from the master card dataset.
|
|
|
|
This script filters the full `cards.csv` export for cards whose type line contains
|
|
"Background" and writes the filtered rows to `background_cards.csv`. The output
|
|
maintains the same columns as the source data, ensures deterministic ordering,
|
|
and prepends a metadata comment with version and row count.
|
|
|
|
Usage (default paths derived from CSV_FILES_DIR environment variable)::
|
|
|
|
python -m code.scripts.generate_background_cards
|
|
python -m code.scripts.generate_background_cards --source other/cards.csv --output some/backgrounds.csv
|
|
"""
|
|
from __future__ import annotations

import argparse
import ast
import csv
import datetime as _dt
from pathlib import Path
from typing import Dict, Iterable, List, Sequence

from path_util import csv_dir
# Case-insensitive marker used to detect Background cards in type lines and
# theme tags.
BACKGROUND_KEYWORD = "background"

# Default file names, resolved relative to the CSV_FILES_DIR-derived base dir.
DEFAULT_SOURCE_NAME = "cards.csv"
DEFAULT_OUTPUT_NAME = "background_cards.csv"
def _parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
|
|
parser = argparse.ArgumentParser(description="Generate background cards CSV")
|
|
parser.add_argument(
|
|
"--source",
|
|
type=Path,
|
|
help="Optional override for the source cards.csv file",
|
|
)
|
|
parser.add_argument(
|
|
"--output",
|
|
type=Path,
|
|
help="Optional override for the generated background_cards.csv file",
|
|
)
|
|
parser.add_argument(
|
|
"--version",
|
|
type=str,
|
|
help="Optional version string to embed in the output metadata comment",
|
|
)
|
|
return parser.parse_args(argv)
|
|
|
|
|
|
def _resolve_paths(args: argparse.Namespace) -> tuple[Path, Path]:
    """Resolve absolute source/output paths, honoring CLI overrides.

    Defaults are derived from the configured CSV directory; a path given on
    the command line always wins over the corresponding default.
    """
    base = Path(csv_dir()).resolve()
    if args.source:
        source = args.source.resolve()
    else:
        source = (base / DEFAULT_SOURCE_NAME).resolve()
    if args.output:
        output = args.output.resolve()
    else:
        output = (base / DEFAULT_OUTPUT_NAME).resolve()
    return source, output
def _is_background_type(type_line: str | None) -> bool:
    """Return ``True`` when *type_line* mentions the Background card type."""
    if type_line:
        return BACKGROUND_KEYWORD in type_line.lower()
    return False
def _parse_theme_tags(raw: str | None) -> list[str]:
|
|
if not raw:
|
|
return []
|
|
text = raw.strip()
|
|
if not text:
|
|
return []
|
|
if text.startswith("[") and text.endswith("]"):
|
|
body = text[1:-1].strip()
|
|
if not body:
|
|
return []
|
|
tokens = [token.strip(" '\"") for token in body.split(",")]
|
|
return [token for token in tokens if token]
|
|
return [part.strip() for part in text.split(";") if part.strip()]
|
|
|
|
|
|
def _is_background_row(row: Dict[str, str]) -> bool:
    """Return ``True`` when a CSV row represents a Background card.

    A row qualifies either via its type line or via any theme tag that
    mentions the background keyword.
    """
    if _is_background_type(row.get("type")):
        return True
    lowered_tags = (tag.lower() for tag in _parse_theme_tags(row.get("themeTags")))
    return any(BACKGROUND_KEYWORD in tag for tag in lowered_tags)
def _row_priority(row: Dict[str, str]) -> tuple[int, int]:
    """Return priority tuple for duplicate selection.

    Prefer rows that explicitly declare a background type line, then those
    with longer oracle text. Higher tuple values take precedence when
    comparing candidates.
    """
    declares_background = BACKGROUND_KEYWORD in (row.get("type") or "").lower()
    oracle_length = len((row.get("text") or "").strip())
    return (int(declares_background), oracle_length)
def _gather_background_rows(reader: csv.DictReader) -> list[Dict[str, str]]:
    """Collect background rows, deduplicated by case-insensitive name.

    When a name appears multiple times the row with the highest
    ``_row_priority`` wins. Rows are returned sorted by lowercase name so
    the output is deterministic.
    """
    best: Dict[str, Dict[str, str]] = {}
    for row in reader:
        if not row:
            continue
        name = (row.get("name") or "").strip()
        if not name or not _is_background_row(row):
            continue
        key = name.lower()
        incumbent = best.get(key)
        if incumbent is None or _row_priority(row) > _row_priority(incumbent):
            best[key] = row
    return [best[key] for key in sorted(best)]
def _ensure_all_columns(rows: Iterable[Dict[str, str]], headers: List[str]) -> None:
|
|
for row in rows:
|
|
for header in headers:
|
|
row.setdefault(header, "")
|
|
|
|
|
|
def _write_background_csv(output: Path, headers: List[str], rows: List[Dict[str, str]], version: str, source: Path) -> None:
|
|
output.parent.mkdir(parents=True, exist_ok=True)
|
|
now_utc = _dt.datetime.now(_dt.UTC).replace(microsecond=0)
|
|
metadata = {
|
|
"version": version,
|
|
"count": str(len(rows)),
|
|
"source": source.name,
|
|
"generated": now_utc.isoformat().replace("+00:00", "Z"),
|
|
}
|
|
meta_line = "# " + " ".join(f"{key}={value}" for key, value in metadata.items())
|
|
with output.open("w", encoding="utf-8", newline="") as handle:
|
|
handle.write(meta_line + "\n")
|
|
writer = csv.DictWriter(handle, fieldnames=headers)
|
|
writer.writeheader()
|
|
for row in rows:
|
|
writer.writerow({key: row.get(key, "") for key in headers})
|
|
|
|
|
|
def main(argv: Sequence[str] | None = None) -> None:
    """CLI entry point: read cards.csv, filter backgrounds, write the output.

    Raises:
        FileNotFoundError: If the source cards CSV does not exist.
        ValueError: If the source CSV has no header row.
    """
    args = _parse_args(argv)
    source, output = _resolve_paths(args)
    if not source.exists():
        raise FileNotFoundError(f"Source cards CSV not found: {source}")

    with source.open("r", encoding="utf-8", newline="") as handle:
        reader = csv.DictReader(handle)
        headers = reader.fieldnames
        if headers is None:
            raise ValueError("cards.csv is missing header row")
        background_rows = _gather_background_rows(reader)
        _ensure_all_columns(background_rows, list(headers))

    # Default the embedded version to today's UTC date when not supplied.
    version = args.version or _dt.datetime.now(_dt.UTC).strftime("%Y%m%d")
    _write_background_csv(output, list(headers), background_rows, version, source)
if __name__ == "__main__":
|
|
main()
|