mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-17 08:00:13 +01:00
feat: migrate to unified Parquet format with instant GitHub setup and 4x faster tagging
This commit is contained in:
parent e9e949aae3
commit 8435312c8f

58 changed files with 11921 additions and 3961 deletions
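The commit message above describes replacing the per-color CSV workflow with a single processed Parquet file. As a rough, illustrative sketch of the data-layout change (CSV_DIRECTORY is the project setting used in the code below; the color-identity abbreviation "U" for blue is an assumption, not taken from this diff):

    # Before: one CSV per color identity, regenerated by the legacy setup module
    import pandas as pd
    from settings import CSV_DIRECTORY

    blue_df = pd.read_csv(f"{CSV_DIRECTORY}/blue_cards.csv", low_memory=False)

    # After: one unified Parquet dataset, filtered at query time
    all_cards = pd.read_parquet("card_files/processed/all_cards.parquet")
    blue_df = all_cards[all_cards["colorIdentity"] == "U"]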
code/file_setup/__init__.py
@@ -1,8 +1,8 @@
 """Initialize the file_setup package."""
 
-from .setup import setup, regenerate_csv_by_color
+from .setup import initial_setup, regenerate_processed_parquet
 
 __all__ = [
-    'setup',
-    'regenerate_csv_by_color'
+    'initial_setup',
+    'regenerate_processed_parquet'
 ]
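After this hunk, callers import the Parquet-oriented entry points instead of the CSV ones. A hedged sketch of the new package surface (the argument lists are assumed; only the names come from the diff):

    from file_setup import initial_setup, regenerate_processed_parquet

    initial_setup()                 # first-time download and build of the processed Parquet data
    regenerate_processed_parquet()  # presumably rebuilds the processed Parquet from fresh MTGJSON data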
code/file_setup/data_loader.py (new file, 338 lines)
@@ -0,0 +1,338 @@
"""Data loader abstraction for CSV and Parquet formats.

This module provides a unified interface for reading and writing card data
in both CSV and Parquet formats. It handles format detection, conversion,
and schema validation.

Introduced in v3.0.0 as part of the Parquet migration.
"""

from __future__ import annotations

import os
from pathlib import Path
from typing import List, Optional

import pandas as pd

from logging_util import get_logger
from path_util import card_files_processed_dir

logger = get_logger(__name__)


# Required columns for deck building
REQUIRED_COLUMNS = [
    "name",
    "colorIdentity",
    "type",  # MTGJSON uses 'type' not 'types'
    "keywords",
    "manaValue",
    "text",
    "power",
    "toughness",
]


def validate_schema(df: pd.DataFrame, required: Optional[List[str]] = None) -> None:
    """Validate that DataFrame contains required columns.

    Args:
        df: DataFrame to validate
        required: List of required columns (uses REQUIRED_COLUMNS if None)

    Raises:
        ValueError: If required columns are missing
    """
    required = required or REQUIRED_COLUMNS
    missing = [col for col in required if col not in df.columns]

    if missing:
        raise ValueError(
            f"Schema validation failed: missing required columns {missing}. "
            f"Available columns: {list(df.columns)}"
        )

    logger.debug(f"✓ Schema validation passed ({len(required)} required columns present)")


class DataLoader:
    """Unified data loading interface supporting CSV and Parquet formats.

    This class provides transparent access to card data regardless of the
    underlying storage format. It automatically detects the format based on
    file extensions and provides conversion utilities.

    Examples:
        >>> loader = DataLoader()
        >>> df = loader.read_cards("card_files/processed/all_cards.parquet")
        >>> loader.write_cards(df, "output.parquet")
        >>> loader.convert("input.csv", "output.parquet")
    """

    def __init__(self, format: str = "auto"):
        """Initialize the data loader.

        Args:
            format: Format preference - "csv", "parquet", or "auto" (default: auto)
                "auto" detects format from file extension
        """
        self.format = format.lower()
        if self.format not in ("csv", "parquet", "auto"):
            raise ValueError(f"Unsupported format: {format}. Use 'csv', 'parquet', or 'auto'.")

    def read_cards(
        self,
        path: str,
        columns: Optional[List[str]] = None,
        format: Optional[str] = None
    ) -> pd.DataFrame:
        """Load card data from a file.

        Args:
            path: File path (e.g., "card_files/processed/all_cards.parquet")
            columns: Optional list of columns to load (Parquet optimization)
            format: Override format detection (uses self.format if None)

        Returns:
            DataFrame with card data

        Raises:
            FileNotFoundError: If the file doesn't exist
            ValueError: If format is unsupported
        """
        if not os.path.exists(path):
            raise FileNotFoundError(f"Card data file not found: {path}")

        detected_format = format or self._detect_format(path)

        logger.debug(f"Loading card data from {path} (format: {detected_format})")

        if detected_format == "csv":
            return self._read_csv(path, columns)
        elif detected_format == "parquet":
            return self._read_parquet(path, columns)
        else:
            raise ValueError(f"Unsupported format: {detected_format}")

    def write_cards(
        self,
        df: pd.DataFrame,
        path: str,
        format: Optional[str] = None,
        index: bool = False
    ) -> None:
        """Save card data to a file.

        Args:
            df: DataFrame to save
            path: Output file path
            format: Force format (overrides auto-detection)
            index: Whether to write DataFrame index (default: False)

        Raises:
            ValueError: If format is unsupported
        """
        detected_format = format or self._detect_format(path)

        # Ensure output directory exists
        os.makedirs(os.path.dirname(path) if os.path.dirname(path) else ".", exist_ok=True)

        logger.debug(f"Writing card data to {path} (format: {detected_format}, rows: {len(df)})")

        if detected_format == "csv":
            self._write_csv(df, path, index)
        elif detected_format == "parquet":
            self._write_parquet(df, path, index)
        else:
            raise ValueError(f"Unsupported format: {detected_format}")

    def convert(
        self,
        src_path: str,
        dst_path: str,
        columns: Optional[List[str]] = None
    ) -> None:
        """Convert between CSV and Parquet formats.

        Args:
            src_path: Source file path
            dst_path: Destination file path
            columns: Optional list of columns to include (all if None)

        Examples:
            >>> loader.convert("cards.csv", "cards.parquet")
            >>> loader.convert("cards.parquet", "cards.csv", columns=["name", "type"])
        """
        logger.info(f"Converting {src_path} → {dst_path}")
        df = self.read_cards(src_path, columns=columns)
        self.write_cards(df, dst_path)
        logger.info(f"✓ Converted {len(df)} cards")

    def _read_csv(self, path: str, columns: Optional[List[str]] = None) -> pd.DataFrame:
        """Read CSV file."""
        try:
            return pd.read_csv(path, usecols=columns, low_memory=False)
        except Exception as e:
            logger.error(f"Failed to read CSV from {path}: {e}")
            raise

    def _read_parquet(self, path: str, columns: Optional[List[str]] = None) -> pd.DataFrame:
        """Read Parquet file."""
        try:
            return pd.read_parquet(path, columns=columns)
        except Exception as e:
            logger.error(f"Failed to read Parquet from {path}: {e}")
            raise

    def _write_csv(self, df: pd.DataFrame, path: str, index: bool) -> None:
        """Write CSV file."""
        try:
            df.to_csv(path, index=index)
        except Exception as e:
            logger.error(f"Failed to write CSV to {path}: {e}")
            raise

    def _write_parquet(self, df: pd.DataFrame, path: str, index: bool) -> None:
        """Write Parquet file with Snappy compression."""
        try:
            df.to_parquet(path, index=index, compression="snappy", engine="pyarrow")
        except Exception as e:
            logger.error(f"Failed to write Parquet to {path}: {e}")
            raise

    def _detect_format(self, path: str) -> str:
        """Detect file format from extension.

        Args:
            path: File path to analyze

        Returns:
            Format string: "csv" or "parquet"

        Raises:
            ValueError: If format cannot be determined
        """
        if self.format != "auto":
            return self.format

        # Check file extension
        if path.endswith(".csv"):
            return "csv"
        elif path.endswith(".parquet"):
            return "parquet"

        # Try to infer from existing files (no extension provided)
        if os.path.exists(f"{path}.parquet"):
            return "parquet"
        elif os.path.exists(f"{path}.csv"):
            return "csv"

        raise ValueError(
            f"Cannot determine format for '{path}'. "
            "Use .csv or .parquet extension, or specify format explicitly."
        )

    def write_batch_parquet(
        self,
        df: pd.DataFrame,
        batch_id: int,
        tag: str = "",
        batches_dir: Optional[str] = None
    ) -> str:
        """Write a batch Parquet file (used during tagging).

        Args:
            df: DataFrame to save as a batch
            batch_id: Unique batch identifier (e.g., 0, 1, 2...)
            tag: Optional tag to include in filename (e.g., "white", "commander")
            batches_dir: Directory for batch files (defaults to card_files/processed/batches)

        Returns:
            Path to the written batch file

        Example:
            >>> loader.write_batch_parquet(white_df, batch_id=0, tag="white")
            'card_files/processed/batches/batch_0_white.parquet'
        """
        if batches_dir is None:
            batches_dir = os.path.join(card_files_processed_dir(), "batches")

        os.makedirs(batches_dir, exist_ok=True)

        # Build filename: batch_{id}_{tag}.parquet or batch_{id}.parquet
        filename = f"batch_{batch_id}_{tag}.parquet" if tag else f"batch_{batch_id}.parquet"
        path = os.path.join(batches_dir, filename)

        logger.debug(f"Writing batch {batch_id} ({tag or 'no tag'}): {len(df)} cards → {path}")
        self.write_cards(df, path, format="parquet")

        return path

    def merge_batches(
        self,
        output_path: Optional[str] = None,
        batches_dir: Optional[str] = None,
        cleanup: bool = True
    ) -> pd.DataFrame:
        """Merge all batch Parquet files into a single output file.

        Args:
            output_path: Path for merged output (defaults to card_files/processed/all_cards.parquet)
            batches_dir: Directory containing batch files (defaults to card_files/processed/batches)
            cleanup: Whether to delete batch files after merging (default: True)

        Returns:
            Merged DataFrame

        Raises:
            FileNotFoundError: If no batch files found

        Example:
            >>> loader.merge_batches()  # Merges all batches → all_cards.parquet
        """
        if batches_dir is None:
            batches_dir = os.path.join(card_files_processed_dir(), "batches")

        if output_path is None:
            from code.path_util import get_processed_cards_path
            output_path = get_processed_cards_path()

        # Find all batch files
        batch_files = sorted(Path(batches_dir).glob("batch_*.parquet"))

        if not batch_files:
            raise FileNotFoundError(f"No batch files found in {batches_dir}")

        logger.info(f"Merging {len(batch_files)} batch files from {batches_dir}")

        # Read and concatenate all batches
        dfs = []
        for batch_file in batch_files:
            logger.debug(f"Reading batch: {batch_file.name}")
            df = self.read_cards(str(batch_file), format="parquet")
            dfs.append(df)

        # Merge all batches
        merged_df = pd.concat(dfs, ignore_index=True)
        logger.info(f"Merged {len(merged_df)} total cards from {len(dfs)} batches")

        # Write merged output
        self.write_cards(merged_df, output_path, format="parquet")
        logger.info(f"✓ Wrote merged data to {output_path}")

        # Cleanup batch files if requested
        if cleanup:
            logger.debug(f"Cleaning up {len(batch_files)} batch files")
            for batch_file in batch_files:
                batch_file.unlink()

            # Remove batches directory if empty
            try:
                Path(batches_dir).rmdir()
                logger.debug(f"Removed empty batches directory: {batches_dir}")
            except OSError:
                pass  # Directory not empty, keep it

        return merged_df
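A minimal usage sketch of the new DataLoader, assuming the project's code/ directory is on sys.path (the exact import path is a guess here); paths, columns, and method names follow the docstrings above:

    # Import path assumed; the module itself imports logging_util/path_util as top-level names.
    from file_setup.data_loader import DataLoader, validate_schema

    loader = DataLoader()  # "auto": format is detected from the file extension

    # Column pruning is a Parquet-side optimization; CSV reads use the same API.
    cards = loader.read_cards(
        "card_files/processed/all_cards.parquet",
        columns=["name", "colorIdentity", "type", "manaValue"],
    )
    validate_schema(cards, required=["name", "colorIdentity", "type", "manaValue"])

    # One-off conversion between formats, as in the class docstring.
    loader.convert("cards.csv", "cards.parquet")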
code/file_setup/old/setup.py (new file, 362 lines)
@@ -0,0 +1,362 @@
"""MTG Python Deckbuilder setup module.

This module provides the main setup functionality for the MTG Python Deckbuilder
application. It handles initial setup tasks such as downloading card data,
creating color-filtered card lists, and generating commander-eligible card lists.

Key Features:
- Initial setup and configuration
- Card data download and processing
- Color-based card filtering
- Commander card list generation
- CSV file management and validation

The module works in conjunction with setup_utils.py for utility functions and
exceptions.py for error handling.
"""

from __future__ import annotations

# Standard library imports
from enum import Enum
import os
from typing import List, Dict, Any

# Third-party imports (optional)
try:
    import inquirer  # type: ignore
except Exception:
    inquirer = None  # Fallback to simple input-based menu when unavailable
import pandas as pd

# Local imports
import logging_util
from settings import CSV_DIRECTORY
from .setup_constants import BANNED_CARDS, SETUP_COLORS, COLOR_ABRV, MTGJSON_API_URL
from .setup_utils import (
    download_cards_csv,
    filter_dataframe,
    process_legendary_cards,
    check_csv_exists,
    save_color_filtered_csvs,
    enrich_commander_rows_with_tags,
)
from exceptions import (
    CSVFileNotFoundError,
    CommanderValidationError,
    MTGJSONDownloadError
)
from scripts import generate_background_cards as background_cards_script

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _generate_background_catalog(cards_path: str, output_path: str) -> None:
    """Regenerate ``background_cards.csv`` from the latest cards dataset."""

    logger.info('Generating background cards catalog')
    args = [
        '--source', cards_path,
        '--output', output_path,
    ]
    try:
        background_cards_script.main(args)
    except Exception:  # pragma: no cover - surfaced to caller/test
        logger.exception('Failed to generate background catalog')
        raise
    else:
        logger.info('Background cards catalog generated successfully')


# Create logger for this module
logger = logging_util.logging.getLogger(__name__)
logger.setLevel(logging_util.LOG_LEVEL)
logger.addHandler(logging_util.file_handler)
logger.addHandler(logging_util.stream_handler)

# Create CSV directory if it doesn't exist
if not os.path.exists(CSV_DIRECTORY):
    os.makedirs(CSV_DIRECTORY)

## Note: using shared check_csv_exists from setup_utils to avoid duplication

def initial_setup() -> None:
    """Perform initial setup by downloading card data and creating filtered CSV files.

    Downloads the latest card data from MTGJSON if needed, creates color-filtered CSV files,
    and generates commander-eligible cards list. Uses utility functions from setup_utils.py
    for file operations and data processing.

    Raises:
        CSVFileNotFoundError: If required CSV files cannot be found
        MTGJSONDownloadError: If card data download fails
        DataFrameProcessingError: If data processing fails
        ColorFilterError: If color filtering fails
    """
    logger.info('Checking for cards.csv file')

    try:
        cards_file = f'{CSV_DIRECTORY}/cards.csv'
        try:
            with open(cards_file, 'r', encoding='utf-8'):
                logger.info('cards.csv exists')
        except FileNotFoundError:
            logger.info('cards.csv not found, downloading from mtgjson')
            download_cards_csv(MTGJSON_API_URL, cards_file)

        df = pd.read_csv(cards_file, low_memory=False)

        logger.info('Checking for color identity sorted files')
        # Generate color-identity filtered CSVs in one pass
        save_color_filtered_csvs(df, CSV_DIRECTORY)

        # Generate commander list
        determine_commanders()

    except Exception as e:
        logger.error(f'Error during initial setup: {str(e)}')
        raise

## Removed local filter_by_color in favor of setup_utils.save_color_filtered_csvs

def determine_commanders() -> None:
    """Generate commander_cards.csv containing all cards eligible to be commanders.

    This function processes the card database to identify and validate commander-eligible cards,
    applying comprehensive validation steps and filtering criteria.

    Raises:
        CSVFileNotFoundError: If cards.csv is missing and cannot be downloaded
        MTGJSONDownloadError: If downloading cards data fails
        CommanderValidationError: If commander validation fails
        DataFrameProcessingError: If data processing operations fail
    """
    logger.info('Starting commander card generation process')

    try:
        # Check for cards.csv with progress tracking
        cards_file = f'{CSV_DIRECTORY}/cards.csv'
        if not check_csv_exists(cards_file):
            logger.info('cards.csv not found, initiating download')
            download_cards_csv(MTGJSON_API_URL, cards_file)
        else:
            logger.info('cards.csv found, proceeding with processing')

        # Load and process cards data
        logger.info('Loading card data from CSV')
        df = pd.read_csv(cards_file, low_memory=False)

        # Process legendary cards with validation
        logger.info('Processing and validating legendary cards')
        try:
            filtered_df = process_legendary_cards(df)
        except CommanderValidationError as e:
            logger.error(f'Commander validation failed: {str(e)}')
            raise

        # Apply standard filters
        logger.info('Applying standard card filters')
        filtered_df = filter_dataframe(filtered_df, BANNED_CARDS)

        logger.info('Enriching commander metadata with theme and creature tags')
        filtered_df = enrich_commander_rows_with_tags(filtered_df, CSV_DIRECTORY)

        # Save commander cards
        logger.info('Saving validated commander cards')
        commander_path = f'{CSV_DIRECTORY}/commander_cards.csv'
        filtered_df.to_csv(commander_path, index=False)

        background_output = f'{CSV_DIRECTORY}/background_cards.csv'
        _generate_background_catalog(cards_file, background_output)

        logger.info('Commander card generation completed successfully')

    except (CSVFileNotFoundError, MTGJSONDownloadError) as e:
        logger.error(f'File operation error: {str(e)}')
        raise
    except CommanderValidationError as e:
        logger.error(f'Commander validation error: {str(e)}')
        raise
    except Exception as e:
        logger.error(f'Unexpected error during commander generation: {str(e)}')
        raise

def regenerate_csvs_all() -> None:
    """Regenerate all color-filtered CSV files from latest card data.

    Downloads fresh card data and recreates all color-filtered CSV files.
    Useful for updating the card database when new sets are released.

    Raises:
        MTGJSONDownloadError: If card data download fails
        DataFrameProcessingError: If data processing fails
        ColorFilterError: If color filtering fails
    """
    try:
        logger.info('Downloading latest card data from MTGJSON')
        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')

        logger.info('Loading and processing card data')
        try:
            df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
        except pd.errors.ParserError as e:
            logger.warning(f'CSV parsing error encountered: {e}. Retrying with error handling...')
            df = pd.read_csv(
                f'{CSV_DIRECTORY}/cards.csv',
                low_memory=False,
                on_bad_lines='warn',  # Warn about malformed rows but continue
                encoding_errors='replace'  # Replace bad encoding chars
            )
            logger.info('Successfully loaded card data with error handling (some rows may have been skipped)')

        logger.info('Regenerating color identity sorted files')
        save_color_filtered_csvs(df, CSV_DIRECTORY)

        logger.info('Regenerating commander cards')
        determine_commanders()

        logger.info('Card database regeneration complete')

    except Exception as e:
        logger.error(f'Failed to regenerate card database: {str(e)}')
        raise
    # Once files are regenerated, create a new legendary list (already executed in try)

def regenerate_csv_by_color(color: str) -> None:
    """Regenerate CSV file for a specific color identity.

    Args:
        color: Color name to regenerate CSV for (e.g. 'white', 'blue')

    Raises:
        ValueError: If color is not valid
        MTGJSONDownloadError: If card data download fails
        DataFrameProcessingError: If data processing fails
        ColorFilterError: If color filtering fails
    """
    try:
        if color not in SETUP_COLORS:
            raise ValueError(f'Invalid color: {color}')

        color_abv = COLOR_ABRV[SETUP_COLORS.index(color)]

        logger.info(f'Downloading latest card data for {color} cards')
        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')

        logger.info('Loading and processing card data')
        df = pd.read_csv(
            f'{CSV_DIRECTORY}/cards.csv',
            low_memory=False,
            on_bad_lines='skip',  # Skip malformed rows (MTGJSON CSV has escaping issues)
            encoding_errors='replace'  # Replace bad encoding chars
        )

        logger.info(f'Regenerating {color} cards CSV')
        # Use shared utilities to base-filter once then slice color, honoring bans
        base_df = filter_dataframe(df, BANNED_CARDS)
        base_df[base_df['colorIdentity'] == color_abv].to_csv(
            f'{CSV_DIRECTORY}/{color}_cards.csv', index=False
        )

        logger.info(f'Successfully regenerated {color} cards database')

    except Exception as e:
        logger.error(f'Failed to regenerate {color} cards: {str(e)}')
        raise

class SetupOption(Enum):
    """Enum for setup menu options."""
    INITIAL_SETUP = 'Initial Setup'
    REGENERATE_CSV = 'Regenerate CSV Files'
    BACK = 'Back'

def _display_setup_menu() -> SetupOption:
    """Display the setup menu and return the selected option.

    Returns:
        SetupOption: The selected menu option
    """
    if inquirer is not None:
        question: List[Dict[str, Any]] = [
            inquirer.List(
                'menu',
                choices=[option.value for option in SetupOption],
                carousel=True)]
        answer = inquirer.prompt(question)
        return SetupOption(answer['menu'])

    # Simple fallback when inquirer isn't installed (e.g., headless/container)
    options = list(SetupOption)
    print("\nSetup Menu:")
    for idx, opt in enumerate(options, start=1):
        print(f" {idx}) {opt.value}")
    while True:
        try:
            sel = input("Select an option [1]: ").strip() or "1"
            i = int(sel)
            if 1 <= i <= len(options):
                return options[i - 1]
        except KeyboardInterrupt:
            print("")
            return SetupOption.BACK
        except Exception:
            pass
        print("Invalid selection. Please try again.")

def setup() -> bool:
    """Run the setup process for the MTG Python Deckbuilder.

    This function provides a menu-driven interface to:
    1. Perform initial setup by downloading and processing card data
    2. Regenerate CSV files with updated card data
    3. Perform all tagging processes on the color-sorted csv files

    The function handles errors gracefully and provides feedback through logging.

    Returns:
        bool: True if setup completed successfully, False otherwise
    """
    try:
        print('Which setup operation would you like to perform?\n'
              'If this is your first time setting up, do the initial setup.\n'
              'If you\'ve done the basic setup before, you can regenerate the CSV files\n')

        choice = _display_setup_menu()

        if choice == SetupOption.INITIAL_SETUP:
            logger.info('Starting initial setup')
            initial_setup()
            logger.info('Initial setup completed successfully')
            return True

        elif choice == SetupOption.REGENERATE_CSV:
            logger.info('Starting CSV regeneration')
            regenerate_csvs_all()
            logger.info('CSV regeneration completed successfully')
            return True

        elif choice == SetupOption.BACK:
            logger.info('Setup cancelled by user')
            return False

    except Exception as e:
        logger.error(f'Error during setup: {e}')
        raise

    return False
code/file_setup/old/setup_constants.py (new file, 114 lines)
@@ -0,0 +1,114 @@
from typing import Dict, List
from settings import (
    SETUP_COLORS,
    COLOR_ABRV,
    CARD_DATA_COLUMNS as COLUMN_ORDER,  # backward compatible alias
    CARD_DATA_COLUMNS as TAGGED_COLUMN_ORDER,
)

__all__ = [
    'SETUP_COLORS', 'COLOR_ABRV', 'COLUMN_ORDER', 'TAGGED_COLUMN_ORDER',
    'BANNED_CARDS', 'MTGJSON_API_URL', 'LEGENDARY_OPTIONS', 'NON_LEGAL_SETS',
    'CARD_TYPES_TO_EXCLUDE', 'CSV_PROCESSING_COLUMNS', 'SORT_CONFIG',
    'FILTER_CONFIG'
]

# Banned cards consolidated here (remains specific to setup concerns)
BANNED_CARDS: List[str] = [
    # Commander banned list
    'Ancestral Recall', 'Balance', 'Biorhythm', 'Black Lotus',
    'Chaos Orb', 'Channel', 'Dockside Extortionist',
    'Emrakul, the Aeons Torn',
    'Erayo, Soratami Ascendant', 'Falling Star', 'Fastbond',
    'Flash', 'Golos, Tireless Pilgrim',
    'Griselbrand', 'Hullbreacher', 'Iona, Shield of Emeria',
    'Karakas', 'Jeweled Lotus', 'Leovold, Emissary of Trest',
    'Library of Alexandria', 'Limited Resources', 'Lutri, the Spellchaser',
    'Mana Crypt', 'Mox Emerald', 'Mox Jet', 'Mox Pearl', 'Mox Ruby',
    'Mox Sapphire', 'Nadu, Winged Wisdom',
    'Paradox Engine', 'Primeval Titan', 'Prophet of Kruphix',
    'Recurring Nightmare', 'Rofellos, Llanowar Emissary', 'Shahrazad',
    'Sundering Titan', 'Sylvan Primordial',
    'Time Vault', 'Time Walk', 'Tinker', 'Tolarian Academy',
    'Trade Secrets', 'Upheaval', "Yawgmoth's Bargain",
    # Problematic / culturally sensitive or banned in other formats
    'Invoke Prejudice', 'Cleanse', 'Stone-Throwing Devils', 'Pradesh Gypsies',
    'Jihad', 'Imprison', 'Crusade',
    # Cards of the Hero type (non creature)
    "The Protector", "The Hunter", "The Savant", "The Explorer",
    "The Philosopher", "The Harvester", "The Tyrant", "The Vanquisher",
    "The Avenger", "The Slayer", "The Warmonger", "The Destined",
    "The Warrior", "The General", "The Provider", "The Champion",
    # Hero Equipment
    "Spear of the General", "Lash of the Tyrant", "Bow of the Hunter",
    "Cloak of the Philosopher", "Axe of the Warmonger"
]

# Constants for setup and CSV processing
MTGJSON_API_URL: str = 'https://mtgjson.com/api/v5/csv/cards.csv'

LEGENDARY_OPTIONS: List[str] = [
    'Legendary Creature',
    'Legendary Artifact',
    'Legendary Artifact Creature',
    'Legendary Enchantment Creature',
    'Legendary Planeswalker'
]

NON_LEGAL_SETS: List[str] = [
    'PHTR', 'PH17', 'PH18', 'PH19', 'PH20', 'PH21',
    'UGL', 'UND', 'UNH', 'UST'
]

CARD_TYPES_TO_EXCLUDE: List[str] = [
    'Plane —',
    'Conspiracy',
    'Vanguard',
    'Scheme',
    'Phenomenon',
    'Stickers',
    'Attraction',
    'Contraption'
]

# Columns to keep when processing CSV files
CSV_PROCESSING_COLUMNS: List[str] = [
    'name',           # Card name
    'faceName',       # Name of specific face for multi-faced cards
    'edhrecRank',     # Card's rank on EDHREC
    'colorIdentity',  # Color identity for Commander format
    'colors',         # Actual colors in card's mana cost
    'manaCost',       # Mana cost string
    'manaValue',      # Converted mana cost
    'type',           # Card type line
    'layout',         # Card layout (normal, split, etc)
    'text',           # Card text/rules
    'power',          # Power (for creatures)
    'toughness',      # Toughness (for creatures)
    'keywords',       # Card's keywords
    'side'            # Side identifier for multi-faced cards
]

# Configuration for DataFrame sorting operations
SORT_CONFIG = {
    'columns': ['name', 'side'],  # Columns to sort by
    'case_sensitive': False       # Ignore case when sorting
}

# Configuration for DataFrame filtering operations
FILTER_CONFIG: Dict[str, Dict[str, List[str]]] = {
    'layout': {
        'exclude': ['reversible_card']
    },
    'availability': {
        'require': ['paper']
    },
    'promoTypes': {
        'exclude': ['playtest']
    },
    'securityStamp': {
        'exclude': ['Heart', 'Acorn']
    }
}

# COLUMN_ORDER and TAGGED_COLUMN_ORDER now sourced from settings via CARD_DATA_COLUMNS
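FILTER_CONFIG pairs a column with exclude/require value lists. The project's real consumer is setup_utils.filter_dataframe (only partially shown at the end of this diff), so the following is just an illustrative sketch of how such rules could be applied with pandas, not the project's implementation:

    import pandas as pd

    def apply_filter_config(df: pd.DataFrame, config: dict) -> pd.DataFrame:
        """Illustrative-only application of FILTER_CONFIG-style exclude/require rules."""
        for column, rules in config.items():
            if column not in df.columns:
                continue
            col = df[column].fillna("").astype(str)
            keep = pd.Series(True, index=df.index)
            for bad in rules.get('exclude', []):
                keep &= ~col.str.contains(bad, case=False)
            for needed in rules.get('require', []):
                keep &= col.str.contains(needed, case=False)
            df = df[keep]
        return df

    # e.g. keep only paper-available, non-playtest printings:
    # filtered = apply_filter_config(cards_df, FILTER_CONFIG)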
code/file_setup/old/setup_csv.py (new file, 342 lines)
@@ -0,0 +1,342 @@
"""MTG Python Deckbuilder setup module.

This module provides the main setup functionality for the MTG Python Deckbuilder
application. It handles initial setup tasks such as downloading card data,
creating color-filtered card lists, and generating commander-eligible card lists.

Key Features:
- Initial setup and configuration
- Card data download and processing
- Color-based card filtering
- Commander card list generation
- CSV file management and validation

The module works in conjunction with setup_utils.py for utility functions and
exceptions.py for error handling.
"""

from __future__ import annotations

# Standard library imports
from enum import Enum
import os
from typing import List, Dict, Any

# Third-party imports (optional)
try:
    import inquirer  # type: ignore
except Exception:
    inquirer = None  # Fallback to simple input-based menu when unavailable
import pandas as pd

# Local imports
import logging_util
from settings import CSV_DIRECTORY
from .setup_constants import BANNED_CARDS, SETUP_COLORS, COLOR_ABRV, MTGJSON_API_URL
from .setup_utils import (
    download_cards_csv,
    filter_dataframe,
    process_legendary_cards,
    check_csv_exists,
    save_color_filtered_csvs,
    enrich_commander_rows_with_tags,
)
from exceptions import (
    CSVFileNotFoundError,
    CommanderValidationError,
    MTGJSONDownloadError
)
from scripts import generate_background_cards as background_cards_script

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _generate_background_catalog(cards_path: str, output_path: str) -> None:
    """Regenerate ``background_cards.csv`` from the latest cards dataset."""

    logger.info('Generating background cards catalog')
    args = [
        '--source', cards_path,
        '--output', output_path,
    ]
    try:
        background_cards_script.main(args)
    except Exception:  # pragma: no cover - surfaced to caller/test
        logger.exception('Failed to generate background catalog')
        raise
    else:
        logger.info('Background cards catalog generated successfully')


# Create logger for this module
logger = logging_util.logging.getLogger(__name__)
logger.setLevel(logging_util.LOG_LEVEL)
logger.addHandler(logging_util.file_handler)
logger.addHandler(logging_util.stream_handler)

# Create CSV directory if it doesn't exist
if not os.path.exists(CSV_DIRECTORY):
    os.makedirs(CSV_DIRECTORY)

## Note: using shared check_csv_exists from setup_utils to avoid duplication

def initial_setup() -> None:
    """Perform initial setup by downloading and processing card data.

    **MIGRATION NOTE**: This function now delegates to the Parquet-based setup
    (initial_setup_parquet) instead of the legacy CSV workflow. The old CSV-based
    setup is preserved in code/file_setup/old/setup.py for reference.

    Downloads the latest card data from MTGJSON as Parquet, processes it, and creates
    the unified all_cards.parquet file. No color-specific files are generated - filtering
    happens at query time instead.

    Raises:
        Various exceptions from Parquet download/processing steps
    """
    from .setup_parquet import initial_setup_parquet
    initial_setup_parquet()

## Removed local filter_by_color in favor of setup_utils.save_color_filtered_csvs

def determine_commanders() -> None:
    """Generate commander_cards.csv containing all cards eligible to be commanders.

    This function processes the card database to identify and validate commander-eligible cards,
    applying comprehensive validation steps and filtering criteria.

    Raises:
        CSVFileNotFoundError: If cards.csv is missing and cannot be downloaded
        MTGJSONDownloadError: If downloading cards data fails
        CommanderValidationError: If commander validation fails
        DataFrameProcessingError: If data processing operations fail
    """
    logger.info('Starting commander card generation process')

    try:
        # Check for cards.csv with progress tracking
        cards_file = f'{CSV_DIRECTORY}/cards.csv'
        if not check_csv_exists(cards_file):
            logger.info('cards.csv not found, initiating download')
            download_cards_csv(MTGJSON_API_URL, cards_file)
        else:
            logger.info('cards.csv found, proceeding with processing')

        # Load and process cards data
        logger.info('Loading card data from CSV')
        df = pd.read_csv(cards_file, low_memory=False)

        # Process legendary cards with validation
        logger.info('Processing and validating legendary cards')
        try:
            filtered_df = process_legendary_cards(df)
        except CommanderValidationError as e:
            logger.error(f'Commander validation failed: {str(e)}')
            raise

        # Apply standard filters
        logger.info('Applying standard card filters')
        filtered_df = filter_dataframe(filtered_df, BANNED_CARDS)

        logger.info('Enriching commander metadata with theme and creature tags')
        filtered_df = enrich_commander_rows_with_tags(filtered_df, CSV_DIRECTORY)

        # Save commander cards
        logger.info('Saving validated commander cards')
        commander_path = f'{CSV_DIRECTORY}/commander_cards.csv'
        filtered_df.to_csv(commander_path, index=False)

        background_output = f'{CSV_DIRECTORY}/background_cards.csv'
        _generate_background_catalog(cards_file, background_output)

        logger.info('Commander card generation completed successfully')

    except (CSVFileNotFoundError, MTGJSONDownloadError) as e:
        logger.error(f'File operation error: {str(e)}')
        raise
    except CommanderValidationError as e:
        logger.error(f'Commander validation error: {str(e)}')
        raise
    except Exception as e:
        logger.error(f'Unexpected error during commander generation: {str(e)}')
        raise

def regenerate_csvs_all() -> None:
    """Regenerate all color-filtered CSV files from latest card data.

    Downloads fresh card data and recreates all color-filtered CSV files.
    Useful for updating the card database when new sets are released.

    Raises:
        MTGJSONDownloadError: If card data download fails
        DataFrameProcessingError: If data processing fails
        ColorFilterError: If color filtering fails
    """
    try:
        logger.info('Downloading latest card data from MTGJSON')
        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')

        logger.info('Loading and processing card data')
        try:
            df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
        except pd.errors.ParserError as e:
            logger.warning(f'CSV parsing error encountered: {e}. Retrying with error handling...')
            df = pd.read_csv(
                f'{CSV_DIRECTORY}/cards.csv',
                low_memory=False,
                on_bad_lines='warn',  # Warn about malformed rows but continue
                encoding_errors='replace'  # Replace bad encoding chars
            )
            logger.info('Successfully loaded card data with error handling (some rows may have been skipped)')

        logger.info('Regenerating color identity sorted files')
        save_color_filtered_csvs(df, CSV_DIRECTORY)

        logger.info('Regenerating commander cards')
        determine_commanders()

        logger.info('Card database regeneration complete')

    except Exception as e:
        logger.error(f'Failed to regenerate card database: {str(e)}')
        raise
    # Once files are regenerated, create a new legendary list (already executed in try)

def regenerate_csv_by_color(color: str) -> None:
    """Regenerate CSV file for a specific color identity.

    Args:
        color: Color name to regenerate CSV for (e.g. 'white', 'blue')

    Raises:
        ValueError: If color is not valid
        MTGJSONDownloadError: If card data download fails
        DataFrameProcessingError: If data processing fails
        ColorFilterError: If color filtering fails
    """
    try:
        if color not in SETUP_COLORS:
            raise ValueError(f'Invalid color: {color}')

        color_abv = COLOR_ABRV[SETUP_COLORS.index(color)]

        logger.info(f'Downloading latest card data for {color} cards')
        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')

        logger.info('Loading and processing card data')
        df = pd.read_csv(
            f'{CSV_DIRECTORY}/cards.csv',
            low_memory=False,
            on_bad_lines='skip',  # Skip malformed rows (MTGJSON CSV has escaping issues)
            encoding_errors='replace'  # Replace bad encoding chars
        )

        logger.info(f'Regenerating {color} cards CSV')
        # Use shared utilities to base-filter once then slice color, honoring bans
        base_df = filter_dataframe(df, BANNED_CARDS)
        base_df[base_df['colorIdentity'] == color_abv].to_csv(
            f'{CSV_DIRECTORY}/{color}_cards.csv', index=False
        )

        logger.info(f'Successfully regenerated {color} cards database')

    except Exception as e:
        logger.error(f'Failed to regenerate {color} cards: {str(e)}')
        raise

class SetupOption(Enum):
    """Enum for setup menu options."""
    INITIAL_SETUP = 'Initial Setup'
    REGENERATE_CSV = 'Regenerate CSV Files'
    BACK = 'Back'

def _display_setup_menu() -> SetupOption:
    """Display the setup menu and return the selected option.

    Returns:
        SetupOption: The selected menu option
    """
    if inquirer is not None:
        question: List[Dict[str, Any]] = [
            inquirer.List(
                'menu',
                choices=[option.value for option in SetupOption],
                carousel=True)]
        answer = inquirer.prompt(question)
        return SetupOption(answer['menu'])

    # Simple fallback when inquirer isn't installed (e.g., headless/container)
    options = list(SetupOption)
    print("\nSetup Menu:")
    for idx, opt in enumerate(options, start=1):
        print(f" {idx}) {opt.value}")
    while True:
        try:
            sel = input("Select an option [1]: ").strip() or "1"
            i = int(sel)
            if 1 <= i <= len(options):
                return options[i - 1]
        except KeyboardInterrupt:
            print("")
            return SetupOption.BACK
        except Exception:
            pass
        print("Invalid selection. Please try again.")

def setup() -> bool:
    """Run the setup process for the MTG Python Deckbuilder.

    This function provides a menu-driven interface to:
    1. Perform initial setup by downloading and processing card data
    2. Regenerate CSV files with updated card data
    3. Perform all tagging processes on the color-sorted csv files

    The function handles errors gracefully and provides feedback through logging.

    Returns:
        bool: True if setup completed successfully, False otherwise
    """
    try:
        print('Which setup operation would you like to perform?\n'
              'If this is your first time setting up, do the initial setup.\n'
              'If you\'ve done the basic setup before, you can regenerate the CSV files\n')

        choice = _display_setup_menu()

        if choice == SetupOption.INITIAL_SETUP:
            logger.info('Starting initial setup')
            initial_setup()
            logger.info('Initial setup completed successfully')
            return True

        elif choice == SetupOption.REGENERATE_CSV:
            logger.info('Starting CSV regeneration')
            regenerate_csvs_all()
            logger.info('CSV regeneration completed successfully')
            return True

        elif choice == SetupOption.BACK:
            logger.info('Setup cancelled by user')
            return False

    except Exception as e:
        logger.error(f'Error during setup: {e}')
        raise

    return False
code/file_setup/old/setup_utils.py (new file, 776 lines; listing truncated below)
@@ -0,0 +1,776 @@
|
|||
"""MTG Python Deckbuilder setup utilities.
|
||||
|
||||
This module provides utility functions for setting up and managing the MTG Python Deckbuilder
|
||||
application. It handles tasks such as downloading card data, filtering cards by various criteria,
|
||||
and processing legendary creatures for commander format.
|
||||
|
||||
Key Features:
|
||||
- Card data download from MTGJSON
|
||||
- DataFrame filtering and processing
|
||||
- Color identity filtering
|
||||
- Commander validation
|
||||
- CSV file management
|
||||
|
||||
The module integrates with settings.py for configuration and exceptions.py for error handling.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# Standard library imports
|
||||
import ast
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Union, TypedDict, Iterable, Dict, Any
|
||||
|
||||
# Third-party imports
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
# Local application imports
|
||||
from .setup_constants import (
|
||||
CSV_PROCESSING_COLUMNS,
|
||||
CARD_TYPES_TO_EXCLUDE,
|
||||
NON_LEGAL_SETS,
|
||||
SORT_CONFIG,
|
||||
FILTER_CONFIG,
|
||||
COLUMN_ORDER,
|
||||
TAGGED_COLUMN_ORDER,
|
||||
SETUP_COLORS,
|
||||
COLOR_ABRV,
|
||||
BANNED_CARDS,
|
||||
)
|
||||
from exceptions import (
|
||||
MTGJSONDownloadError,
|
||||
DataFrameProcessingError,
|
||||
ColorFilterError,
|
||||
CommanderValidationError
|
||||
)
|
||||
from type_definitions import CardLibraryDF
|
||||
from settings import FILL_NA_COLUMNS, CSV_DIRECTORY
|
||||
import logging_util
|
||||
|
||||
# Create logger for this module
|
||||
logger = logging_util.logging.getLogger(__name__)
|
||||
logger.setLevel(logging_util.LOG_LEVEL)
|
||||
logger.addHandler(logging_util.file_handler)
|
||||
logger.addHandler(logging_util.stream_handler)
|
||||
|
||||
|
||||
def _is_primary_side(value: object) -> bool:
|
||||
"""Return True when the provided side marker corresponds to a primary face."""
|
||||
try:
|
||||
if pd.isna(value):
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
text = str(value).strip().lower()
|
||||
return text in {"", "a"}
|
||||
|
||||
|
||||
def _summarize_secondary_face_exclusions(
|
||||
names: Iterable[str],
|
||||
source_df: pd.DataFrame,
|
||||
) -> List[Dict[str, Any]]:
|
||||
summaries: List[Dict[str, Any]] = []
|
||||
if not names:
|
||||
return summaries
|
||||
|
||||
for raw_name in names:
|
||||
name = str(raw_name)
|
||||
group = source_df[source_df['name'] == name]
|
||||
if group.empty:
|
||||
continue
|
||||
|
||||
primary_rows = group[group['side'].apply(_is_primary_side)] if 'side' in group.columns else pd.DataFrame()
|
||||
primary_face = (
|
||||
str(primary_rows['faceName'].iloc[0])
|
||||
if not primary_rows.empty and 'faceName' in primary_rows.columns
|
||||
else ""
|
||||
)
|
||||
layout = str(group['layout'].iloc[0]) if 'layout' in group.columns and not group.empty else ""
|
||||
faces = sorted(set(str(v) for v in group.get('faceName', pd.Series(dtype=str)).dropna().tolist()))
|
||||
eligible_faces = sorted(
|
||||
set(
|
||||
str(v)
|
||||
for v in group
|
||||
.loc[~group['side'].apply(_is_primary_side) if 'side' in group.columns else [False] * len(group)]
|
||||
.get('faceName', pd.Series(dtype=str))
|
||||
.dropna()
|
||||
.tolist()
|
||||
)
|
||||
)
|
||||
|
||||
summaries.append(
|
||||
{
|
||||
"name": name,
|
||||
"primary_face": primary_face or name.split('//')[0].strip(),
|
||||
"layout": layout,
|
||||
"faces": faces,
|
||||
"eligible_faces": eligible_faces,
|
||||
"reason": "secondary_face_only",
|
||||
}
|
||||
)
|
||||
|
||||
return summaries
|
||||
|
||||
|
||||
def _write_commander_exclusions_log(entries: List[Dict[str, Any]]) -> None:
|
||||
"""Persist commander exclusion diagnostics for downstream tooling."""
|
||||
|
||||
path = Path(CSV_DIRECTORY) / ".commander_exclusions.json"
|
||||
|
||||
if not entries:
|
||||
try:
|
||||
path.unlink()
|
||||
except FileNotFoundError:
|
||||
return
|
||||
except Exception as exc:
|
||||
logger.debug("Unable to remove commander exclusion log: %s", exc)
|
||||
return
|
||||
|
||||
payload = {
|
||||
"generated_at": datetime.now().isoformat(timespec='seconds'),
|
||||
"secondary_face_only": entries,
|
||||
}
|
||||
|
||||
try:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with path.open('w', encoding='utf-8') as handle:
|
||||
json.dump(payload, handle, indent=2, ensure_ascii=False)
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to write commander exclusion diagnostics: %s", exc)
|
||||
|
||||
|
||||
def _enforce_primary_face_commander_rules(
|
||||
candidate_df: pd.DataFrame,
|
||||
source_df: pd.DataFrame,
|
||||
) -> pd.DataFrame:
|
||||
"""Retain only primary faces and record any secondary-face-only exclusions."""
|
||||
|
||||
if candidate_df.empty or 'side' not in candidate_df.columns:
|
||||
_write_commander_exclusions_log([])
|
||||
return candidate_df
|
||||
|
||||
mask_primary = candidate_df['side'].apply(_is_primary_side)
|
||||
primary_df = candidate_df[mask_primary].copy()
|
||||
secondary_df = candidate_df[~mask_primary]
|
||||
|
||||
primary_names = set(str(n) for n in primary_df.get('name', pd.Series(dtype=str)))
|
||||
secondary_only_names = sorted(
|
||||
set(str(n) for n in secondary_df.get('name', pd.Series(dtype=str))) - primary_names
|
||||
)
|
||||
|
||||
if secondary_only_names:
|
||||
logger.info(
|
||||
"Excluding %d commander entries where only a secondary face is eligible: %s",
|
||||
len(secondary_only_names),
|
||||
", ".join(secondary_only_names),
|
||||
)
|
||||
|
||||
entries = _summarize_secondary_face_exclusions(secondary_only_names, source_df)
|
||||
_write_commander_exclusions_log(entries)
|
||||
|
||||
return primary_df
|
||||
|
||||
|
||||
def _coerce_tag_list(value: object) -> List[str]:
|
||||
"""Normalize various list-like representations into a list of strings."""
|
||||
|
||||
if value is None:
|
||||
return []
|
||||
if isinstance(value, float) and pd.isna(value):
|
||||
return []
|
||||
if isinstance(value, (list, tuple, set)):
|
||||
return [str(v).strip() for v in value if str(v).strip()]
|
||||
text = str(value).strip()
|
||||
if not text:
|
||||
return []
|
||||
try:
|
||||
parsed = ast.literal_eval(text)
|
||||
if isinstance(parsed, (list, tuple, set)):
|
||||
return [str(v).strip() for v in parsed if str(v).strip()]
|
||||
except Exception:
|
||||
pass
|
||||
parts = [part.strip() for part in text.replace(";", ",").split(",")]
|
||||
return [part for part in parts if part]
|
||||
|
||||
|
||||
def _collect_commander_tag_metadata(csv_dir: Union[str, Path]) -> Dict[str, Dict[str, List[str]]]:
|
||||
"""Aggregate theme and creature tags from color-tagged CSV files."""
|
||||
|
||||
path = Path(csv_dir)
|
||||
if not path.exists():
|
||||
return {}
|
||||
|
||||
combined: Dict[str, Dict[str, set[str]]] = {}
|
||||
columns = ("themeTags", "creatureTypes", "roleTags")
|
||||
|
||||
for color in SETUP_COLORS:
|
||||
color_path = path / f"{color}_cards.csv"
|
||||
if not color_path.exists():
|
||||
continue
|
||||
try:
|
||||
df = pd.read_csv(color_path, low_memory=False)
|
||||
except Exception as exc:
|
||||
logger.debug("Unable to read %s for commander tag enrichment: %s", color_path, exc)
|
||||
continue
|
||||
|
||||
if df.empty or ("name" not in df.columns and "faceName" not in df.columns):
|
||||
continue
|
||||
|
||||
for _, row in df.iterrows():
|
||||
face_key = str(row.get("faceName", "")).strip()
|
||||
name_key = str(row.get("name", "")).strip()
|
||||
keys = {k for k in (face_key, name_key) if k}
|
||||
if not keys:
|
||||
continue
|
||||
|
||||
for key in keys:
|
||||
bucket = combined.setdefault(key, {col: set() for col in columns})
|
||||
for col in columns:
|
||||
if col not in row:
|
||||
continue
|
||||
values = _coerce_tag_list(row.get(col))
|
||||
if values:
|
||||
bucket[col].update(values)
|
||||
|
||||
enriched: Dict[str, Dict[str, List[str]]] = {}
|
||||
for key, data in combined.items():
|
||||
enriched[key] = {col: sorted(values) for col, values in data.items() if values}
|
||||
return enriched
|
||||
|
||||
|
||||
def enrich_commander_rows_with_tags(
|
||||
df: pd.DataFrame,
|
||||
csv_dir: Union[str, Path],
|
||||
) -> pd.DataFrame:
|
||||
"""Attach theme and creature tag metadata to commander rows when available."""
|
||||
|
||||
if df.empty:
|
||||
df = df.copy()
|
||||
for column in ("themeTags", "creatureTypes", "roleTags"):
|
||||
if column not in df.columns:
|
||||
df[column] = []
|
||||
return df
|
||||
|
||||
metadata = _collect_commander_tag_metadata(csv_dir)
|
||||
if not metadata:
|
||||
df = df.copy()
|
||||
for column in ("themeTags", "creatureTypes", "roleTags"):
|
||||
if column not in df.columns:
|
||||
df[column] = [[] for _ in range(len(df))]
|
||||
return df
|
||||
|
||||
df = df.copy()
|
||||
for column in ("themeTags", "creatureTypes", "roleTags"):
|
||||
if column not in df.columns:
|
||||
df[column] = [[] for _ in range(len(df))]
|
||||
|
||||
theme_values: List[List[str]] = []
|
||||
creature_values: List[List[str]] = []
|
||||
role_values: List[List[str]] = []
|
||||
|
||||
for _, row in df.iterrows():
|
||||
face_key = str(row.get("faceName", "")).strip()
|
||||
name_key = str(row.get("name", "")).strip()
|
||||
|
||||
entry_face = metadata.get(face_key, {})
|
||||
entry_name = metadata.get(name_key, {})
|
||||
|
||||
combined: Dict[str, set[str]] = {
|
||||
"themeTags": set(_coerce_tag_list(row.get("themeTags"))),
|
||||
"creatureTypes": set(_coerce_tag_list(row.get("creatureTypes"))),
|
||||
"roleTags": set(_coerce_tag_list(row.get("roleTags"))),
|
||||
}
|
||||
|
||||
for source in (entry_face, entry_name):
|
||||
for column in combined:
|
||||
combined[column].update(source.get(column, []))
|
||||
|
||||
theme_values.append(sorted(combined["themeTags"]))
|
||||
creature_values.append(sorted(combined["creatureTypes"]))
|
||||
role_values.append(sorted(combined["roleTags"]))
|
||||
|
||||
df["themeTags"] = theme_values
|
||||
df["creatureTypes"] = creature_values
|
||||
df["roleTags"] = role_values
|
||||
|
||||
enriched_rows = sum(1 for t, c, r in zip(theme_values, creature_values, role_values) if t or c or r)
|
||||
logger.debug("Enriched %d commander rows with tag metadata", enriched_rows)
|
||||
|
||||
return df
|
||||
|
||||
# Type definitions
|
||||
class FilterRule(TypedDict):
|
||||
"""Type definition for filter rules configuration."""
|
||||
exclude: Optional[List[str]]
|
||||
require: Optional[List[str]]
|
||||
|
||||
class FilterConfig(TypedDict):
|
||||
"""Type definition for complete filter configuration."""
|
||||
layout: FilterRule
|
||||
availability: FilterRule
|
||||
promoTypes: FilterRule
|
||||
securityStamp: FilterRule
|
||||
def download_cards_csv(url: str, output_path: Union[str, Path]) -> None:
|
||||
"""Download cards data from MTGJSON and save to CSV.
|
||||
|
||||
Downloads card data from the specified MTGJSON URL and saves it to a local CSV file.
|
||||
Shows a progress bar during download using tqdm.
|
||||
|
||||
Args:
|
||||
url: URL to download cards data from (typically MTGJSON API endpoint)
|
||||
output_path: Path where the downloaded CSV file will be saved
|
||||
|
||||
Raises:
|
||||
MTGJSONDownloadError: If download fails due to network issues or invalid response
|
||||
|
||||
Example:
|
||||
>>> download_cards_csv('https://mtgjson.com/api/v5/cards.csv', 'cards.csv')
|
||||
"""
|
||||
try:
|
||||
response = requests.get(url, stream=True)
|
||||
response.raise_for_status()
|
||||
total_size = int(response.headers.get('content-length', 0))
|
||||
|
||||
with open(output_path, 'wb') as f:
|
||||
with tqdm(total=total_size, unit='iB', unit_scale=True, desc='Downloading cards data') as pbar:
|
||||
for chunk in response.iter_content(chunk_size=8192):
|
||||
size = f.write(chunk)
|
||||
pbar.update(size)
|
||||
|
||||
except requests.RequestException as e:
|
||||
logger.error(f'Failed to download cards data from {url}')
|
||||
raise MTGJSONDownloadError(
|
||||
"Failed to download cards data",
|
||||
url,
|
||||
getattr(e.response, 'status_code', None) if hasattr(e, 'response') else None
|
||||
) from e
|
||||
def check_csv_exists(filepath: Union[str, Path]) -> bool:
|
||||
"""Check if a CSV file exists at the specified path.
|
||||
|
||||
Verifies the existence of a CSV file at the given path. This function is used
|
||||
to determine if card data needs to be downloaded or if it already exists locally.
|
||||
|
||||
Args:
|
||||
filepath: Path to the CSV file to check
|
||||
|
||||
Returns:
|
||||
bool: True if the file exists, False otherwise
|
||||
|
||||
Example:
|
||||
>>> if not check_csv_exists('cards.csv'):
|
||||
... download_cards_csv(MTGJSON_API_URL, 'cards.csv')
|
||||
"""
|
||||
return Path(filepath).is_file()
|
||||
|
||||
def save_color_filtered_csvs(df: pd.DataFrame, out_dir: Union[str, Path]) -> None:
|
||||
"""Generate and save color-identity filtered CSVs for all configured colors.
|
||||
|
||||
Iterates across configured color names and their corresponding color identity
|
||||
abbreviations, filters the provided DataFrame using standard filters plus
|
||||
color identity, and writes each filtered set to CSV in the provided directory.
|
||||
|
||||
Args:
|
||||
df: Source DataFrame containing card data.
|
||||
out_dir: Output directory for the generated CSV files.
|
||||
|
||||
Raises:
|
||||
DataFrameProcessingError: If filtering fails.
|
||||
ColorFilterError: If color filtering fails for a specific color.
|
||||
"""
|
||||
out_path = Path(out_dir)
|
||||
out_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Base-filter once for efficiency, then per-color filter without redoing base filters
|
||||
try:
|
||||
# Apply full standard filtering including banned list once, then slice per color
|
||||
base_df = filter_dataframe(df, BANNED_CARDS)
|
||||
except Exception as e:
|
||||
# Wrap any unexpected issues as DataFrameProcessingError
|
||||
raise DataFrameProcessingError(
|
||||
"Failed to prepare base DataFrame for color filtering",
|
||||
"base_color_filtering",
|
||||
str(e)
|
||||
) from e
|
||||
|
||||
for color_name, color_id in zip(SETUP_COLORS, COLOR_ABRV):
|
||||
try:
|
||||
logger.info(f"Generating {color_name}_cards.csv")
|
||||
color_df = base_df[base_df['colorIdentity'] == color_id]
|
||||
color_df.to_csv(out_path / f"{color_name}_cards.csv", index=False)
|
||||
except Exception as e:
|
||||
raise ColorFilterError(
|
||||
"Failed to generate color CSV",
|
||||
color_id,
|
||||
str(e)
|
||||
) from e
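
# Hedged usage sketch, kept as a comment so importing this module stays side-effect free.
# It assumes cards.csv was already downloaded; 'csv_files' is an example path, the real
# location comes from the CSV_DIRECTORY setting.
#
#     df = pd.read_csv('csv_files/cards.csv', low_memory=False)
#     save_color_filtered_csvs(df, 'csv_files')
#     # -> one {color}_cards.csv per SETUP_COLORS entry, base-filtered once and then
#     #    sliced by the matching COLOR_ABRV identity.
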
|
||||
|
||||
def filter_dataframe(df: pd.DataFrame, banned_cards: List[str]) -> pd.DataFrame:
|
||||
"""Apply standard filters to the cards DataFrame using configuration from settings.
|
||||
|
||||
Applies a series of filters to the cards DataFrame based on configuration from settings.py.
|
||||
This includes handling null values, applying basic filters, removing illegal sets and banned cards,
|
||||
and processing special card types.
|
||||
|
||||
Args:
|
||||
df: pandas DataFrame containing card data to filter
|
||||
banned_cards: List of card names that are banned and should be excluded
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: A new DataFrame containing only the cards that pass all filters
|
||||
|
||||
Raises:
|
||||
DataFrameProcessingError: If any filtering operation fails
|
||||
|
||||
Example:
|
||||
>>> filtered_df = filter_dataframe(cards_df, ['Channel', 'Black Lotus'])
|
||||
"""
|
||||
try:
|
||||
logger.info('Starting standard DataFrame filtering')
|
||||
|
||||
# Fill null values according to configuration
|
||||
for col, fill_value in FILL_NA_COLUMNS.items():
|
||||
if col == 'faceName':
|
||||
fill_value = df['name']
|
||||
df[col] = df[col].fillna(fill_value)
|
||||
logger.debug(f'Filled NA values in {col} with {fill_value}')
|
||||
|
||||
# Apply basic filters from configuration
|
||||
filtered_df = df.copy()
|
||||
filter_config: FilterConfig = FILTER_CONFIG # Type hint for configuration
|
||||
for field, rules in filter_config.items():
|
||||
if field not in filtered_df.columns:
|
||||
logger.warning('Skipping filter for missing field %s', field)
|
||||
continue
|
||||
|
||||
for rule_type, values in rules.items():
|
||||
if not values:
|
||||
continue
|
||||
|
||||
if rule_type == 'exclude':
|
||||
for value in values:
|
||||
mask = filtered_df[field].astype(str).str.contains(
|
||||
value,
|
||||
case=False,
|
||||
na=False,
|
||||
regex=False
|
||||
)
|
||||
filtered_df = filtered_df[~mask]
|
||||
elif rule_type == 'require':
|
||||
for value in values:
|
||||
mask = filtered_df[field].astype(str).str.contains(
|
||||
value,
|
||||
case=False,
|
||||
na=False,
|
||||
regex=False
|
||||
)
|
||||
filtered_df = filtered_df[mask]
|
||||
else:
|
||||
logger.warning('Unknown filter rule type %s for field %s', rule_type, field)
|
||||
continue
|
||||
|
||||
logger.debug(f'Applied {rule_type} filter for {field}: {values}')
|
||||
|
||||
# Remove illegal sets
|
||||
for set_code in NON_LEGAL_SETS:
|
||||
filtered_df = filtered_df[~filtered_df['printings'].str.contains(set_code, na=False)]
|
||||
logger.debug('Removed illegal sets')
|
||||
|
||||
# Remove banned cards (exact, case-insensitive match on name or faceName)
|
||||
if banned_cards:
|
||||
banned_set = {b.casefold() for b in banned_cards}
|
||||
name_lc = filtered_df['name'].astype(str).str.casefold()
|
||||
face_lc = filtered_df['faceName'].astype(str).str.casefold()
|
||||
mask = ~(name_lc.isin(banned_set) | face_lc.isin(banned_set))
|
||||
before = len(filtered_df)
|
||||
filtered_df = filtered_df[mask]
|
||||
after = len(filtered_df)
|
||||
logger.debug(f'Removed banned cards: {before - after} filtered out')
|
||||
|
||||
# Remove special card types
|
||||
for card_type in CARD_TYPES_TO_EXCLUDE:
|
||||
filtered_df = filtered_df[~filtered_df['type'].str.contains(card_type, na=False)]
|
||||
logger.debug('Removed special card types')
|
||||
|
||||
# Select columns, sort, and drop duplicates
|
||||
filtered_df = filtered_df[CSV_PROCESSING_COLUMNS]
|
||||
filtered_df = filtered_df.sort_values(
|
||||
by=SORT_CONFIG['columns'],
|
||||
key=lambda col: col.str.lower() if not SORT_CONFIG['case_sensitive'] else col
|
||||
)
|
||||
filtered_df = filtered_df.drop_duplicates(subset='faceName', keep='first')
|
||||
logger.info('Completed standard DataFrame filtering')
|
||||
|
||||
return filtered_df
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to filter DataFrame: {str(e)}')
|
||||
raise DataFrameProcessingError(
|
||||
"Failed to filter DataFrame",
|
||||
"standard_filtering",
|
||||
str(e)
|
||||
) from e
|
||||
def filter_by_color_identity(df: pd.DataFrame, color_identity: str) -> pd.DataFrame:
|
||||
"""Filter DataFrame by color identity with additional color-specific processing.
|
||||
|
||||
This function extends the base filter_dataframe functionality with color-specific
|
||||
filtering logic. It is used by setup.py's filter_by_color function but provides
|
||||
a more robust and configurable implementation.
|
||||
|
||||
Args:
|
||||
df: DataFrame to filter
|
||||
color_identity: Color identity to filter by (e.g., 'W', 'U,B', 'Colorless')
|
||||
|
||||
Returns:
|
||||
DataFrame filtered by color identity
|
||||
|
||||
Raises:
|
||||
ColorFilterError: If color identity is invalid or filtering fails
|
||||
DataFrameProcessingError: If general filtering operations fail
|
||||
"""
|
||||
try:
|
||||
logger.info(f'Filtering cards for color identity: {color_identity}')
|
||||
|
||||
# Validate color identity
|
||||
with tqdm(total=1, desc='Validating color identity') as pbar:
|
||||
if not isinstance(color_identity, str):
|
||||
raise ColorFilterError(
|
||||
"Invalid color identity type",
|
||||
str(color_identity),
|
||||
"Color identity must be a string"
|
||||
)
|
||||
pbar.update(1)
|
||||
|
||||
# Apply base filtering
|
||||
with tqdm(total=1, desc='Applying base filtering') as pbar:
|
||||
filtered_df = filter_dataframe(df, BANNED_CARDS)
|
||||
pbar.update(1)
|
||||
|
||||
# Filter by color identity
|
||||
with tqdm(total=1, desc='Filtering by color identity') as pbar:
|
||||
filtered_df = filtered_df[filtered_df['colorIdentity'] == color_identity]
|
||||
logger.debug(f'Applied color identity filter: {color_identity}')
|
||||
pbar.update(1)
|
||||
|
||||
# Additional color-specific processing
|
||||
with tqdm(total=1, desc='Performing color-specific processing') as pbar:
|
||||
# Placeholder for future color-specific processing
|
||||
pbar.update(1)
|
||||
logger.info(f'Completed color identity filtering for {color_identity}')
|
||||
return filtered_df
|
||||
|
||||
except DataFrameProcessingError as e:
|
||||
raise ColorFilterError(
|
||||
"Color filtering failed",
|
||||
color_identity,
|
||||
str(e)
|
||||
) from e
|
||||
except Exception as e:
|
||||
raise ColorFilterError(
|
||||
"Unexpected error during color filtering",
|
||||
color_identity,
|
||||
str(e)
|
||||
) from e
|
||||
|
||||
def process_legendary_cards(df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Process and filter legendary cards for commander eligibility with comprehensive validation.
|
||||
|
||||
Args:
|
||||
df: DataFrame containing all cards
|
||||
|
||||
Returns:
|
||||
DataFrame containing only commander-eligible cards
|
||||
|
||||
Raises:
|
||||
CommanderValidationError: If validation fails for legendary status, special cases, or set legality
|
||||
DataFrameProcessingError: If general processing fails
|
||||
"""
|
||||
try:
|
||||
logger.info('Starting commander validation process')
|
||||
|
||||
filtered_df = df.copy()
|
||||
# Step 1: Check legendary status
|
||||
try:
|
||||
with tqdm(total=1, desc='Checking legendary status') as pbar:
|
||||
# Normalize type line for matching
|
||||
type_line = filtered_df['type'].astype(str).str.lower()
|
||||
|
||||
# Base predicates
|
||||
is_legendary = type_line.str.contains('legendary')
|
||||
is_creature = type_line.str.contains('creature')
|
||||
# Planeswalkers are only eligible if they explicitly state they can be your commander (handled in special cases step)
|
||||
is_enchantment = type_line.str.contains('enchantment')
|
||||
is_artifact = type_line.str.contains('artifact')
|
||||
is_vehicle_or_spacecraft = type_line.str.contains('vehicle') | type_line.str.contains('spacecraft')
|
||||
|
||||
# 1. Always allow Legendary Creatures (includes artifact/enchantment creatures already)
|
||||
allow_legendary_creature = is_legendary & is_creature
|
||||
|
||||
# 2. Allow Legendary Enchantment Creature (already covered by legendary creature) – ensure no plain legendary enchantments without creature type slip through
|
||||
allow_enchantment_creature = is_legendary & is_enchantment & is_creature
|
||||
|
||||
# 3. Allow certain Legendary Artifacts:
|
||||
# a) Vehicles/Spacecraft that have printed power & toughness
|
||||
has_power_toughness = filtered_df['power'].notna() & filtered_df['toughness'].notna()
|
||||
allow_artifact_vehicle = is_legendary & is_artifact & is_vehicle_or_spacecraft & has_power_toughness
|
||||
|
||||
# (Artifacts or planeswalkers with explicit permission text will be added in special cases step.)
|
||||
|
||||
baseline_mask = allow_legendary_creature | allow_enchantment_creature | allow_artifact_vehicle
|
||||
filtered_df = filtered_df[baseline_mask].copy()
|
||||
|
||||
if filtered_df.empty:
|
||||
raise CommanderValidationError(
|
||||
"No baseline eligible commanders found",
|
||||
"legendary_check",
|
||||
"After applying commander rules no cards qualified"
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
"Baseline commander counts: total=%d legendary_creatures=%d enchantment_creatures=%d artifact_vehicles=%d",
|
||||
len(filtered_df),
|
||||
int((allow_legendary_creature).sum()),
|
||||
int((allow_enchantment_creature).sum()),
|
||||
int((allow_artifact_vehicle).sum())
|
||||
)
|
||||
pbar.update(1)
|
||||
except Exception as e:
|
||||
raise CommanderValidationError(
|
||||
"Legendary status check failed",
|
||||
"legendary_check",
|
||||
str(e)
|
||||
) from e
|
||||
|
||||
# Step 2: Validate special cases
|
||||
try:
|
||||
with tqdm(total=1, desc='Validating special cases') as pbar:
|
||||
# Add any card (including planeswalkers, artifacts, non-legendary cards) that explicitly allow being a commander
|
||||
special_cases = df['text'].str.contains('can be your commander', na=False, case=False)
|
||||
special_commanders = df[special_cases].copy()
|
||||
filtered_df = pd.concat([filtered_df, special_commanders]).drop_duplicates()
|
||||
logger.debug(f'Added {len(special_commanders)} special commander cards')
|
||||
pbar.update(1)
|
||||
except Exception as e:
|
||||
raise CommanderValidationError(
|
||||
"Special case validation failed",
|
||||
"special_cases",
|
||||
str(e)
|
||||
) from e
|
||||
|
||||
# Step 3: Verify set legality
|
||||
try:
|
||||
with tqdm(total=1, desc='Verifying set legality') as pbar:
|
||||
initial_count = len(filtered_df)
|
||||
for set_code in NON_LEGAL_SETS:
|
||||
filtered_df = filtered_df[
|
||||
~filtered_df['printings'].str.contains(set_code, na=False)
|
||||
]
|
||||
removed_count = initial_count - len(filtered_df)
|
||||
logger.debug(f'Removed {removed_count} cards from illegal sets')
|
||||
pbar.update(1)
|
||||
except Exception as e:
|
||||
raise CommanderValidationError(
|
||||
"Set legality verification failed",
|
||||
"set_legality",
|
||||
str(e)
|
||||
) from e
|
||||
filtered_df = _enforce_primary_face_commander_rules(filtered_df, df)
|
||||
|
||||
logger.info('Commander validation complete. %d valid commanders found', len(filtered_df))
|
||||
return filtered_df
|
||||
|
||||
except CommanderValidationError:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise DataFrameProcessingError(
|
||||
"Failed to process legendary cards",
|
||||
"commander_processing",
|
||||
str(e)
|
||||
) from e
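
# Hedged illustration of the baseline mask above (comment only; the rows are invented).
# A legendary creature qualifies outright; a plain legendary enchantment does not; a
# legendary artifact Vehicle needs printed power/toughness; a planeswalker only gets in
# through the "can be your commander" text handled by the special-cases step.
#
#     'Legendary Creature — Bear'                 -> eligible
#     'Legendary Enchantment'                     -> filtered out at this step
#     'Legendary Artifact — Vehicle' with 4/4     -> eligible
#     'Legendary Planeswalker'                    -> only via explicit commander text
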
|
||||
|
||||
def process_card_dataframe(df: CardLibraryDF, batch_size: int = 1000, columns_to_keep: Optional[List[str]] = None,
|
||||
include_commander_cols: bool = False, skip_availability_checks: bool = False) -> CardLibraryDF:
|
||||
"""Process DataFrame with common operations in batches.
|
||||
|
||||
Args:
|
||||
df: DataFrame to process
|
||||
batch_size: Size of batches for processing
|
||||
columns_to_keep: List of columns to keep (default: COLUMN_ORDER)
|
||||
include_commander_cols: Whether to include commander-specific columns
|
||||
skip_availability_checks: Whether to skip availability and security checks (default: False)
|
||||
|
||||
|
||||
Returns:
|
||||
CardLibraryDF: Processed DataFrame with standardized structure
|
||||
"""
|
||||
logger.info("Processing card DataFrame...")
|
||||
|
||||
if columns_to_keep is None:
|
||||
columns_to_keep = TAGGED_COLUMN_ORDER.copy()
|
||||
if include_commander_cols:
|
||||
commander_cols = ['printings', 'text', 'power', 'toughness', 'keywords']
|
||||
columns_to_keep.extend(col for col in commander_cols if col not in columns_to_keep)
|
||||
|
||||
# Fill NA values
|
||||
df.loc[:, 'colorIdentity'] = df['colorIdentity'].fillna('Colorless')
|
||||
df.loc[:, 'faceName'] = df['faceName'].fillna(df['name'])
|
||||
|
||||
# Process in batches
|
||||
total_batches = len(df) // batch_size + 1
|
||||
processed_dfs = []
|
||||
|
||||
for i in tqdm(range(total_batches), desc="Processing batches"):
|
||||
start_idx = i * batch_size
|
||||
end_idx = min((i + 1) * batch_size, len(df))
|
||||
batch = df.iloc[start_idx:end_idx].copy()
|
||||
|
||||
if not skip_availability_checks:
|
||||
columns_to_keep = COLUMN_ORDER.copy()
|
||||
logger.debug("Performing column checks...")
|
||||
# Common processing steps
|
||||
batch = batch[batch['availability'].str.contains('paper', na=False)]
|
||||
batch = batch.loc[batch['layout'] != 'reversible_card']
|
||||
batch = batch.loc[batch['promoTypes'] != 'playtest']
|
||||
batch = batch.loc[batch['securityStamp'] != 'heart']
|
||||
batch = batch.loc[batch['securityStamp'] != 'acorn']
|
||||
# Keep only specified columns
|
||||
batch = batch[columns_to_keep]
|
||||
processed_dfs.append(batch)
|
||||
else:
|
||||
logger.debug("Skipping column checks...")
|
||||
# Even when skipping availability checks, still ensure columns_to_keep if provided
|
||||
if columns_to_keep is not None:
|
||||
try:
|
||||
batch = batch[columns_to_keep]
|
||||
except Exception:
|
||||
# If requested columns are not present, keep as-is
|
||||
pass
|
||||
processed_dfs.append(batch)
|
||||
|
||||
# Combine processed batches
|
||||
result = pd.concat(processed_dfs, ignore_index=True)
|
||||
|
||||
# Final processing
|
||||
result.drop_duplicates(subset='faceName', keep='first', inplace=True)
|
||||
result.sort_values(by=['name', 'side'], key=lambda col: col.str.lower(), inplace=True)
|
||||
|
||||
logger.info("DataFrame processing completed")
|
||||
return result
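
# Hedged usage sketch (comment only). `df` is assumed to hold raw MTGJSON rows; the second
# call shows the lighter path that keeps caller-specified columns and skips the
# availability/security filtering.
#
#     processed = process_card_dataframe(df, include_commander_cols=True)
#     slim = process_card_dataframe(df, skip_availability_checks=True,
#                                   columns_to_keep=['name', 'faceName', 'type'])
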
|
||||
|
||||
# Backward-compatibility wrapper used by deck_builder.builder
|
||||
def regenerate_csvs_all() -> None: # pragma: no cover - simple delegator
|
||||
"""Delegate to setup.regenerate_csvs_all to preserve existing imports.
|
||||
|
||||
Some modules import regenerate_csvs_all from setup_utils. Keep this
|
||||
function as a stable indirection to avoid breaking callers.
|
||||
"""
|
||||
from . import setup as setup_module # local import to avoid circular import
|
||||
setup_module.regenerate_csvs_all()
|
||||
|
|
@ -1,362 +1,374 @@
|
|||
"""MTG Python Deckbuilder setup module.
|
||||
"""Parquet-based setup for MTG Python Deckbuilder.
|
||||
|
||||
This module provides the main setup functionality for the MTG Python Deckbuilder
|
||||
application. It handles initial setup tasks such as downloading card data,
|
||||
creating color-filtered card lists, and gener logger.info(f'Downloading latest card data for {color} cards')
|
||||
download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
|
||||
This module handles downloading and processing MTGJSON Parquet data for the
|
||||
MTG Python Deckbuilder. It replaces the old CSV-based multi-file approach
|
||||
with a single-file Parquet workflow.
|
||||
|
||||
logger.info('Loading and processing card data')
|
||||
try:
|
||||
df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
|
||||
except pd.errors.ParserError as e:
|
||||
logger.warning(f'CSV parsing error encountered: {e}. Retrying with error handling...')
|
||||
df = pd.read_csv(
|
||||
f'{CSV_DIRECTORY}/cards.csv',
|
||||
low_memory=False,
|
||||
on_bad_lines='warn', # Warn about malformed rows but continue
|
||||
encoding_errors='replace' # Replace bad encoding chars
|
||||
)
|
||||
logger.info('Successfully loaded card data with error handling (some rows may have been skipped)')
|
||||
Key Changes from CSV approach:
|
||||
- Single all_cards.parquet file instead of 18+ color-specific CSVs
|
||||
- Downloads from MTGJSON Parquet API (faster, smaller)
|
||||
- Adds isCommander and isBackground boolean flags
|
||||
- Filters to essential columns only (14 base + 4 custom = 18 total)
|
||||
- Uses DataLoader abstraction for format flexibility
|
||||
|
||||
logger.info(f'Regenerating {color} cards CSV')der-eligible card lists.
|
||||
|
||||
Key Features:
|
||||
- Initial setup and configuration
|
||||
- Card data download and processing
|
||||
- Color-based card filtering
|
||||
- Commander card list generation
|
||||
- CSV file management and validation
|
||||
|
||||
The module works in conjunction with setup_utils.py for utility functions and
|
||||
exceptions.py for error handling.
|
||||
Introduced in v3.0.0 as part of CSV→Parquet migration.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# Standard library imports
|
||||
from enum import Enum
|
||||
import os
|
||||
from typing import List, Dict, Any
|
||||
|
||||
# Third-party imports (optional)
|
||||
try:
|
||||
import inquirer # type: ignore
|
||||
except Exception:
|
||||
inquirer = None # Fallback to simple input-based menu when unavailable
|
||||
import pandas as pd
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
|
||||
# Local imports
|
||||
from .data_loader import DataLoader, validate_schema
|
||||
from .setup_constants import (
|
||||
CSV_PROCESSING_COLUMNS,
|
||||
CARD_TYPES_TO_EXCLUDE,
|
||||
NON_LEGAL_SETS,
|
||||
BANNED_CARDS,
|
||||
FILTER_CONFIG,
|
||||
SORT_CONFIG,
|
||||
)
|
||||
import logging_util
|
||||
from settings import CSV_DIRECTORY
|
||||
from .setup_constants import BANNED_CARDS, SETUP_COLORS, COLOR_ABRV, MTGJSON_API_URL
|
||||
from .setup_utils import (
|
||||
download_cards_csv,
|
||||
filter_dataframe,
|
||||
process_legendary_cards,
|
||||
check_csv_exists,
|
||||
save_color_filtered_csvs,
|
||||
enrich_commander_rows_with_tags,
|
||||
)
|
||||
from exceptions import (
|
||||
CSVFileNotFoundError,
|
||||
CommanderValidationError,
|
||||
MTGJSONDownloadError
|
||||
)
|
||||
from scripts import generate_background_cards as background_cards_script
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
from path_util import card_files_raw_dir, get_processed_cards_path
|
||||
import settings
|
||||
|
||||
logger = logging_util.get_logger(__name__)
|
||||
|
||||
# MTGJSON Parquet API URL
|
||||
MTGJSON_PARQUET_URL = "https://mtgjson.com/api/v5/parquet/cards.parquet"
|
||||
|
||||
|
||||
def _generate_background_catalog(cards_path: str, output_path: str) -> None:
|
||||
"""Regenerate ``background_cards.csv`` from the latest cards dataset."""
|
||||
|
||||
logger.info('Generating background cards catalog')
|
||||
args = [
|
||||
'--source', cards_path,
|
||||
'--output', output_path,
|
||||
]
|
||||
try:
|
||||
background_cards_script.main(args)
|
||||
except Exception: # pragma: no cover - surfaced to caller/test
|
||||
logger.exception('Failed to generate background catalog')
|
||||
raise
|
||||
else:
|
||||
logger.info('Background cards catalog generated successfully')
|
||||
|
||||
# Create logger for this module
|
||||
logger = logging_util.logging.getLogger(__name__)
|
||||
logger.setLevel(logging_util.LOG_LEVEL)
|
||||
logger.addHandler(logging_util.file_handler)
|
||||
logger.addHandler(logging_util.stream_handler)
|
||||
|
||||
# Create CSV directory if it doesn't exist
|
||||
if not os.path.exists(CSV_DIRECTORY):
|
||||
os.makedirs(CSV_DIRECTORY)
|
||||
|
||||
## Note: using shared check_csv_exists from setup_utils to avoid duplication
|
||||
|
||||
def initial_setup() -> None:
|
||||
"""Perform initial setup by downloading card data and creating filtered CSV files.
|
||||
|
||||
Downloads the latest card data from MTGJSON if needed, creates color-filtered CSV files,
|
||||
and generates commander-eligible cards list. Uses utility functions from setup_utils.py
|
||||
for file operations and data processing.
|
||||
|
||||
Raises:
|
||||
CSVFileNotFoundError: If required CSV files cannot be found
|
||||
MTGJSONDownloadError: If card data download fails
|
||||
DataFrameProcessingError: If data processing fails
|
||||
ColorFilterError: If color filtering fails
|
||||
"""
|
||||
logger.info('Checking for cards.csv file')
|
||||
|
||||
try:
|
||||
cards_file = f'{CSV_DIRECTORY}/cards.csv'
|
||||
try:
|
||||
with open(cards_file, 'r', encoding='utf-8'):
|
||||
logger.info('cards.csv exists')
|
||||
except FileNotFoundError:
|
||||
logger.info('cards.csv not found, downloading from mtgjson')
|
||||
download_cards_csv(MTGJSON_API_URL, cards_file)
|
||||
|
||||
df = pd.read_csv(cards_file, low_memory=False)
|
||||
|
||||
logger.info('Checking for color identity sorted files')
|
||||
# Generate color-identity filtered CSVs in one pass
|
||||
save_color_filtered_csvs(df, CSV_DIRECTORY)
|
||||
|
||||
# Generate commander list
|
||||
determine_commanders()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f'Error during initial setup: {str(e)}')
|
||||
raise
|
||||
|
||||
## Removed local filter_by_color in favor of setup_utils.save_color_filtered_csvs
|
||||
|
||||
def determine_commanders() -> None:
|
||||
"""Generate commander_cards.csv containing all cards eligible to be commanders.
|
||||
|
||||
This function processes the card database to identify and validate commander-eligible cards,
|
||||
applying comprehensive validation steps and filtering criteria.
|
||||
|
||||
Raises:
|
||||
CSVFileNotFoundError: If cards.csv is missing and cannot be downloaded
|
||||
MTGJSONDownloadError: If downloading cards data fails
|
||||
CommanderValidationError: If commander validation fails
|
||||
DataFrameProcessingError: If data processing operations fail
|
||||
"""
|
||||
logger.info('Starting commander card generation process')
|
||||
|
||||
try:
|
||||
# Check for cards.csv with progress tracking
|
||||
cards_file = f'{CSV_DIRECTORY}/cards.csv'
|
||||
if not check_csv_exists(cards_file):
|
||||
logger.info('cards.csv not found, initiating download')
|
||||
download_cards_csv(MTGJSON_API_URL, cards_file)
|
||||
else:
|
||||
logger.info('cards.csv found, proceeding with processing')
|
||||
|
||||
# Load and process cards data
|
||||
logger.info('Loading card data from CSV')
|
||||
df = pd.read_csv(cards_file, low_memory=False)
|
||||
|
||||
# Process legendary cards with validation
|
||||
logger.info('Processing and validating legendary cards')
|
||||
try:
|
||||
filtered_df = process_legendary_cards(df)
|
||||
except CommanderValidationError as e:
|
||||
logger.error(f'Commander validation failed: {str(e)}')
|
||||
raise
|
||||
|
||||
# Apply standard filters
|
||||
logger.info('Applying standard card filters')
|
||||
filtered_df = filter_dataframe(filtered_df, BANNED_CARDS)
|
||||
|
||||
logger.info('Enriching commander metadata with theme and creature tags')
|
||||
filtered_df = enrich_commander_rows_with_tags(filtered_df, CSV_DIRECTORY)
|
||||
|
||||
# Save commander cards
|
||||
logger.info('Saving validated commander cards')
|
||||
commander_path = f'{CSV_DIRECTORY}/commander_cards.csv'
|
||||
filtered_df.to_csv(commander_path, index=False)
|
||||
|
||||
background_output = f'{CSV_DIRECTORY}/background_cards.csv'
|
||||
_generate_background_catalog(cards_file, background_output)
|
||||
|
||||
logger.info('Commander card generation completed successfully')
|
||||
|
||||
except (CSVFileNotFoundError, MTGJSONDownloadError) as e:
|
||||
logger.error(f'File operation error: {str(e)}')
|
||||
raise
|
||||
except CommanderValidationError as e:
|
||||
logger.error(f'Commander validation error: {str(e)}')
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f'Unexpected error during commander generation: {str(e)}')
|
||||
raise
|
||||
|
||||
def regenerate_csvs_all() -> None:
|
||||
"""Regenerate all color-filtered CSV files from latest card data.
|
||||
|
||||
Downloads fresh card data and recreates all color-filtered CSV files.
|
||||
Useful for updating the card database when new sets are released.
|
||||
|
||||
Raises:
|
||||
MTGJSONDownloadError: If card data download fails
|
||||
DataFrameProcessingError: If data processing fails
|
||||
ColorFilterError: If color filtering fails
|
||||
"""
|
||||
try:
|
||||
logger.info('Downloading latest card data from MTGJSON')
|
||||
download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
|
||||
|
||||
logger.info('Loading and processing card data')
|
||||
try:
|
||||
df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
|
||||
except pd.errors.ParserError as e:
|
||||
logger.warning(f'CSV parsing error encountered: {e}. Retrying with error handling...')
|
||||
df = pd.read_csv(
|
||||
f'{CSV_DIRECTORY}/cards.csv',
|
||||
low_memory=False,
|
||||
on_bad_lines='warn', # Warn about malformed rows but continue
|
||||
encoding_errors='replace' # Replace bad encoding chars
|
||||
)
|
||||
logger.info('Successfully loaded card data with error handling (some rows may have been skipped)')
|
||||
|
||||
logger.info('Regenerating color identity sorted files')
|
||||
save_color_filtered_csvs(df, CSV_DIRECTORY)
|
||||
|
||||
logger.info('Regenerating commander cards')
|
||||
determine_commanders()
|
||||
|
||||
logger.info('Card database regeneration complete')
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to regenerate card database: {str(e)}')
|
||||
raise
|
||||
# Once files are regenerated, create a new legendary list (already executed in try)
|
||||
|
||||
def regenerate_csv_by_color(color: str) -> None:
    """Regenerate CSV file for a specific color identity.

    Args:
        color: Color name to regenerate CSV for (e.g. 'white', 'blue')

    Raises:
        ValueError: If color is not valid
        MTGJSONDownloadError: If card data download fails
        DataFrameProcessingError: If data processing fails
        ColorFilterError: If color filtering fails
    """
    try:
        if color not in SETUP_COLORS:
            raise ValueError(f'Invalid color: {color}')

        color_abv = COLOR_ABRV[SETUP_COLORS.index(color)]

        logger.info(f'Downloading latest card data for {color} cards')
        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')

        logger.info('Loading and processing card data')
        df = pd.read_csv(
            f'{CSV_DIRECTORY}/cards.csv',
            low_memory=False,
            on_bad_lines='skip',  # Skip malformed rows (MTGJSON CSV has escaping issues)
            encoding_errors='replace'  # Replace bad encoding chars
        )

        logger.info(f'Regenerating {color} cards CSV')
        # Use shared utilities to base-filter once then slice color, honoring bans
        base_df = filter_dataframe(df, BANNED_CARDS)
        base_df[base_df['colorIdentity'] == color_abv].to_csv(
            f'{CSV_DIRECTORY}/{color}_cards.csv', index=False
        )

        logger.info(f'Successfully regenerated {color} cards database')

    except Exception as e:
        logger.error(f'Failed to regenerate {color} cards: {str(e)}')


def download_parquet_from_mtgjson(output_path: str) -> None:
    """Download MTGJSON cards.parquet file.

    Args:
        output_path: Where to save the downloaded Parquet file

    Raises:
        requests.RequestException: If download fails
        IOError: If file cannot be written
    """
    logger.info(f"Downloading MTGJSON Parquet from {MTGJSON_PARQUET_URL}")

    try:
        response = requests.get(MTGJSON_PARQUET_URL, stream=True, timeout=60)
        response.raise_for_status()

        # Get file size for progress bar
        total_size = int(response.headers.get('content-length', 0))

        # Ensure output directory exists
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # Download with progress bar
        with open(output_path, 'wb') as f, tqdm(
            total=total_size,
            unit='B',
            unit_scale=True,
            desc='Downloading cards.parquet'
        ) as pbar:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                pbar.update(len(chunk))

        logger.info(f"✓ Downloaded {total_size / (1024**2):.2f} MB to {output_path}")

    except requests.RequestException as e:
        logger.error(f"Failed to download MTGJSON Parquet: {e}")
        raise
    except IOError as e:
        logger.error(f"Failed to write Parquet file: {e}")
        raise
|
||||
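
# Hedged verification sketch (comment only; reading Parquet requires pyarrow or fastparquet).
# The raw path mirrors the one initial_setup builds from card_files_raw_dir().
#
#     raw_path = os.path.join(card_files_raw_dir(), 'cards.parquet')
#     download_parquet_from_mtgjson(raw_path)
#     df = pd.read_parquet(raw_path)
#     print(f"{len(df)} raw printings downloaded")
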
|
||||
class SetupOption(Enum):
|
||||
"""Enum for setup menu options."""
|
||||
INITIAL_SETUP = 'Initial Setup'
|
||||
REGENERATE_CSV = 'Regenerate CSV Files'
|
||||
BACK = 'Back'
|
||||
|
||||
def _display_setup_menu() -> SetupOption:
    """Display the setup menu and return the selected option.

    Returns:
        SetupOption: The selected menu option
    """
    if inquirer is not None:
        question: List[Dict[str, Any]] = [
            inquirer.List(
                'menu',
                choices=[option.value for option in SetupOption],
                carousel=True)]
        answer = inquirer.prompt(question)
        return SetupOption(answer['menu'])

    # Simple fallback when inquirer isn't installed (e.g., headless/container)
    options = list(SetupOption)
    print("\nSetup Menu:")
    for idx, opt in enumerate(options, start=1):
        print(f"  {idx}) {opt.value}")
    while True:
        try:
            sel = input("Select an option [1]: ").strip() or "1"
            i = int(sel)
            if 1 <= i <= len(options):
                return options[i - 1]
        except KeyboardInterrupt:
            print("")
            return SetupOption.BACK
        except Exception:
            pass
        print("Invalid selection. Please try again.")


def setup() -> bool:
    """Run the setup process for the MTG Python Deckbuilder.

    This function provides a menu-driven interface to:
    1. Perform initial setup by downloading and processing card data
    2. Regenerate CSV files with updated card data
    3. Perform all tagging processes on the color-sorted csv files

    The function handles errors gracefully and provides feedback through logging.

    Returns:
        bool: True if setup completed successfully, False otherwise
    """
    try:
        print('Which setup operation would you like to perform?\n'
              'If this is your first time setting up, do the initial setup.\n'
              'If you\'ve done the basic setup before, you can regenerate the CSV files\n')

        choice = _display_setup_menu()

        if choice == SetupOption.INITIAL_SETUP:
            logger.info('Starting initial setup')
            initial_setup()
            logger.info('Initial setup completed successfully')
            return True

        elif choice == SetupOption.REGENERATE_CSV:
            logger.info('Starting CSV regeneration')
            regenerate_csvs_all()
            logger.info('CSV regeneration completed successfully')
            return True

        elif choice == SetupOption.BACK:
            logger.info('Setup cancelled by user')
            return False

    except Exception as e:
        logger.error(f'Error during setup: {e}')
        raise


def is_valid_commander(row: pd.Series) -> bool:
    """Determine if a card can be a commander.

    Criteria:
    - Legendary Creature
    - OR: Has "can be your commander" in text
    - OR: Background (Partner with Background)

    Args:
        row: DataFrame row with card data

    Returns:
        True if card can be a commander
    """
    type_line = str(row.get('type', ''))
    text = str(row.get('text', '')).lower()

    # Legendary Creature
    if 'Legendary' in type_line and 'Creature' in type_line:
        return True

    # Special text (e.g., "can be your commander")
    if 'can be your commander' in text:
        return True

    # Backgrounds can be commanders (with Choose a Background)
    if 'Background' in type_line:
        return True

    return False
|
||||
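
# Hedged illustration (not part of the original module): how the predicate reads a single
# row. The card is invented; only the 'type' and 'text' fields matter to the check.
def _example_is_valid_commander() -> bool:
    row = pd.Series({'type': 'Legendary Creature — Human Wizard', 'text': ''})
    return is_valid_commander(row)  # True: legendary creature
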
|
||||
|
||||
def is_background(row: pd.Series) -> bool:
|
||||
"""Determine if a card is a Background.
|
||||
|
||||
Args:
|
||||
row: DataFrame row with card data
|
||||
|
||||
Returns:
|
||||
True if card has Background type
|
||||
"""
|
||||
type_line = str(row.get('type', ''))
|
||||
return 'Background' in type_line
|
||||
|
||||
|
||||
def extract_creature_types(row: pd.Series) -> str:
|
||||
"""Extract creature types from type line.
|
||||
|
||||
Args:
|
||||
row: DataFrame row with card data
|
||||
|
||||
Returns:
|
||||
Comma-separated creature types or empty string
|
||||
"""
|
||||
type_line = str(row.get('type', ''))
|
||||
|
||||
# Check if it's a creature
|
||||
if 'Creature' not in type_line:
|
||||
return ''
|
||||
|
||||
# Split on — to get subtypes
|
||||
if '—' in type_line:
|
||||
parts = type_line.split('—')
|
||||
if len(parts) >= 2:
|
||||
# Get everything after the dash, strip whitespace
|
||||
subtypes = parts[1].strip()
|
||||
return subtypes
|
||||
|
||||
return ''
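
# Hedged illustration (not part of the original module): the '—' subtype split above in action.
def _example_extract_creature_types() -> str:
    row = pd.Series({'type': 'Legendary Creature — Elf Druid'})
    return extract_creature_types(row)  # -> 'Elf Druid'
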
|
||||
|
||||
|
||||
def process_raw_parquet(raw_path: str, output_path: str) -> pd.DataFrame:
|
||||
"""Process raw MTGJSON Parquet into processed all_cards.parquet.
|
||||
|
||||
This function:
|
||||
1. Loads raw Parquet (all ~82 columns)
|
||||
2. Filters to essential columns (CSV_PROCESSING_COLUMNS)
|
||||
3. Applies standard filtering (banned cards, illegal sets, special types)
|
||||
4. Deduplicates by faceName (keep first printing only)
|
||||
5. Adds custom columns: creatureTypes, themeTags, isCommander, isBackground
|
||||
6. Validates schema
|
||||
7. Writes to processed directory
|
||||
|
||||
Args:
|
||||
raw_path: Path to raw cards.parquet from MTGJSON
|
||||
output_path: Path to save processed all_cards.parquet
|
||||
|
||||
Returns:
|
||||
Processed DataFrame
|
||||
|
||||
Raises:
|
||||
ValueError: If schema validation fails
|
||||
"""
|
||||
logger.info(f"Processing {raw_path}")
|
||||
|
||||
# Load raw Parquet with DataLoader
|
||||
loader = DataLoader()
|
||||
df = loader.read_cards(raw_path)
|
||||
|
||||
logger.info(f"Loaded {len(df)} cards with {len(df.columns)} columns")
|
||||
|
||||
# Step 1: Fill NA values
|
||||
logger.info("Filling NA values")
|
||||
for col, fill_value in settings.FILL_NA_COLUMNS.items():
|
||||
if col in df.columns:
|
||||
if col == 'faceName':
|
||||
df[col] = df[col].fillna(df['name'])
|
||||
else:
|
||||
df[col] = df[col].fillna(fill_value)
|
||||
|
||||
# Step 2: Apply configuration-based filters (FILTER_CONFIG)
|
||||
logger.info("Applying configuration filters")
|
||||
for field, rules in FILTER_CONFIG.items():
|
||||
if field not in df.columns:
|
||||
logger.warning(f"Skipping filter for missing field: {field}")
|
||||
continue
|
||||
|
||||
for rule_type, values in rules.items():
|
||||
if not values:
|
||||
continue
|
||||
|
||||
if rule_type == 'exclude':
|
||||
for value in values:
|
||||
mask = df[field].astype(str).str.contains(value, case=False, na=False, regex=False)
|
||||
before = len(df)
|
||||
df = df[~mask]
|
||||
logger.debug(f"Excluded {field} containing '{value}': {before - len(df)} removed")
|
||||
elif rule_type == 'require':
|
||||
for value in values:
|
||||
mask = df[field].astype(str).str.contains(value, case=False, na=False, regex=False)
|
||||
before = len(df)
|
||||
df = df[mask]
|
||||
logger.debug(f"Required {field} containing '{value}': {before - len(df)} removed")
|
||||
|
||||
# Step 3: Remove illegal sets
|
||||
if 'printings' in df.columns:
|
||||
logger.info("Removing illegal sets")
|
||||
for set_code in NON_LEGAL_SETS:
|
||||
before = len(df)
|
||||
df = df[~df['printings'].str.contains(set_code, na=False)]
|
||||
if len(df) < before:
|
||||
logger.debug(f"Removed set {set_code}: {before - len(df)} cards")
|
||||
|
||||
# Step 4: Remove banned cards
|
||||
logger.info("Removing banned cards")
|
||||
banned_set = {b.casefold() for b in BANNED_CARDS}
|
||||
name_lc = df['name'].astype(str).str.casefold()
|
||||
face_lc = df['faceName'].astype(str).str.casefold() if 'faceName' in df.columns else name_lc
|
||||
mask = ~(name_lc.isin(banned_set) | face_lc.isin(banned_set))
|
||||
before = len(df)
|
||||
df = df[mask]
|
||||
logger.debug(f"Removed banned cards: {before - len(df)} filtered out")
|
||||
|
||||
# Step 5: Remove special card types
|
||||
logger.info("Removing special card types")
|
||||
for card_type in CARD_TYPES_TO_EXCLUDE:
|
||||
before = len(df)
|
||||
df = df[~df['type'].str.contains(card_type, na=False)]
|
||||
if len(df) < before:
|
||||
logger.debug(f"Removed type {card_type}: {before - len(df)} cards")
|
||||
|
||||
# Step 6: Filter to essential columns only (reduce from ~82 to 14)
|
||||
logger.info(f"Filtering to {len(CSV_PROCESSING_COLUMNS)} essential columns")
|
||||
df = df[CSV_PROCESSING_COLUMNS]
|
||||
|
||||
# Step 7: Sort and deduplicate (CRITICAL: keeps only one printing per unique card)
|
||||
logger.info("Sorting and deduplicating cards")
|
||||
df = df.sort_values(
|
||||
by=SORT_CONFIG['columns'],
|
||||
key=lambda col: col.str.lower() if not SORT_CONFIG['case_sensitive'] else col
|
||||
)
|
||||
before = len(df)
|
||||
df = df.drop_duplicates(subset='faceName', keep='first')
|
||||
logger.info(f"Deduplicated: {before} → {len(df)} cards ({before - len(df)} duplicate printings removed)")
|
||||
|
||||
# Step 8: Add custom columns
|
||||
logger.info("Adding custom columns: creatureTypes, themeTags, isCommander, isBackground")
|
||||
|
||||
# creatureTypes: extracted from type line
|
||||
df['creatureTypes'] = df.apply(extract_creature_types, axis=1)
|
||||
|
||||
# themeTags: empty placeholder (filled during tagging)
|
||||
df['themeTags'] = ''
|
||||
|
||||
# isCommander: boolean flag
|
||||
df['isCommander'] = df.apply(is_valid_commander, axis=1)
|
||||
|
||||
# isBackground: boolean flag
|
||||
df['isBackground'] = df.apply(is_background, axis=1)
|
||||
|
||||
# Reorder columns to match CARD_DATA_COLUMNS
|
||||
# CARD_DATA_COLUMNS has: name, faceName, edhrecRank, colorIdentity, colors,
|
||||
# manaCost, manaValue, type, creatureTypes, text,
|
||||
# power, toughness, keywords, themeTags, layout, side
|
||||
# We need to add isCommander and isBackground at the end
|
||||
final_columns = settings.CARD_DATA_COLUMNS + ['isCommander', 'isBackground']
|
||||
|
||||
# Ensure all columns exist
|
||||
for col in final_columns:
|
||||
if col not in df.columns:
|
||||
logger.warning(f"Column {col} missing, adding empty column")
|
||||
df[col] = ''
|
||||
|
||||
df = df[final_columns]
|
||||
|
||||
logger.info(f"Final dataset: {len(df)} cards, {len(df.columns)} columns")
|
||||
logger.info(f"Commanders: {df['isCommander'].sum()}")
|
||||
logger.info(f"Backgrounds: {df['isBackground'].sum()}")
|
||||
|
||||
# Validate schema (check required columns present)
|
||||
try:
|
||||
validate_schema(df)
|
||||
logger.info("✓ Schema validation passed")
|
||||
except ValueError as e:
|
||||
logger.error(f"Schema validation failed: {e}")
|
||||
raise
|
||||
|
||||
# Write to processed directory
|
||||
logger.info(f"Writing processed Parquet to {output_path}")
|
||||
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
||||
loader.write_cards(df, output_path)
|
||||
|
||||
logger.info(f"✓ Created {output_path}")
|
||||
|
||||
return df
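
# Hedged spot-check sketch (comment only): inspect the processed output without re-running
# the full setup. Reading Parquet here assumes pyarrow (or fastparquet) is installed.
#
#     processed = DataLoader().read_cards(get_processed_cards_path())
#     print(len(processed), 'unique cards;', int(processed['isCommander'].sum()), 'commanders')
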
|
||||
|
||||
|
||||
def initial_setup() -> None:
|
||||
"""Download and process MTGJSON Parquet data.
|
||||
|
||||
Modern Parquet-based setup workflow (replaces legacy CSV approach).
|
||||
|
||||
Workflow:
|
||||
1. Download cards.parquet from MTGJSON → card_files/raw/cards.parquet
|
||||
2. Process and filter → card_files/processed/all_cards.parquet
|
||||
3. No color-specific files (filter at query time instead)
|
||||
|
||||
Raises:
|
||||
Various exceptions from download/processing steps
|
||||
"""
|
||||
logger.info("=" * 80)
|
||||
logger.info("Starting Parquet-based initial setup")
|
||||
logger.info("=" * 80)
|
||||
|
||||
# Step 1: Download raw Parquet
|
||||
raw_dir = card_files_raw_dir()
|
||||
raw_path = os.path.join(raw_dir, "cards.parquet")
|
||||
|
||||
if os.path.exists(raw_path):
|
||||
logger.info(f"Raw Parquet already exists: {raw_path}")
|
||||
logger.info("Skipping download (delete file to re-download)")
|
||||
else:
|
||||
download_parquet_from_mtgjson(raw_path)
|
||||
|
||||
# Step 2: Process raw → processed
|
||||
processed_path = get_processed_cards_path()
|
||||
|
||||
logger.info(f"Processing raw Parquet → {processed_path}")
|
||||
process_raw_parquet(raw_path, processed_path)
|
||||
|
||||
logger.info("=" * 80)
|
||||
logger.info("✓ Parquet setup complete")
|
||||
logger.info(f" Raw: {raw_path}")
|
||||
logger.info(f" Processed: {processed_path}")
|
||||
logger.info("=" * 80)
|
||||
|
||||
|
||||
def regenerate_processed_parquet() -> None:
|
||||
"""Regenerate processed Parquet from existing raw file.
|
||||
|
||||
Useful when:
|
||||
- Column processing logic changes
|
||||
- Adding new custom columns
|
||||
- Testing without re-downloading
|
||||
"""
|
||||
logger.info("Regenerating processed Parquet from raw file")
|
||||
|
||||
raw_path = os.path.join(card_files_raw_dir(), "cards.parquet")
|
||||
|
||||
if not os.path.exists(raw_path):
|
||||
logger.error(f"Raw Parquet not found: {raw_path}")
|
||||
logger.error("Run initial_setup_parquet() first to download")
|
||||
raise FileNotFoundError(f"Raw Parquet not found: {raw_path}")
|
||||
|
||||
processed_path = get_processed_cards_path()
|
||||
process_raw_parquet(raw_path, processed_path)
|
||||
|
||||
logger.info(f"✓ Regenerated {processed_path}")
|
||||
|
|
|
|||
|
|
@ -16,8 +16,8 @@ __all__ = [
|
|||
# Banned cards consolidated here (remains specific to setup concerns)
|
||||
BANNED_CARDS: List[str] = [
|
||||
# Commander banned list
|
||||
'Ancestral Recall', 'Balance', 'Biorhythm', 'Black Lotus',
|
||||
'Chaos Orb', 'Channel', 'Dockside Extortionist',
|
||||
'1996 World Champion', 'Ancestral Recall', 'Balance', 'Biorhythm',
|
||||
'Black Lotus', 'Chaos Orb', 'Channel', 'Dockside Extortionist',
|
||||
'Emrakul, the Aeons Torn',
|
||||
'Erayo, Soratami Ascendant', 'Falling Star', 'Fastbond',
|
||||
'Flash', 'Golos, Tireless Pilgrim',
|
||||
|
|
|
|||