Moved the builder, tagger, and setup modules into their own folders, along with their constants, to improve clarity and readability. Also added a missing call to the tag_for_artifact_triggers() function

mwisnowski 2025-01-28 10:19:44 -08:00
parent 3a5beebfe2
commit dbbc8bc66e
20 changed files with 1525 additions and 1737 deletions

code/file_setup/__init__.py Normal file

@@ -0,0 +1,8 @@
"""Initialize the file_setup package."""
from .setup import setup, regenerate_csv_by_color
__all__ = [
'setup',
'regenerate_csv_by_color'
]
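# Illustrative usage from elsewhere in the project (a sketch assuming this
# package layout; not part of the package's own code):
#
#     from file_setup import setup, regenerate_csv_by_color
#     setup()  # launches the interactive setup menu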

code/file_setup/setup.py Normal file

@@ -0,0 +1,337 @@
"""MTG Python Deckbuilder setup module.
This module provides the main setup functionality for the MTG Python Deckbuilder
application. It handles initial setup tasks such as downloading card data,
creating color-filtered card lists, and generating commander-eligible card lists.
Key Features:
- Initial setup and configuration
- Card data download and processing
- Color-based card filtering
- Commander card list generation
- CSV file management and validation
The module works in conjunction with setup_utils.py for utility functions and
exceptions.py for error handling.
"""
from __future__ import annotations
# Standard library imports
import logging
from enum import Enum
import os
from pathlib import Path
from typing import Union, List, Dict, Any
# Third-party imports
import inquirer
import pandas as pd
# Local imports
import logging_util
from settings import CSV_DIRECTORY
from .setup_constants import BANNED_CARDS, SETUP_COLORS, COLOR_ABRV, MTGJSON_API_URL
from .setup_utils import (
download_cards_csv,
filter_by_color_identity,
filter_dataframe,
process_legendary_cards
)
from exceptions import (
CSVFileNotFoundError,
ColorFilterError,
CommanderValidationError,
DataFrameProcessingError,
MTGJSONDownloadError
)
# Create logger for this module
logger = logging_util.logging.getLogger(__name__)
logger.setLevel(logging_util.LOG_LEVEL)
logger.addHandler(logging_util.file_handler)
logger.addHandler(logging_util.stream_handler)
# Create CSV directory if it doesn't exist
os.makedirs(CSV_DIRECTORY, exist_ok=True)
def check_csv_exists(file_path: Union[str, Path]) -> bool:
"""Check if a CSV file exists at the specified path.
Args:
file_path: Path to the CSV file to check
Returns:
bool: True if file exists, False otherwise
Raises:
CSVFileNotFoundError: If there are issues accessing the file path
"""
try:
with open(file_path, 'r', encoding='utf-8'):
return True
except FileNotFoundError:
return False
except Exception as e:
raise CSVFileNotFoundError(f'Error checking CSV file: {str(e)}')
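# Usage sketch (illustrative; assumes CSV_DIRECTORY exists and uses the
# download helper imported from setup_utils above):
#
#     cards_file = f'{CSV_DIRECTORY}/cards.csv'
#     if not check_csv_exists(cards_file):
#         download_cards_csv(MTGJSON_API_URL, cards_file)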
def initial_setup() -> None:
"""Perform initial setup by downloading card data and creating filtered CSV files.
Downloads the latest card data from MTGJSON if needed, creates color-filtered CSV files,
and generates commander-eligible cards list. Uses utility functions from setup_utils.py
for file operations and data processing.
Raises:
CSVFileNotFoundError: If required CSV files cannot be found
MTGJSONDownloadError: If card data download fails
DataFrameProcessingError: If data processing fails
ColorFilterError: If color filtering fails
"""
logger.info('Checking for cards.csv file')
try:
cards_file = f'{CSV_DIRECTORY}/cards.csv'
try:
with open(cards_file, 'r', encoding='utf-8'):
logger.info('cards.csv exists')
except FileNotFoundError:
logger.info('cards.csv not found, downloading from mtgjson')
download_cards_csv(MTGJSON_API_URL, cards_file)
df = pd.read_csv(cards_file, low_memory=False)
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
logger.info('Checking for color identity sorted files')
for i in range(min(len(SETUP_COLORS), len(COLOR_ABRV))):
logger.info(f'Checking for {SETUP_COLORS[i]}_cards.csv')
try:
with open(f'{CSV_DIRECTORY}/{SETUP_COLORS[i]}_cards.csv', 'r', encoding='utf-8'):
logger.info(f'{SETUP_COLORS[i]}_cards.csv exists')
except FileNotFoundError:
logger.info(f'{SETUP_COLORS[i]}_cards.csv not found, creating one')
filter_by_color(df, 'colorIdentity', COLOR_ABRV[i], f'{CSV_DIRECTORY}/{SETUP_COLORS[i]}_cards.csv')
# Generate commander list
determine_commanders()
except Exception as e:
logger.error(f'Error during initial setup: {str(e)}')
raise
def filter_by_color(df: pd.DataFrame, column_name: str, value: str, new_csv_name: Union[str, Path]) -> None:
"""Filter DataFrame by color identity and save to CSV.
Args:
df: DataFrame to filter
column_name: Column to filter on (should be 'colorIdentity')
value: Color identity value to filter for
new_csv_name: Path to save filtered CSV
Raises:
ColorFilterError: If filtering fails
DataFrameProcessingError: If DataFrame processing fails
CSVFileNotFoundError: If CSV file operations fail
"""
try:
# Check if target CSV already exists
if check_csv_exists(new_csv_name):
logger.info(f'{new_csv_name} already exists, will be overwritten')
filtered_df = filter_by_color_identity(df, value)
filtered_df.to_csv(new_csv_name, index=False)
logger.info(f'Successfully created {new_csv_name}')
except (ColorFilterError, DataFrameProcessingError, CSVFileNotFoundError) as e:
logger.error(f'Failed to filter by color {value}: {str(e)}')
raise
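# Usage sketch (illustrative; assumes cards.csv has already been downloaded):
#
#     df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
#     df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
#     filter_by_color(df, 'colorIdentity', 'W', f'{CSV_DIRECTORY}/white_cards.csv')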
def determine_commanders() -> None:
"""Generate commander_cards.csv containing all cards eligible to be commanders.
This function processes the card database to identify and validate commander-eligible cards,
applying comprehensive validation steps and filtering criteria.
Raises:
CSVFileNotFoundError: If cards.csv is missing and cannot be downloaded
MTGJSONDownloadError: If downloading cards data fails
CommanderValidationError: If commander validation fails
DataFrameProcessingError: If data processing operations fail
"""
logger.info('Starting commander card generation process')
try:
# Check for cards.csv with progress tracking
cards_file = f'{CSV_DIRECTORY}/cards.csv'
if not check_csv_exists(cards_file):
logger.info('cards.csv not found, initiating download')
download_cards_csv(MTGJSON_API_URL, cards_file)
else:
logger.info('cards.csv found, proceeding with processing')
# Load and process cards data
logger.info('Loading card data from CSV')
df = pd.read_csv(cards_file, low_memory=False)
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
# Process legendary cards with validation
logger.info('Processing and validating legendary cards')
try:
filtered_df = process_legendary_cards(df)
except CommanderValidationError as e:
logger.error(f'Commander validation failed: {str(e)}')
raise
# Apply standard filters
logger.info('Applying standard card filters')
filtered_df = filter_dataframe(filtered_df, BANNED_CARDS)
# Save commander cards
logger.info('Saving validated commander cards')
filtered_df.to_csv(f'{CSV_DIRECTORY}/commander_cards.csv', index=False)
logger.info('Commander card generation completed successfully')
except (CSVFileNotFoundError, MTGJSONDownloadError) as e:
logger.error(f'File operation error: {str(e)}')
raise
except CommanderValidationError as e:
logger.error(f'Commander validation error: {str(e)}')
raise
except Exception as e:
logger.error(f'Unexpected error during commander generation: {str(e)}')
raise
def regenerate_csvs_all() -> None:
"""Regenerate all color-filtered CSV files from latest card data.
Downloads fresh card data and recreates all color-filtered CSV files.
Useful for updating the card database when new sets are released.
Raises:
MTGJSONDownloadError: If card data download fails
DataFrameProcessingError: If data processing fails
ColorFilterError: If color filtering fails
"""
try:
logger.info('Downloading latest card data from MTGJSON')
download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
logger.info('Loading and processing card data')
df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
logger.info('Regenerating color identity sorted files')
for i in range(min(len(SETUP_COLORS), len(COLOR_ABRV))):
color = SETUP_COLORS[i]
color_id = COLOR_ABRV[i]
logger.info(f'Processing {color} cards')
filter_by_color(df, 'colorIdentity', color_id, f'{CSV_DIRECTORY}/{color}_cards.csv')
logger.info('Regenerating commander cards')
determine_commanders()
logger.info('Card database regeneration complete')
except Exception as e:
logger.error(f'Failed to regenerate card database: {str(e)}')
raise
def regenerate_csv_by_color(color: str) -> None:
"""Regenerate CSV file for a specific color identity.
Args:
color: Color name to regenerate CSV for (e.g. 'white', 'blue')
Raises:
ValueError: If color is not valid
MTGJSONDownloadError: If card data download fails
DataFrameProcessingError: If data processing fails
ColorFilterError: If color filtering fails
"""
try:
if color not in SETUP_COLORS:
raise ValueError(f'Invalid color: {color}')
color_abv = COLOR_ABRV[SETUP_COLORS.index(color)]
logger.info(f'Downloading latest card data for {color} cards')
download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
logger.info('Loading and processing card data')
df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
logger.info(f'Regenerating {color} cards CSV')
filter_by_color(df, 'colorIdentity', color_abv, f'{CSV_DIRECTORY}/{color}_cards.csv')
logger.info(f'Successfully regenerated {color} cards database')
except Exception as e:
logger.error(f'Failed to regenerate {color} cards: {str(e)}')
raise
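# Usage sketch (illustrative; any entry from SETUP_COLORS is a valid argument):
#
#     regenerate_csv_by_color('azorius')  # rebuilds azorius_cards.csv ('U, W')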
class SetupOption(Enum):
"""Enum for setup menu options."""
INITIAL_SETUP = 'Initial Setup'
REGENERATE_CSV = 'Regenerate CSV Files'
BACK = 'Back'
def _display_setup_menu() -> SetupOption:
"""Display the setup menu and return the selected option.
Returns:
SetupOption: The selected menu option
"""
question: List[Dict[str, Any]] = [
inquirer.List(
'menu',
choices=[option.value for option in SetupOption],
carousel=True)]
answer = inquirer.prompt(question)
if answer is None:
# Prompt was aborted (e.g. Ctrl+C); treat it as choosing 'Back'
return SetupOption.BACK
return SetupOption(answer['menu'])
def setup() -> bool:
"""Run the setup process for the MTG Python Deckbuilder.
This function provides a menu-driven interface to:
1. Perform initial setup by downloading and processing card data
2. Regenerate CSV files with updated card data
The function handles errors gracefully and provides feedback through logging.
Returns:
bool: True if setup completed successfully, False otherwise
"""
try:
print('Which setup operation would you like to perform?\n'
'If this is your first time setting up, do the initial setup.\n'
'If you\'ve done the basic setup before, you can regenerate the CSV files\n')
choice = _display_setup_menu()
if choice == SetupOption.INITIAL_SETUP:
logger.info('Starting initial setup')
initial_setup()
logger.info('Initial setup completed successfully')
return True
elif choice == SetupOption.REGENERATE_CSV:
logger.info('Starting CSV regeneration')
regenerate_csvs_all()
logger.info('CSV regeneration completed successfully')
return True
elif choice == SetupOption.BACK:
logger.info('Setup cancelled by user')
return False
except Exception as e:
logger.error(f'Error during setup: {e}')
raise
return False
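# Usage sketch (illustrative; in the application this is normally invoked
# from the main menu rather than called directly):
#
#     if setup():
#         logger.info('Setup finished')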

code/file_setup/setup_constants.py Normal file

@@ -0,0 +1,118 @@
from typing import Dict, List
BANNED_CARDS: List[str] = [
# Banned in Commander
'Ancestral Recall', 'Balance', 'Biorhythm', 'Black Lotus',
'Braids, Cabal Minion', 'Chaos Orb', 'Coalition Victory',
'Channel', 'Dockside Extortionist', 'Emrakul, the Aeons Torn',
'Erayo, Soratami Ascendant', 'Falling Star', 'Fastbond',
'Flash', 'Gifts Ungiven', 'Golos, Tireless Pilgrim',
'Griselbrand', 'Hullbreacher', 'Iona, Shield of Emeria',
'Karakas', 'Jeweled Lotus', 'Leovold, Emissary of Trest',
'Library of Alexandria', 'Limited Resources', 'Lutri, the Spellchaser',
'Mana Crypt', 'Mox Emerald', 'Mox Jet', 'Mox Pearl', 'Mox Ruby',
'Mox Sapphire', 'Nadu, Winged Wisdom', 'Panoptic Mirror',
'Paradox Engine', 'Primeval Titan', 'Prophet of Kruphix',
'Recurring Nightmare', 'Rofellos, Llanowar Emissary', 'Shahrazad',
'Sundering Titan', 'Sway of the Stars', 'Sylvan Primordial',
'Time Vault', 'Time Walk', 'Tinker', 'Tolarian Academy',
'Trade Secrets', 'Upheaval', 'Yawgmoth\'s Bargain',
# Banned in all formats for offensive content
'Invoke Prejudice', 'Cleanse', 'Stone-Throwing Devils', 'Pradesh Gypsies',
'Jihad', 'Imprison', 'Crusade'
]
SETUP_COLORS: List[str] = ['colorless', 'white', 'blue', 'black', 'green', 'red',
'azorius', 'orzhov', 'selesnya', 'boros', 'dimir',
'simic', 'izzet', 'golgari', 'rakdos', 'gruul',
'bant', 'esper', 'grixis', 'jund', 'naya',
'abzan', 'jeskai', 'mardu', 'sultai', 'temur',
'dune', 'glint', 'ink', 'witch', 'yore', 'wubrg']
COLOR_ABRV: List[str] = ['Colorless', 'W', 'U', 'B', 'G', 'R',
'U, W', 'B, W', 'G, W', 'R, W', 'B, U',
'G, U', 'R, U', 'B, G', 'B, R', 'G, R',
'G, U, W', 'B, U, W', 'B, R, U', 'B, G, R', 'G, R, W',
'B, G, W', 'R, U, W', 'B, R, W', 'B, G, U', 'G, R, U',
'B, G, R, W', 'B, G, R, U', 'G, R, U, W', 'B, G, U, W',
'B, R, U, W', 'B, G, R, U, W']
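# The two lists above are parallel: SETUP_COLORS[i] names the output CSV while
# COLOR_ABRV[i] is the matching colorIdentity value. An equivalent lookup
# (illustrative only; the code indexes the lists directly):
#
#     COLOR_TO_ABRV = dict(zip(SETUP_COLORS, COLOR_ABRV))
#     COLOR_TO_ABRV['azorius']  # -> 'U, W'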
# Constants for setup and CSV processing
MTGJSON_API_URL: str = 'https://mtgjson.com/api/v5/csv/cards.csv'
LEGENDARY_OPTIONS: List[str] = [
'Legendary Creature',
'Legendary Artifact',
'Legendary Artifact Creature',
'Legendary Enchantment Creature',
'Legendary Planeswalker'
]
NON_LEGAL_SETS: List[str] = [
'PHTR', 'PH17', 'PH18', 'PH19', 'PH20', 'PH21',
'UGL', 'UND', 'UNH', 'UST'
]
CARD_TYPES_TO_EXCLUDE: List[str] = [
'Plane —',
'Conspiracy',
'Vanguard',
'Scheme',
'Phenomenon',
'Stickers',
'Attraction',
'Hero',
'Contraption'
]
# Columns to keep when processing CSV files
CSV_PROCESSING_COLUMNS: List[str] = [
'name', # Card name
'faceName', # Name of specific face for multi-faced cards
'edhrecRank', # Card's rank on EDHREC
'colorIdentity', # Color identity for Commander format
'colors', # Actual colors in card's mana cost
'manaCost', # Mana cost string
'manaValue', # Converted mana cost
'type', # Card type line
'layout', # Card layout (normal, split, etc)
'text', # Card text/rules
'power', # Power (for creatures)
'toughness', # Toughness (for creatures)
'keywords', # Card's keywords
'side' # Side identifier for multi-faced cards
]
# Configuration for DataFrame sorting operations
SORT_CONFIG = {
'columns': ['name', 'side'], # Columns to sort by
'case_sensitive': False # Ignore case when sorting
}
# Configuration for DataFrame filtering operations
FILTER_CONFIG: Dict[str, Dict[str, List[str]]] = {
'layout': {
'exclude': ['reversible_card']
},
'availability': {
'require': ['paper']
},
'promoTypes': {
'exclude': ['playtest']
},
'securityStamp': {
'exclude': ['Heart', 'Acorn']
}
}
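# Each FILTER_CONFIG entry maps a column to substring rules that
# setup_utils.filter_dataframe applies via str.contains. For example
# (illustrative), the 'availability' rule keeps only paper printings:
#
#     df = df[df['availability'].str.contains('paper', na=False)]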
COLUMN_ORDER: List[str] = [
'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors',
'manaCost', 'manaValue', 'type', 'creatureTypes', 'text',
'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side'
]
TAGGED_COLUMN_ORDER: List[str] = [
'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors',
'manaCost', 'manaValue', 'type', 'creatureTypes', 'text',
'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side'
]

code/file_setup/setup_utils.py Normal file

@@ -0,0 +1,408 @@
"""MTG Python Deckbuilder setup utilities.
This module provides utility functions for setting up and managing the MTG Python Deckbuilder
application. It handles tasks such as downloading card data, filtering cards by various criteria,
and processing legendary creatures for commander format.
Key Features:
- Card data download from MTGJSON
- DataFrame filtering and processing
- Color identity filtering
- Commander validation
- CSV file management
The module integrates with settings.py for configuration and exceptions.py for error handling.
"""
from __future__ import annotations
# Standard library imports
import logging
import os
from pathlib import Path
from typing import List, Optional, Union, TypedDict
# Third-party imports
import pandas as pd
import requests
from tqdm import tqdm
# Local application imports
from .setup_constants import (
CSV_PROCESSING_COLUMNS,
CARD_TYPES_TO_EXCLUDE,
NON_LEGAL_SETS,
LEGENDARY_OPTIONS,
SORT_CONFIG,
FILTER_CONFIG,
COLUMN_ORDER,
TAGGED_COLUMN_ORDER
)
from exceptions import (
MTGJSONDownloadError,
DataFrameProcessingError,
ColorFilterError,
CommanderValidationError
)
from type_definitions import CardLibraryDF
from settings import FILL_NA_COLUMNS
import logging_util
# Create logger for this module
logger = logging_util.logging.getLogger(__name__)
logger.setLevel(logging_util.LOG_LEVEL)
logger.addHandler(logging_util.file_handler)
logger.addHandler(logging_util.stream_handler)
# Type definitions
class FilterRule(TypedDict, total=False):
"""Type definition for filter rules configuration; both keys are optional."""
exclude: List[str]
require: List[str]
class FilterConfig(TypedDict):
"""Type definition for complete filter configuration."""
layout: FilterRule
availability: FilterRule
promoTypes: FilterRule
securityStamp: FilterRule
def download_cards_csv(url: str, output_path: Union[str, Path]) -> None:
"""Download cards data from MTGJSON and save to CSV.
Downloads card data from the specified MTGJSON URL and saves it to a local CSV file.
Shows a progress bar during download using tqdm.
Args:
url: URL to download cards data from (typically MTGJSON API endpoint)
output_path: Path where the downloaded CSV file will be saved
Raises:
MTGJSONDownloadError: If download fails due to network issues or invalid response
Example:
>>> download_cards_csv('https://mtgjson.com/api/v5/csv/cards.csv', 'cards.csv')
"""
try:
response = requests.get(url, stream=True)
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
with open(output_path, 'wb') as f:
with tqdm(total=total_size, unit='iB', unit_scale=True, desc='Downloading cards data') as pbar:
for chunk in response.iter_content(chunk_size=8192):
size = f.write(chunk)
pbar.update(size)
except requests.RequestException as e:
logger.error(f'Failed to download cards data from {url}')
raise MTGJSONDownloadError(
"Failed to download cards data",
url,
getattr(e.response, 'status_code', None) if hasattr(e, 'response') else None
) from e
def check_csv_exists(filepath: Union[str, Path]) -> bool:
"""Check if a CSV file exists at the specified path.
Verifies the existence of a CSV file at the given path. This function is used
to determine if card data needs to be downloaded or if it already exists locally.
Args:
filepath: Path to the CSV file to check
Returns:
bool: True if the file exists, False otherwise
Example:
>>> if not check_csv_exists('cards.csv'):
... download_cards_csv(MTGJSON_API_URL, 'cards.csv')
"""
return Path(filepath).is_file()
def filter_dataframe(df: pd.DataFrame, banned_cards: List[str]) -> pd.DataFrame:
"""Apply standard filters to the cards DataFrame using configuration from settings.
Applies a series of filters to the cards DataFrame based on configuration from settings.py.
This includes handling null values, applying basic filters, removing illegal sets and banned cards,
and processing special card types.
Args:
df: pandas DataFrame containing card data to filter
banned_cards: List of card names that are banned and should be excluded
Returns:
pd.DataFrame: A new DataFrame containing only the cards that pass all filters
Raises:
DataFrameProcessingError: If any filtering operation fails
Example:
>>> filtered_df = filter_dataframe(cards_df, ['Channel', 'Black Lotus'])
"""
try:
logger.info('Starting standard DataFrame filtering')
# Work on a copy so the caller's DataFrame is not mutated
df = df.copy()
# Fill null values according to configuration
for col, fill_value in FILL_NA_COLUMNS.items():
if col == 'faceName':
fill_value = df['name']
df[col] = df[col].fillna(fill_value)
logger.debug(f'Filled NA values in {col} with {fill_value}')
# Apply basic filters from configuration
filtered_df = df.copy()
filter_config: FilterConfig = FILTER_CONFIG # Type hint for configuration
for field, rules in filter_config.items():
for rule_type, values in rules.items():
if rule_type == 'exclude':
for value in values:
filtered_df = filtered_df[~filtered_df[field].str.contains(value, na=False)]
elif rule_type == 'require':
for value in values:
filtered_df = filtered_df[filtered_df[field].str.contains(value, na=False)]
logger.debug(f'Applied {rule_type} filter for {field}: {values}')
# Remove illegal sets
for set_code in NON_LEGAL_SETS:
filtered_df = filtered_df[~filtered_df['printings'].str.contains(set_code, na=False)]
logger.debug('Removed illegal sets')
# Remove banned cards by exact name match; str.contains would also drop
# cards whose names merely contain a banned name (e.g. 'Flash' vs 'Flashfreeze')
filtered_df = filtered_df[~filtered_df['name'].isin(banned_cards)]
logger.debug('Removed banned cards')
# Remove special card types
for card_type in CARD_TYPES_TO_EXCLUDE:
filtered_df = filtered_df[~filtered_df['type'].str.contains(card_type, na=False)]
logger.debug('Removed special card types')
# Select columns, sort, and drop duplicates
filtered_df = filtered_df[CSV_PROCESSING_COLUMNS]
filtered_df = filtered_df.sort_values(
by=SORT_CONFIG['columns'],
key=lambda col: col.str.lower() if not SORT_CONFIG['case_sensitive'] else col
)
filtered_df = filtered_df.drop_duplicates(subset='faceName', keep='first')
logger.info('Completed standard DataFrame filtering')
return filtered_df
except Exception as e:
logger.error(f'Failed to filter DataFrame: {str(e)}')
raise DataFrameProcessingError(
"Failed to filter DataFrame",
"standard_filtering",
str(e)
) from e
def filter_by_color_identity(df: pd.DataFrame, color_identity: str) -> pd.DataFrame:
"""Filter DataFrame by color identity with additional color-specific processing.
This function extends the base filter_dataframe functionality with color-specific
filtering logic. It is used by setup.py's filter_by_color function but provides
a more robust and configurable implementation.
Args:
df: DataFrame to filter
color_identity: Color identity to filter by (e.g., 'W', 'U,B', 'Colorless')
Returns:
DataFrame filtered by color identity
Raises:
ColorFilterError: If color identity is invalid or filtering fails
DataFrameProcessingError: If general filtering operations fail
"""
try:
logger.info(f'Filtering cards for color identity: {color_identity}')
# Validate color identity
with tqdm(total=1, desc='Validating color identity') as pbar:
if not isinstance(color_identity, str):
raise ColorFilterError(
"Invalid color identity type",
str(color_identity),
"Color identity must be a string"
)
pbar.update(1)
# Apply base filtering
with tqdm(total=1, desc='Applying base filtering') as pbar:
filtered_df = filter_dataframe(df, [])
pbar.update(1)
# Filter by color identity
with tqdm(total=1, desc='Filtering by color identity') as pbar:
filtered_df = filtered_df[filtered_df['colorIdentity'] == color_identity]
logger.debug(f'Applied color identity filter: {color_identity}')
pbar.update(1)
# Additional color-specific processing
with tqdm(total=1, desc='Performing color-specific processing') as pbar:
# Placeholder for future color-specific processing
pbar.update(1)
logger.info(f'Completed color identity filtering for {color_identity}')
return filtered_df
except DataFrameProcessingError as e:
raise ColorFilterError(
"Color filtering failed",
color_identity,
str(e)
) from e
except Exception as e:
raise ColorFilterError(
"Unexpected error during color filtering",
color_identity,
str(e)
) from e
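# Usage sketch (illustrative; hypothetical path, assumes a cards DataFrame
# with a 'colorIdentity' column):
#
#     df = pd.read_csv('csv_files/cards.csv', low_memory=False)
#     df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
#     blue_df = filter_by_color_identity(df, 'U')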
def process_legendary_cards(df: pd.DataFrame) -> pd.DataFrame:
"""Process and filter legendary cards for commander eligibility with comprehensive validation.
Args:
df: DataFrame containing all cards
Returns:
DataFrame containing only commander-eligible cards
Raises:
CommanderValidationError: If validation fails for legendary status, special cases, or set legality
DataFrameProcessingError: If general processing fails
"""
try:
logger.info('Starting commander validation process')
filtered_df = df.copy()
# Step 1: Check legendary status
try:
with tqdm(total=1, desc='Checking legendary status') as pbar:
mask = filtered_df['type'].str.contains('|'.join(LEGENDARY_OPTIONS), na=False)
if not mask.any():
raise CommanderValidationError(
"No legendary creatures found",
"legendary_check",
"DataFrame contains no cards matching legendary criteria"
)
filtered_df = filtered_df[mask].copy()
logger.debug(f'Found {len(filtered_df)} legendary cards')
pbar.update(1)
except Exception as e:
raise CommanderValidationError(
"Legendary status check failed",
"legendary_check",
str(e)
) from e
# Step 2: Validate special cases
try:
with tqdm(total=1, desc='Validating special cases') as pbar:
special_cases = df['text'].str.contains('can be your commander', na=False)
special_commanders = df[special_cases].copy()
filtered_df = pd.concat([filtered_df, special_commanders]).drop_duplicates()
logger.debug(f'Added {len(special_commanders)} special commander cards')
pbar.update(1)
except Exception as e:
raise CommanderValidationError(
"Special case validation failed",
"special_cases",
str(e)
) from e
# Step 3: Verify set legality
try:
with tqdm(total=1, desc='Verifying set legality') as pbar:
initial_count = len(filtered_df)
for set_code in NON_LEGAL_SETS:
filtered_df = filtered_df[
~filtered_df['printings'].str.contains(set_code, na=False)
]
removed_count = initial_count - len(filtered_df)
logger.debug(f'Removed {removed_count} cards from illegal sets')
pbar.update(1)
except Exception as e:
raise CommanderValidationError(
"Set legality verification failed",
"set_legality",
str(e)
) from e
logger.info(f'Commander validation complete. {len(filtered_df)} valid commanders found')
return filtered_df
except CommanderValidationError:
raise
except Exception as e:
raise DataFrameProcessingError(
"Failed to process legendary cards",
"commander_processing",
str(e)
) from e
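# Usage sketch (illustrative; mirrors how setup.determine_commanders chains
# these helpers; banned_cards is a list of card names):
#
#     commanders = process_legendary_cards(df)
#     commanders = filter_dataframe(commanders, banned_cards)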
def process_card_dataframe(df: CardLibraryDF, batch_size: int = 1000, columns_to_keep: Optional[List[str]] = None,
include_commander_cols: bool = False, skip_availability_checks: bool = False) -> CardLibraryDF:
"""Process DataFrame with common operations in batches.
Args:
df: DataFrame to process
batch_size: Size of batches for processing
columns_to_keep: List of columns to keep (default: TAGGED_COLUMN_ORDER)
include_commander_cols: Whether to include commander-specific columns
skip_availability_checks: Whether to skip availability and security checks (default: False)
Returns:
CardLibraryDF: Processed DataFrame with standardized structure
"""
logger.info("Processing card DataFrame...")
if columns_to_keep is None:
columns_to_keep = TAGGED_COLUMN_ORDER.copy()
if include_commander_cols:
commander_cols = ['printings', 'text', 'power', 'toughness', 'keywords']
columns_to_keep.extend(col for col in commander_cols if col not in columns_to_keep)
# Fill NA values on a copy so the caller's DataFrame is not mutated
df = df.copy()
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
df['faceName'] = df['faceName'].fillna(df['name'])
# Process in batches
# Ceiling division (minimum one batch) avoids processing an empty extra batch
total_batches = max(1, (len(df) + batch_size - 1) // batch_size)
processed_dfs = []
for i in tqdm(range(total_batches), desc="Processing batches"):
start_idx = i * batch_size
end_idx = min((i + 1) * batch_size, len(df))
batch = df.iloc[start_idx:end_idx].copy()
if not skip_availability_checks:
# Availability checks always use the untagged column set, overriding
# any columns_to_keep value passed in above
columns_to_keep = COLUMN_ORDER.copy()
logger.debug("Performing column checks...")
# Common processing steps
batch = batch[batch['availability'].str.contains('paper', na=False)]
batch = batch.loc[batch['layout'] != 'reversible_card']
batch = batch.loc[batch['promoTypes'] != 'playtest']
batch = batch.loc[batch['securityStamp'] != 'heart']
batch = batch.loc[batch['securityStamp'] != 'acorn']
# Keep only specified columns
batch = batch[columns_to_keep]
processed_dfs.append(batch)
else:
logger.debug("Skipping column checks...")
# Keep only specified columns
batch = batch[columns_to_keep]
processed_dfs.append(batch)
# Combine processed batches
result = pd.concat(processed_dfs, ignore_index=True)
# Final processing
result.drop_duplicates(subset='faceName', keep='first', inplace=True)
result.sort_values(by=['name', 'side'], key=lambda col: col.str.lower(), inplace=True)
logger.info("DataFrame processing completed")
return result
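# Usage sketch (illustrative; batch_size shown at its default, with
# commander-specific columns opted in):
#
#     processed = process_card_dataframe(df, batch_size=1000,
#                                        include_commander_cols=True)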