Refactored setup.py again, confirmed that all filters are now working as expected. Work will resume on main branch now

This commit is contained in:
mwisnowski 2025-01-13 11:35:11 -08:00
parent c4d773d663
commit 000d804ba7
6 changed files with 584 additions and 262 deletions

View file

@ -5,8 +5,18 @@ import requests
import logging
from tqdm import tqdm
from pathlib import Path
from typing import List, Optional, Union
from typing import List, Optional, Union, Dict, Any
from settings import (
CSV_PROCESSING_COLUMNS,
CARD_TYPES_TO_EXCLUDE,
NON_LEGAL_SETS,
LEGENDARY_OPTIONS,
FILL_NA_COLUMNS,
SORT_CONFIG,
FILTER_CONFIG
)
from exceptions import CSVFileNotFoundError, MTGJSONDownloadError, DataFrameProcessingError, ColorFilterError, CommanderValidationError
from settings import (
CSV_PROCESSING_COLUMNS,
CARD_TYPES_TO_EXCLUDE,
@ -42,6 +52,7 @@ def download_cards_csv(url: str, output_path: Union[str, Path]) -> None:
url,
getattr(e.response, 'status_code', None) if hasattr(e, 'response') else None
) from e
def check_csv_exists(filepath: Union[str, Path]) -> bool:
"""Check if a CSV file exists at the specified path.
@ -54,7 +65,7 @@ def check_csv_exists(filepath: Union[str, Path]) -> bool:
return Path(filepath).is_file()
def filter_dataframe(df: pd.DataFrame, banned_cards: List[str]) -> pd.DataFrame:
"""Apply standard filters to the cards DataFrame.
"""Apply standard filters to the cards DataFrame using configuration from settings.
Args:
df: DataFrame to filter
@ -67,40 +78,52 @@ def filter_dataframe(df: pd.DataFrame, banned_cards: List[str]) -> pd.DataFrame:
DataFrameProcessingError: If filtering operations fail
"""
try:
# Fill null color identities
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
logging.info('Starting standard DataFrame filtering')
# Basic filters
filtered_df = df[
(df['layout'] != 'reversible_card') &
(df['availability'].str.contains('paper', na=False)) &
(df['promoTypes'] != 'playtest') &
(~df['securityStamp'].str.contains('Heart|Acorn', na=False))
]
# Fill null values according to configuration
for col, fill_value in FILL_NA_COLUMNS.items():
if col == 'faceName':
fill_value = df['name']
df[col] = df[col].fillna(fill_value)
logging.debug(f'Filled NA values in {col} with {fill_value}')
# Apply basic filters from configuration
filtered_df = df.copy()
for field, rules in FILTER_CONFIG.items():
for rule_type, values in rules.items():
if rule_type == 'exclude':
for value in values:
filtered_df = filtered_df[~filtered_df[field].str.contains(value, na=False)]
elif rule_type == 'require':
for value in values:
filtered_df = filtered_df[filtered_df[field].str.contains(value, na=False)]
logging.debug(f'Applied {rule_type} filter for {field}: {values}')
# Remove illegal sets
for set_code in NON_LEGAL_SETS:
filtered_df = filtered_df[
~filtered_df['printings'].str.contains(set_code, na=False)
]
filtered_df = filtered_df[~filtered_df['printings'].str.contains(set_code, na=False)]
logging.debug('Removed illegal sets')
# Remove banned cards
for card in banned_cards:
filtered_df = filtered_df[~filtered_df['name'].str.contains(card, na=False)]
logging.debug('Removed banned cards')
# Remove special card types
for card_type in CARD_TYPES_TO_EXCLUDE:
filtered_df = filtered_df[~filtered_df['type'].str.contains(card_type, na=False)]
logging.debug('Removed special card types')
# Handle face names and duplicates
filtered_df['faceName'] = filtered_df['faceName'].fillna(filtered_df['name'])
filtered_df = filtered_df.drop_duplicates(subset='faceName', keep='first')
# Select and sort columns
# Select columns, sort, and drop duplicates
filtered_df = filtered_df[CSV_PROCESSING_COLUMNS]
filtered_df = filtered_df.sort_values(
by=SORT_CONFIG['columns'],
key=lambda col: col.str.lower() if not SORT_CONFIG['case_sensitive'] else col
)
filtered_df = filtered_df.drop_duplicates(subset='faceName', keep='first')
logging.info('Completed standard DataFrame filtering')
return filtered_df.sort_values(by=['name', 'side'],
key=lambda col: col.str.lower())
return filtered_df
except Exception as e:
raise DataFrameProcessingError(
@ -109,8 +132,78 @@ def filter_dataframe(df: pd.DataFrame, banned_cards: List[str]) -> pd.DataFrame:
str(e)
) from e
def filter_by_color_identity(df: pd.DataFrame, color_identity: str) -> pd.DataFrame:
"""Filter DataFrame by color identity with additional color-specific processing.
This function extends the base filter_dataframe functionality with color-specific
filtering logic. It is used by setup.py's filter_by_color function but provides
a more robust and configurable implementation.
Args:
df: DataFrame to filter
color_identity: Color identity to filter by (e.g., 'W', 'U,B', 'Colorless')
Returns:
DataFrame filtered by color identity
Raises:
ColorFilterError: If color identity is invalid or filtering fails
DataFrameProcessingError: If general filtering operations fail
"""
try:
logging.info(f'Filtering cards for color identity: {color_identity}')
# Define processing steps for progress tracking
steps = [
'Validating color identity',
'Applying base filtering',
'Filtering by color identity',
'Performing color-specific processing'
]
# Validate color identity
with tqdm(total=1, desc='Validating color identity') as pbar:
if not isinstance(color_identity, str):
raise ColorFilterError(
"Invalid color identity type",
str(color_identity),
"Color identity must be a string"
)
pbar.update(1)
# Apply base filtering
with tqdm(total=1, desc='Applying base filtering') as pbar:
filtered_df = filter_dataframe(df, [])
pbar.update(1)
# Filter by color identity
with tqdm(total=1, desc='Filtering by color identity') as pbar:
filtered_df = filtered_df[filtered_df['colorIdentity'] == color_identity]
logging.debug(f'Applied color identity filter: {color_identity}')
pbar.update(1)
# Additional color-specific processing
with tqdm(total=1, desc='Performing color-specific processing') as pbar:
# Placeholder for future color-specific processing
pbar.update(1)
logging.info(f'Completed color identity filtering for {color_identity}')
return filtered_df
except DataFrameProcessingError as e:
raise ColorFilterError(
"Color filtering failed",
color_identity,
str(e)
) from e
except Exception as e:
raise ColorFilterError(
"Unexpected error during color filtering",
color_identity,
str(e)
) from e
def process_legendary_cards(df: pd.DataFrame) -> pd.DataFrame:
"""Process and filter legendary cards for commander eligibility.
"""Process and filter legendary cards for commander eligibility with comprehensive validation.
Args:
df: DataFrame containing all cards
@ -119,28 +212,75 @@ def process_legendary_cards(df: pd.DataFrame) -> pd.DataFrame:
DataFrame containing only commander-eligible cards
Raises:
DataFrameProcessingError: If processing fails
CommanderValidationError: If validation fails for legendary status, special cases, or set legality
DataFrameProcessingError: If general processing fails
"""
try:
# Filter for legendary creatures and eligible cards
mask = df['type'].str.contains('|'.join(LEGENDARY_OPTIONS), na=False)
# Add cards that can be commanders
can_be_commander = df['text'].str.contains(
'can be your commander',
na=False
)
filtered_df = df[mask | can_be_commander].copy()
logging.info('Starting commander validation process')
validation_steps = [
'Checking legendary status',
'Validating special cases',
'Verifying set legality'
]
# Remove illegal sets
for set_code in NON_LEGAL_SETS:
filtered_df = filtered_df[
~filtered_df['printings'].str.contains(set_code, na=False)
]
filtered_df = df.copy()
# Step 1: Check legendary status
try:
with tqdm(total=1, desc='Checking legendary status') as pbar:
mask = filtered_df['type'].str.contains('|'.join(LEGENDARY_OPTIONS), na=False)
if not mask.any():
raise CommanderValidationError(
"No legendary creatures found",
"legendary_check",
"DataFrame contains no cards matching legendary criteria"
)
filtered_df = filtered_df[mask].copy()
logging.debug(f'Found {len(filtered_df)} legendary cards')
pbar.update(1)
except Exception as e:
raise CommanderValidationError(
"Legendary status check failed",
"legendary_check",
str(e)
) from e
# Step 2: Validate special cases
try:
with tqdm(total=1, desc='Validating special cases') as pbar:
special_cases = df['text'].str.contains('can be your commander', na=False)
special_commanders = df[special_cases].copy()
filtered_df = pd.concat([filtered_df, special_commanders]).drop_duplicates()
logging.debug(f'Added {len(special_commanders)} special commander cards')
pbar.update(1)
except Exception as e:
raise CommanderValidationError(
"Special case validation failed",
"special_cases",
str(e)
) from e
# Step 3: Verify set legality
try:
with tqdm(total=1, desc='Verifying set legality') as pbar:
initial_count = len(filtered_df)
for set_code in NON_LEGAL_SETS:
filtered_df = filtered_df[
~filtered_df['printings'].str.contains(set_code, na=False)
]
removed_count = initial_count - len(filtered_df)
logging.debug(f'Removed {removed_count} cards from illegal sets')
pbar.update(1)
except Exception as e:
raise CommanderValidationError(
"Set legality verification failed",
"set_legality",
str(e)
) from e
logging.info(f'Commander validation complete. {len(filtered_df)} valid commanders found')
return filtered_df
except CommanderValidationError:
raise
except Exception as e:
raise DataFrameProcessingError(
"Failed to process legendary cards",