mtg_python_deckbuilder/builder_utils.py
2025-01-14 12:07:49 -08:00

331 lines
No EOL
10 KiB
Python

from typing import Dict, List, Tuple, Optional, Any, Callable, TypeVar, Union
import logging
import functools
import time
import pandas as pd
from fuzzywuzzy import process
from settings import (
COMMANDER_CSV_PATH,
FUZZY_MATCH_THRESHOLD,
MAX_FUZZY_CHOICES,
COMMANDER_CONVERTERS,
DATAFRAME_VALIDATION_RULES,
DATAFRAME_VALIDATION_TIMEOUT,
DATAFRAME_BATCH_SIZE,
DATAFRAME_TRANSFORM_TIMEOUT,
DATAFRAME_REQUIRED_COLUMNS
)
from exceptions import (
DeckBuilderError,
CSVValidationError,
DataFrameValidationError,
DataFrameTimeoutError,
EmptyDataFrameError
)
# Configure logging at import time: INFO level and a
# "timestamp - level - message" line format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# Module-level logger used by the helpers in this file.
logger = logging.getLogger(__name__)
# Type variables for generic functions
# T: return type of a decorated function (used by timeout_wrapper).
T = TypeVar('T')
# DataFrame: type variable bounded by pd.DataFrame.
DataFrame = TypeVar('DataFrame', bound=pd.DataFrame)
def timeout_wrapper(timeout: float) -> Callable:
    """Decorator factory that enforces an execution-time budget after the fact.

    The wrapped function is never interrupted: it always runs to completion.
    Once it returns, the elapsed time is compared with ``timeout``; if the
    budget was exceeded the result is discarded and an error is raised
    instead. Exceptions raised by the wrapped function propagate unchanged.

    Args:
        timeout: Maximum allowed execution time in seconds.

    Returns:
        A decorator that adds the elapsed-time check to a function.

    Raises:
        DataFrameTimeoutError: Raised from the wrapped function when its
            execution took longer than ``timeout`` seconds.
    """
    def decorator(func: "Callable[..., T]") -> "Callable[..., T]":
        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> "T":
            # Use the monotonic clock: unlike time.time() it cannot jump
            # when the system clock is adjusted, so the measured duration
            # is never negative or spuriously large.
            start_time = time.monotonic()
            result = func(*args, **kwargs)
            elapsed = time.monotonic() - start_time
            if elapsed > timeout:
                raise DataFrameTimeoutError(
                    func.__name__,
                    timeout,
                    elapsed,
                    {'args': args, 'kwargs': kwargs}
                )
            return result
        return wrapper
    return decorator
def get_validation_rules(data_type: str) -> Dict[str, Dict[str, Any]]:
    """Look up the validation rule set registered for a data type.

    Args:
        data_type: One of 'creature', 'spell' or 'land'; any other value
            selects the generic DataFrame rules.

    Returns:
        The rule dictionary for ``data_type``, or
        DATAFRAME_VALIDATION_RULES when the type is not recognised.
    """
    # These rule sets are resolved from settings at call time.
    from settings import (
        CREATURE_VALIDATION_RULES,
        SPELL_VALIDATION_RULES,
        LAND_VALIDATION_RULES
    )
    rules_by_type = {
        'creature': CREATURE_VALIDATION_RULES,
        'spell': SPELL_VALIDATION_RULES,
        'land': LAND_VALIDATION_RULES
    }
    if data_type in rules_by_type:
        return rules_by_type[data_type]
    # Unknown type: fall back to the generic rules.
    return DATAFRAME_VALIDATION_RULES
@timeout_wrapper(DATAFRAME_VALIDATION_TIMEOUT)
def validate_dataframe(df: pd.DataFrame, rules: Dict[str, Dict[str, Any]]) -> bool:
    """Validate a DataFrame's required columns and column types.

    Args:
        df: DataFrame to validate.
        rules: Per-column validation rules, passed to validate_column_types.

    Returns:
        True if validation passes.

    Raises:
        EmptyDataFrameError: If ``df`` is empty.
        DataFrameValidationError: If any validation step fails; the
            underlying error is attached as ``__cause__``.
        DataFrameTimeoutError: If validation exceeds
            DATAFRAME_VALIDATION_TIMEOUT (raised by the decorator).
    """
    if df.empty:
        raise EmptyDataFrameError("validate_dataframe")
    try:
        validate_required_columns(df)
        validate_column_types(df, rules)
    except Exception as e:
        # Chain the original exception so its traceback is not lost when
        # the failure is reported as a generic validation error.
        raise DataFrameValidationError(
            "DataFrame validation failed",
            {'rules': rules, 'error': str(e)}
        ) from e
    return True
def validate_column_types(df: pd.DataFrame, rules: Dict[str, Dict[str, Any]]) -> bool:
    """Check that each ruled column's dtype name starts with an allowed prefix.

    Columns named in ``rules`` but absent from ``df`` are skipped, as are
    rules whose 'type' entry is missing or falsy.

    Args:
        df: DataFrame to validate.
        rules: Mapping of column name to rule dict. The rule's 'type' entry
            is a dtype-name prefix (e.g. 'int') or a tuple of such prefixes.

    Returns:
        True if every checked column matches its rule.

    Raises:
        DataFrameValidationError: For the first column whose dtype name
            matches none of the allowed prefixes.
    """
    for column, spec in rules.items():
        if column not in df.columns:
            continue
        allowed = spec.get('type')
        if not allowed:
            continue
        dtype_name = df[column].dtype.name
        # str.startswith accepts a single prefix or a tuple of prefixes,
        # so both rule shapes are handled by the same call.
        if dtype_name.startswith(allowed):
            continue
        raise DataFrameValidationError(
            column,
            spec,
            {'actual_type': dtype_name}
        )
    return True
def validate_required_columns(df: pd.DataFrame) -> bool:
    """Ensure every column in DATAFRAME_REQUIRED_COLUMNS exists in ``df``.

    Args:
        df: DataFrame to validate.

    Returns:
        True when no required column is missing.

    Raises:
        DataFrameValidationError: If one or more required columns are
            absent; the missing names are included in the error details.
    """
    absent = set(DATAFRAME_REQUIRED_COLUMNS) - set(df.columns)
    if not absent:
        return True
    raise DataFrameValidationError(
        "missing_columns",
        {'required': DATAFRAME_REQUIRED_COLUMNS},
        {'missing': list(absent)}
    )
@timeout_wrapper(DATAFRAME_TRANSFORM_TIMEOUT)
def process_dataframe_batch(df: pd.DataFrame, batch_size: int = DATAFRAME_BATCH_SIZE) -> pd.DataFrame:
    """Transform a DataFrame in row batches and stitch the results together.

    Each batch of at most ``batch_size`` rows is passed through
    transform_dataframe; the results are concatenated with a fresh
    0..n-1 index.

    Args:
        df: DataFrame to process.
        batch_size: Number of rows per batch; must be at least 1.

    Returns:
        Processed DataFrame with a reset index.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
        DataFrameTimeoutError: If processing exceeds
            DATAFRAME_TRANSFORM_TIMEOUT (raised by the decorator).
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
    if len(df) == 0:
        # With no rows there are no batches, and pd.concat raises
        # ValueError on an empty list; transform the empty frame directly.
        return transform_dataframe(df).reset_index(drop=True)
    # transform_dataframe copies its input, so no extra copy of each
    # slice is needed here.
    processed_dfs = [
        transform_dataframe(df.iloc[start:start + batch_size])
        for start in range(0, len(df), batch_size)
    ]
    return pd.concat(processed_dfs, ignore_index=True)
def transform_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of a card DataFrame.

    Missing values in 'colorIdentity' and 'colors' are replaced with
    'COLORLESS', and 'manaValue' and 'edhrecRank' are converted to numeric
    (unparseable values become NaN). Any of these columns that is absent
    is skipped. The input frame is not modified.

    Args:
        df: DataFrame to transform.

    Returns:
        Transformed copy of ``df``.
    """
    df = df.copy()
    # Fill missing values; guarded the same way as the numeric columns so a
    # frame lacking a colour column does not raise KeyError.
    for col in ('colorIdentity', 'colors'):
        if col in df.columns:
            df[col] = df[col].fillna('COLORLESS')
    # Convert types
    for col in ('manaValue', 'edhrecRank'):
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    return df
def combine_dataframes(dfs: List[pd.DataFrame]) -> pd.DataFrame:
    """Concatenate the DataFrames that pass validation.

    Each frame is checked with validate_dataframe against
    DATAFRAME_VALIDATION_RULES. Frames that fail validation, including
    empty frames, are logged and skipped.

    Args:
        dfs: List of DataFrames to combine.

    Returns:
        Combined DataFrame with a reset index.

    Raises:
        EmptyDataFrameError: If ``dfs`` is empty or no frame passes
            validation.
    """
    if not dfs:
        raise EmptyDataFrameError("No DataFrames to combine")
    valid_dfs = []
    for df in dfs:
        try:
            validate_dataframe(df, DATAFRAME_VALIDATION_RULES)
        except (DataFrameValidationError, EmptyDataFrameError) as e:
            # validate_dataframe raises EmptyDataFrameError for an empty
            # frame; treat it like any other invalid frame so it is skipped
            # rather than aborting the whole combine.
            logger.warning(f"Skipping invalid DataFrame: {e}")
        else:
            valid_dfs.append(df)
    if not valid_dfs:
        raise EmptyDataFrameError("No valid DataFrames to combine")
    return pd.concat(valid_dfs, ignore_index=True)
def load_commander_data(csv_path: str = COMMANDER_CSV_PATH,
                        converters: Dict = COMMANDER_CONVERTERS) -> pd.DataFrame:
    """Load commander data from a CSV file and fill missing colour values.

    Args:
        csv_path (str): Path to commander CSV file. Defaults to COMMANDER_CSV_PATH.
        converters (Dict): Column converters passed to ``pd.read_csv``.
            Defaults to COMMANDER_CONVERTERS.

    Returns:
        pd.DataFrame: Commander dataframe with missing 'colorIdentity' and
        'colors' values replaced by 'COLORLESS'.

    Raises:
        DeckBuilderError: If the CSV file is missing or cannot be loaded or
            processed; the underlying error is attached as ``__cause__``.
    """
    try:
        df = pd.read_csv(csv_path, converters=converters)
        df['colorIdentity'] = df['colorIdentity'].fillna('COLORLESS')
        df['colors'] = df['colors'].fillna('COLORLESS')
        return df
    except FileNotFoundError as e:
        logger.error(f"Commander CSV file not found at {csv_path}")
        # Chain the cause so the original traceback stays available.
        raise DeckBuilderError(f"Commander data file not found: {csv_path}") from e
    except Exception as e:
        logger.error(f"Error loading commander data: {e}")
        raise DeckBuilderError(f"Failed to load commander data: {str(e)}") from e
def process_fuzzy_matches(card_name: str,
                          df: pd.DataFrame,
                          threshold: int = FUZZY_MATCH_THRESHOLD,
                          max_choices: int = MAX_FUZZY_CHOICES) -> Tuple[str, List[Tuple[str, int]], bool]:
    """Process fuzzy matching for commander name selection.

    Args:
        card_name (str): Input card name to match
        df (pd.DataFrame): Commander dataframe to search (uses its 'name' column)
        threshold (int): Minimum score for direct match. Defaults to FUZZY_MATCH_THRESHOLD.
        max_choices (int): Maximum number of choices to return. Defaults to MAX_FUZZY_CHOICES.

    Returns:
        Tuple[str, List[Tuple[str, int]], bool]: ``(name, [], True)`` when the
        best match scores at least ``threshold``; otherwise
        ``("", candidates, False)`` where ``candidates`` is a list of
        ``(name, score)`` pairs (empty when there is nothing to match against).

    Raises:
        DeckBuilderError: If fuzzy matching fails; the underlying error is
            attached as ``__cause__``.
    """
    try:
        best = process.extractOne(card_name, df['name'])
        if best is None:
            # extractOne returns None when there are no choices to score.
            return "", [], False
        # Results are (choice, score) for list-like choices and
        # (choice, score, key) for dict-like ones such as a Series, so
        # index by position instead of unpacking a fixed arity.
        if best[1] >= threshold:
            return best[0], [], True
        candidates = process.extract(card_name, df['name'], limit=max_choices)
        fuzzy_choices = [(candidate[0], candidate[1]) for candidate in candidates]
        return "", fuzzy_choices, False
    except Exception as e:
        logger.error(f"Error in fuzzy matching: {e}")
        raise DeckBuilderError(f"Failed to process fuzzy matches: {str(e)}") from e
def validate_commander_selection(df: pd.DataFrame, commander_name: str) -> Dict:
    """Validate and format commander data from selection.

    Rows whose 'name' equals ``commander_name`` are selected and converted
    to a dict mapping each column name to a list of values.

    Args:
        df (pd.DataFrame): Commander dataframe
        commander_name (str): Selected commander name

    Returns:
        Dict: Commander data as ``{column: [values...]}``

    Raises:
        DeckBuilderError: If no row matches, a required field is missing, or
            any other error occurs; the underlying error is attached as
            ``__cause__``.
    """
    try:
        filtered_df = df[df['name'] == commander_name]
        if filtered_df.empty:
            raise DeckBuilderError(f"No commander found with name: {commander_name}")
        commander_dict = filtered_df.to_dict('list')
        # Validate required fields
        required_fields = ['name', 'type', 'colorIdentity', 'colors', 'manaCost', 'manaValue']
        for field in required_fields:
            if field not in commander_dict or not commander_dict[field]:
                raise DeckBuilderError(f"Missing required commander data: {field}")
        return commander_dict
    except Exception as e:
        logger.error(f"Error validating commander selection: {e}")
        # Chain the cause so the original traceback is preserved.
        raise DeckBuilderError(f"Failed to validate commander selection: {str(e)}") from e