# mtg_python_deckbuilder/utility.py
#
# 347 lines
# No EOL
# 12 KiB
# Python

import pandas as pd
import re
import logging
from typing import Dict, List, Optional, Set, Union
from time import perf_counter
import settings
def pluralize(word: str) -> str:
    """Convert a word to its plural form using basic English pluralization rules.

    Rules, applied in order:
        * consonant + 'y' ending: drop the 'y' and add 'ies' (Ally -> Allies)
        * vowel + 'y' ending: add 's' (Monkey -> Monkeys)
        * 's', 'sh', 'ch', 'x' or 'z' ending: add 'es' (Fox -> Foxes)
        * 'f' ending: drop the 'f' and add 'ves' (Elf -> Elves)
        * anything else: add 's'

    Args:
        word: The singular word to pluralize

    Returns:
        The pluralized word
    """
    if word.endswith('y'):
        # Only a consonant before the final 'y' takes the 'ies' form;
        # "Monkey" must become "Monkeys", not "Monkeies".
        preceded_by_vowel = len(word) > 1 and word[-2].lower() in 'aeiou'
        if preceded_by_vowel:
            return word + 's'
        return word[:-1] + 'ies'
    if word.endswith(('s', 'sh', 'ch', 'x', 'z')):
        return word + 'es'
    if word.endswith('f'):
        return word[:-1] + 'ves'
    return word + 's'
def sort_list(items: Union[List, pd.Series]) -> Union[List, pd.Series]:
    """Return the given list or pandas Series sorted in ascending order.

    Args:
        items: List or Series to sort

    Returns:
        A new sorted list for list input, the result of ``sort_values()``
        for Series input, and the argument itself for any other type.
    """
    if isinstance(items, list):
        return sorted(items)
    if isinstance(items, pd.Series):
        return items.sort_values()
    # Anything else is handed back untouched
    return items
def create_type_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series:
    """Build a boolean mask of rows whose 'type' column matches any pattern.

    Matching is case-insensitive and null cells never match.

    Args:
        df: DataFrame to search
        type_text: Type text pattern(s) to match. Can be a single string or list of strings.
        regex: Whether to treat patterns as regex expressions (default: True)

    Returns:
        Boolean Series indicating matching rows

    Raises:
        ValueError: If type_text is empty or None
        TypeError: If type_text is not a string or list of strings
    """
    if not type_text:
        raise ValueError("type_text cannot be empty or None")

    # Normalise the input to a list of patterns
    if isinstance(type_text, str):
        patterns = [type_text]
    elif isinstance(type_text, list):
        patterns = type_text
    else:
        raise TypeError("type_text must be a string or list of strings")

    type_column = df['type']

    if not regex:
        # Literal matching: one mask per pattern, OR-ed row-wise
        per_pattern = [
            type_column.str.contains(literal, case=False, na=False, regex=False)
            for literal in patterns
        ]
        return pd.concat(per_pattern, axis=1).any(axis=1)

    # Regex matching: a single alternation covers every pattern
    alternation = '|'.join(str(p) for p in patterns)
    return type_column.str.contains(alternation, case=False, na=False, regex=True)
def create_text_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True, combine_with_or: bool = True) -> pd.Series:
    """Create a boolean mask for rows where text matches one or more patterns.

    Matching is case-insensitive and null cells never match.

    Args:
        df: DataFrame to search
        type_text: Text pattern(s) to match. Can be a single string or list of strings.
        regex: Whether to treat patterns as regex expressions (default: True)
        combine_with_or: Whether to combine multiple patterns with OR (True) or AND (False).
            Honoured in both regex and literal mode.

    Returns:
        Boolean Series indicating matching rows

    Raises:
        ValueError: If type_text is empty or None
        TypeError: If type_text is not a string or list of strings
    """
    if not type_text:
        raise ValueError("type_text cannot be empty or None")
    if isinstance(type_text, str):
        type_text = [type_text]
    elif not isinstance(type_text, list):
        raise TypeError("type_text must be a string or list of strings")

    if regex and combine_with_or:
        # OR over regexes is a single alternation pattern
        pattern = '|'.join(f'{p}' for p in type_text)
        return df['text'].str.contains(pattern, case=False, na=False, regex=True)

    # Every other combination needs one mask per pattern. Previously the
    # regex branch always OR-ed, silently ignoring combine_with_or=False.
    masks = [df['text'].str.contains(p, case=False, na=False, regex=regex) for p in type_text]
    combined = pd.concat(masks, axis=1)
    if combine_with_or:
        return combined.any(axis=1)
    return combined.all(axis=1)
def create_keyword_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series:
    """Build a boolean mask of rows whose 'keywords' column matches any pattern.

    Null keyword cells are replaced with an empty string and every cell is
    converted to ``str`` before matching. Matching is case-insensitive.

    Args:
        df: DataFrame to search
        type_text: Type text pattern(s) to match. Can be a single string or list of strings.
        regex: Whether to treat patterns as regex expressions (default: True)

    Returns:
        Boolean Series indicating matching rows. For rows with empty/null keywords,
        returns False. An empty DataFrame yields an empty boolean Series.

    Raises:
        ValueError: If type_text is empty or None
        TypeError: If type_text is not a string or list of strings
        ValueError: If required 'keywords' column is missing from DataFrame
    """
    # The column check comes first, before any other validation
    validate_dataframe_columns(df, {'keywords'})

    # Nothing to match against: short-circuit with an empty mask
    if not len(df):
        return pd.Series([], dtype=bool)

    if not type_text:
        raise ValueError("type_text cannot be empty or None")

    if isinstance(type_text, str):
        patterns = [type_text]
    elif isinstance(type_text, list):
        patterns = type_text
    else:
        raise TypeError("type_text must be a string or list of strings")

    # Blank out nulls, then stringify so the .str accessor is always usable
    keyword_text = df['keywords'].fillna('').astype(str)

    if not regex:
        per_pattern = [
            keyword_text.str.contains(literal, case=False, na=False, regex=False)
            for literal in patterns
        ]
        return pd.concat(per_pattern, axis=1).any(axis=1)

    alternation = '|'.join(str(p) for p in patterns)
    return keyword_text.str.contains(alternation, case=False, na=False, regex=True)
def create_name_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series:
    """Build a boolean mask of rows whose 'name' column matches any pattern.

    Matching is case-insensitive and null cells never match.

    Args:
        df: DataFrame to search
        type_text: Type text pattern(s) to match. Can be a single string or list of strings.
        regex: Whether to treat patterns as regex expressions (default: True)

    Returns:
        Boolean Series indicating matching rows

    Raises:
        ValueError: If type_text is empty or None
        TypeError: If type_text is not a string or list of strings
    """
    if not type_text:
        raise ValueError("type_text cannot be empty or None")

    # Accept a bare string by wrapping it; reject everything that is not a list
    if isinstance(type_text, str):
        patterns = [type_text]
    elif isinstance(type_text, list):
        patterns = type_text
    else:
        raise TypeError("type_text must be a string or list of strings")

    names = df['name']

    if regex:
        # One alternation regex handles all patterns at once
        combined_pattern = '|'.join(str(p) for p in patterns)
        return names.str.contains(combined_pattern, case=False, na=False, regex=True)

    # Literal matching: one mask per pattern, OR-ed row-wise
    literal_masks = []
    for literal in patterns:
        literal_masks.append(names.str.contains(literal, case=False, na=False, regex=False))
    return pd.concat(literal_masks, axis=1).any(axis=1)
def extract_creature_types(type_text: str, creature_types: List[str], non_creature_types: List[str]) -> List[str]:
    """Pick the creature types out of a whitespace-separated type line.

    Args:
        type_text: The type line text to parse
        creature_types: List of valid creature types
        non_creature_types: List of non-creature types to exclude

    Returns:
        List of extracted creature types, in the order they appear in
        type_text (duplicates are kept)
    """
    extracted = []
    for token in type_text.split():
        candidate = token.strip()
        if candidate not in creature_types:
            continue
        if candidate in non_creature_types:
            continue
        extracted.append(candidate)
    return extracted
def find_types_in_text(text: str, name: str, creature_types: List[str]) -> List[str]:
    """Find creature types mentioned in card text.

    Each whitespace-separated word is stripped of every character other than
    ASCII letters and hyphens before being compared against creature_types.
    A type that occurs as a substring of the card name is skipped.

    Args:
        text: Card text to search (null/NaN yields an empty result)
        name: Card name to exclude from search
        creature_types: List of valid creature types

    Returns:
        List of found creature types, de-duplicated and in order of first
        appearance in the text
    """
    if pd.isna(text):
        return []

    found_types = []
    for word in text.split():
        clean_word = re.sub(r'[^a-zA-Z-]', '', word)
        if clean_word in creature_types and clean_word not in name:
            found_types.append(clean_word)

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is stable across runs (list(set(...)) was not).
    return list(dict.fromkeys(found_types))
def add_outlaw_type(types: List[str], outlaw_types: List[str]) -> List[str]:
    """Append 'Outlaw' when the card carries at least one outlaw-qualifying type.

    Args:
        types: List of current types
        outlaw_types: List of types that qualify for Outlaw

    Returns:
        A new list with 'Outlaw' appended when a qualifying type is present
        and 'Outlaw' is not already listed; otherwise the original list object
    """
    # Already tagged: nothing to do
    if 'Outlaw' in types:
        return types

    for card_type in types:
        if card_type in outlaw_types:
            # Build a new list rather than mutating the caller's
            return types + ['Outlaw']
    return types
def create_tag_mask(df: pd.DataFrame, tag_patterns: Union[str, List[str]], column: str = 'themeTags') -> pd.Series:
    """Build a boolean mask of rows having a tag that contains any given pattern.

    A row matches when at least one pattern is contained (``in``) in at
    least one of the tags stored in that row's cell of ``column``.

    Args:
        df: DataFrame to search
        tag_patterns: String or list of strings to match against tags
        column: Column containing tags to search (default: 'themeTags')

    Returns:
        Boolean Series indicating matching rows; an empty boolean Series
        when df has no rows

    Examples:
        # Match cards with draw-related tags
        >>> mask = create_tag_mask(df, ['Card Draw', 'Conditional Draw'])
        >>> mask = create_tag_mask(df, 'Unconditional Draw')
    """
    patterns = [tag_patterns] if isinstance(tag_patterns, str) else tag_patterns

    # No rows: return an empty mask without touching the column
    if not len(df):
        return pd.Series([], dtype=bool)

    def matcher_for(pattern):
        """Return a per-row predicate bound to one pattern."""
        def row_matches(row_tags):
            for tag in row_tags:
                if pattern in tag:
                    return True
            return False
        return row_matches

    # One mask per pattern
    per_pattern = []
    for pattern in patterns:
        per_pattern.append(df[column].apply(matcher_for(pattern)))

    # A row qualifies if any pattern matched it
    return pd.concat(per_pattern, axis=1).any(axis=1)
def validate_dataframe_columns(df: pd.DataFrame, required_columns: Set[str]) -> None:
    """Check that every required column is present in the DataFrame.

    Args:
        df: DataFrame to validate
        required_columns: Set of column names that must be present

    Raises:
        ValueError: If any required columns are missing; the message lists them
    """
    present = set(df.columns)
    missing = required_columns - present
    if not missing:
        return
    raise ValueError(f"Missing required columns: {missing}")
def apply_tag_vectorized(df: pd.DataFrame, mask: pd.Series, tags: List[str]) -> None:
    """Add tags to the 'themeTags' cell of every row selected by a boolean mask.

    The DataFrame is modified in place. Each selected cell is replaced by
    the sorted, de-duplicated union of its existing tags and the new tags.

    Args:
        df: The dataframe to modify
        mask: Boolean series indicating which rows to tag
        tags: List of tags to apply (a non-list value is wrapped in a list)
    """
    new_tags = tags if isinstance(tags, list) else [tags]

    def merge_tags(existing):
        # Union of old and new tags, returned as a sorted list
        return sorted(set(existing + new_tags))

    selected = df.loc[mask, 'themeTags']
    df.loc[mask, 'themeTags'] = selected.apply(merge_tags)
def create_mass_effect_mask(df: pd.DataFrame, effect_type: str) -> pd.Series:
    """Build a boolean mask of cards whose text matches a mass-effect pattern set.

    The patterns are looked up by effect_type in
    ``settings.BOARD_WIPE_TEXT_PATTERNS`` and matched with create_text_mask
    using its default options.

    Args:
        df: DataFrame to search
        effect_type: Type of mass effect to match ('destruction', 'exile', 'bounce', 'sacrifice', 'damage')

    Returns:
        Boolean Series indicating which cards have mass effects of the specified type

    Raises:
        ValueError: If effect_type is not recognized
    """
    known_patterns = settings.BOARD_WIPE_TEXT_PATTERNS
    if effect_type in known_patterns:
        return create_text_mask(df, known_patterns[effect_type])
    raise ValueError(f"Unknown effect type: {effect_type}")
def create_damage_pattern(number: Union[int, str]) -> str:
    """Build the text pattern for a "deals N damage" effect.

    Args:
        number: Number or variable (X) for damage amount

    Returns:
        Pattern string for matching damage effects
    """
    template = 'deals {} damage'
    return template.format(number)
def create_mass_damage_mask(df: pd.DataFrame) -> pd.Series:
    """Build a boolean mask of cards with mass damage effects.

    A card matches when its text contains both a "deals N damage" phrase
    (N from 1 to 20, or X) and one of the multi-target phrases listed below.
    The two phrases are matched independently of each other.

    Args:
        df: DataFrame to search

    Returns:
        Boolean Series indicating which cards have mass damage effects
    """
    # Fixed amounts 1 through 20, followed by the variable amount X
    amounts = list(range(1, 21)) + ['X']
    damage_patterns = [create_damage_pattern(amount) for amount in amounts]

    # Phrases naming who or what receives the damage
    recipient_patterns = [
        'to each creature',
        'to all creatures',
        'to each player',
        'to each opponent',
        'to everything',
    ]

    deals_damage = create_text_mask(df, damage_patterns)
    hits_many = create_text_mask(df, recipient_patterns)
    return deals_damage & hits_many