Started work refactoring the tagging functions by using Traycer

This commit is contained in:
mwisnowski 2024-12-31 12:04:10 -08:00
parent e404515d91
commit a4abea2c3c
4 changed files with 736 additions and 516 deletions

View file

@ -32,6 +32,18 @@ board_wipe_tags = ['destroy all', 'destroy each', 'return all', 'return each', '
card_types = ['Artifact','Creature', 'Enchantment', 'Instant', 'Land', 'Planeswalker', 'Sorcery', card_types = ['Artifact','Creature', 'Enchantment', 'Instant', 'Land', 'Planeswalker', 'Sorcery',
'Kindred', 'Dungeon', 'Battle'] 'Kindred', 'Dungeon', 'Battle']
# Theme tags contributed by each card type (card type -> list of theme tags).
# 'Creature' has no entry: its line is commented out, so no tag is derived
# from that type through this mapping.
TYPE_TAG_MAPPING = {
    'Artifact': ['Artifacts Matter'],
    'Battle': ['Battles Matter'],
    # 'Creature': [],
    'Enchantment': ['Enchantments Matter'],
    'Instant': ['Spells Matter', 'Spellslinger'],
    'Land': ['Lands Matter'],
    'Planeswalker': ['Superfriends'],
    'Sorcery': ['Spells Matter', 'Spellslinger'],
}
csv_directory = 'csv_files' csv_directory = 'csv_files'
colors = ['colorless', 'white', 'blue', 'black', 'red', 'green', colors = ['colorless', 'white', 'blue', 'black', 'red', 'green',
@ -149,3 +161,31 @@ targetted_removal_tags = ['exile target', 'destroy target', 'return target', 'sh
'deals damage to target', 'loses all abilities'] 'deals damage to target', 'loses all abilities']
triggers = ['when', 'whenever', 'at'] triggers = ['when', 'whenever', 'at']
# Constants for DataFrame validation and processing

# Columns written to each {color}_cards.csv, in output order.
REQUIRED_COLUMNS = [
    'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors',
    'manaCost', 'manaValue', 'type', 'creatureTypes', 'text',
    'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side',
]

# Known theme tags, kept in alphabetical order.
# 'Superfriends' is spelled as one word so it matches the tag that
# TYPE_TAG_MAPPING assigns to Planeswalker cards; the previous spelling
# 'Super Friends' could never equal a tag produced by the tagger.
DEFAULT_THEME_TAGS = [
    'Aggro', 'Aristocrats', 'Artifacts Matter', 'Big Mana', 'Blink',
    'Board Wipes', 'Burn', 'Cantrips', 'Card Draw', 'Clones',
    'Combat Matters', 'Control', 'Counters Matter', 'Energy',
    'Enter the Battlefield', 'Equipment', 'Exile Matters', 'Infect',
    'Interaction', 'Lands Matter', 'Leave the Battlefield', 'Legends Matter',
    'Life Matters', 'Mill', 'Monarch', 'Protection', 'Ramp', 'Reanimate',
    'Removal', 'Sacrifice Matters', 'Spellslinger', 'Stax', 'Superfriends',
    'Theft', 'Token Creation', 'Tokens Matter', 'Voltron', 'X Spells',
]

# Same columns, same order as REQUIRED_COLUMNS. Derived (as an independent
# list) instead of hand-copied so the two can no longer drift apart.
COLUMN_ORDER = list(REQUIRED_COLUMNS)

# Constants for type detection and processing

# Creature types that additionally count as the 'Outlaw' type.
OUTLAW_TYPES = ['Assassin', 'Mercenary', 'Pirate', 'Rogue', 'Warlock']
# NOTE(review): not referenced anywhere in the visible code; presumably a
# row-batch size for type detection -- confirm against its consumer.
TYPE_DETECTION_BATCH_SIZE = 1000

812
tagger.py
View file

@ -2,17 +2,47 @@ from __future__ import annotations
import logging import logging
import os import os
import re
from typing import Dict, List, Optional, Set, Union
import pandas as pd # type: ignore import pandas as pd # type: ignore
import settings import settings
import utility
from settings import artifact_tokens, csv_directory, colors, counter_types, enchantment_tokens, multiple_copy_cards, num_to_search, triggers from settings import artifact_tokens, csv_directory, colors, counter_types, enchantment_tokens, multiple_copy_cards, num_to_search, triggers
from setup import regenerate_csv_by_color from setup import regenerate_csv_by_color
from utility import pluralize, sort_list
# Named bundles of theme tags that are applied together.
# Key: short group name; value: the theme tags written for that group.
TAG_GROUPS = {
    "Cantrips": [
        "Cantrips",
        "Card Draw",
        "Spellslinger",
        "Spells Matter",
    ],
    "Tokens": ["Token Creation", "Tokens Matter"],
    "Counters": ["Counters Matter"],
    "Combat": ["Combat Matters", "Combat Tricks"],
    "Artifacts": ["Artifacts Matter", "Artifact Tokens"],
    "Enchantments": ["Enchantments Matter", "Enchantment Tokens"],
    "Lands": ["Lands Matter"],
    "Spells": ["Spellslinger", "Spells Matter"],
}
# Reusable regular expressions, keyed by the effect they look for in
# card rules text.
PATTERN_GROUPS = {
    # "draw a card" / "draws one card"
    "draw": r"draw[s]? a card|draw[s]? one card",
    # attacking, blocking, or combat damage
    "combat": r"attack[s]?|block[s]?|combat damage",
    # creating a token or putting one somewhere
    "tokens": r"create[s]? .* token|put[s]? .* token",
    # +1/+1, -1/-1 and loyalty counters
    "counters": r"\+1/\+1 counter|\-1/\-1 counter|loyalty counter",
    # sacrificing something, or having been sacrificed
    "sacrifice": r"sacrifice[s]? .*|sacrificed",
    # exiling something, or having been exiled
    "exile": r"exile[s]? .*|exiled",
}
logging.basicConfig( logging.basicConfig(
level=logging.INFO, level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s' format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.StreamHandler(),
logging.FileHandler('tagger.log', mode='w')
]
) )
### Setup ### Setup
@ -70,10 +100,13 @@ def load_dataframe(color: str) -> pd.DataFrame:
except FileNotFoundError as e: except FileNotFoundError as e:
logging.error(f'Error: {e}') logging.error(f'Error: {e}')
except pd.errors.ParserError: raise
logging.error('Error parsing the CSV file.') except pd.errors.ParserError as e:
logging.error(f'Error parsing the CSV file: {e}')
raise
except Exception as e: except Exception as e:
logging.error(f'An unexpected error occurred: {e}') logging.error(f'An unexpected error occurred: {e}')
raise
## Tag cards on a color-by-color basis ## Tag cards on a color-by-color basis
def tag_by_color(df, color): def tag_by_color(df, color):
@ -90,41 +123,41 @@ def tag_by_color(df, color):
print('====================\n') print('====================\n')
create_theme_tags(df, color) create_theme_tags(df, color)
print('====================\n') print('====================\n')
#
# Go through each type of tagging ## Go through each type of tagging
add_creatures_to_tags(df, color) #add_creatures_to_tags(df, color)
print('====================\n') #print('====================\n')
tag_for_card_types(df, color) tag_for_card_types(df, color)
print('====================\n') print('====================\n')
tag_for_keywords(df, color) #tag_for_keywords(df, color)
print('====================\n') #print('====================\n')
#
# Tag for various effects ## Tag for various effects
tag_for_cost_reduction(df, color) #tag_for_cost_reduction(df, color)
print('====================\n') #print('====================\n')
tag_for_card_draw(df, color) #tag_for_card_draw(df, color)
print('====================\n') #print('====================\n')
tag_for_artifacts(df, color) #tag_for_artifacts(df, color)
print('====================\n') #print('====================\n')
tag_for_enchantments(df, color) #tag_for_enchantments(df, color)
print('====================\n') #print('====================\n')
tag_for_exile_matters(df, color) #tag_for_exile_matters(df, color)
print('====================\n') #print('====================\n')
tag_for_tokens(df, color) #tag_for_tokens(df, color)
print('====================\n') #print('====================\n')
tag_for_life_matters(df, color) #tag_for_life_matters(df, color)
print('====================\n') #print('====================\n')
tag_for_counters(df, color) #tag_for_counters(df, color)
print('====================\n') #print('====================\n')
tag_for_voltron(df, color) #tag_for_voltron(df, color)
print('====================\n') #print('====================\n')
tag_for_spellslinger(df, color) #tag_for_spellslinger(df, color)
print('====================\n') #print('====================\n')
tag_for_ramp(df, color) #tag_for_ramp(df, color)
print('====================\n') #print('====================\n')
tag_for_themes(df, color) #tag_for_themes(df, color)
print('====================\n') #print('====================\n')
tag_for_interaction(df, color) #tag_for_interaction(df, color)
# Lastly, sort all theme tags for easier reading # Lastly, sort all theme tags for easier reading
sort_theme_tags(df, color) sort_theme_tags(df, color)
@ -134,155 +167,236 @@ def tag_by_color(df, color):
#keyboard.wait('esc') #keyboard.wait('esc')
## Determine any non-creature cards that have creature types mentioned ## Determine any non-creature cards that have creature types mentioned
def kindred_tagging(df, color): def kindred_tagging(df: pd.DataFrame, color: str) -> None:
logging.info(f'Settings creature type tags on {color}_cards.csv.\n') """Tag cards with creature types and related types.
print('===============\n')
# Create new blank list column called 'creatureTypes
df['creatureTypes'] = [[] for _ in range(len(df))]
# Set creature types
logging.info(f'Checking for and setting creature types in {color}_cards.csv')
for index, row in df.iterrows():
if 'Creature' in row['type']:
kindred_tags = []
creature_types = row['type']
split_types = creature_types.split()
for creature_type in split_types:
# If the type is a non-creature type ignore it
if creature_type not in settings.non_creature_types:
if creature_type not in kindred_tags:
for creature_types_individual in settings.creature_types:
if creature_type == creature_types_individual:
kindred_tags.append(creature_type)
df.at[index, 'creatureTypes'] = kindred_tags
logging.info(f'Creature types set in {color}_cards.csv.\n')
print('==========\n')
# Set outlaws
logging.info(f'Checking for and setting Outlaw types in {color}_cards.csv')
outlaws = ['Assassin', 'Mercenary', 'Pirate', 'Rogue', 'Warlock']
for index, row in df.iterrows():
if 'Creature' in row['type']:
kindred_tags = row['creatureTypes']
creature_types = kindred_tags
for creature_type in creature_types:
if creature_type in outlaws:
if 'Outlaw' not in kindred_tags:
kindred_tags.append('Outlaw')
df.at[index, 'creatureTypes'] = kindred_tags
logging.info(f'Outlaw types set in {color}_cards.csv.\n')
print('==========\n')
# Check for creature types in text (i.e. how 'Voja, Jaws of the Conclave' cares about Elves)
logging.info(f'Checking for and setting creature types found in the text of cards in {color}_cards.csv')
for index, row in df.iterrows():
kindred_tags = row['creatureTypes']
if pd.isna(row['text']):
continue
split_text = row['text'].split()
ignore_list = ['Elite Inquisitor', 'Breaker of Armies', 'Cleopatra, Exiled Pharaoh', 'Nath\'s Buffoon']
for creature_type in settings.creature_types:
if row['name'] in ignore_list:
continue
if creature_type in row['name']:
continue
if pluralize(f'{creature_type}') in row['name']:
continue
for word in split_text:
if creature_type == word:
if creature_type not in row['name']:
if creature_type not in kindred_tags:
kindred_tags.append(creature_type)
df.at[index, 'creatureTypes'] = kindred_tags
# Tag for pluralized types (i.e. Elves, Wolves, etc...) in textbox
if pluralize(f'{creature_type}') == word:
if pluralize(f'{creature_type}') not in row['name']:
if creature_type not in kindred_tags:
kindred_tags.append(creature_type)
df.at[index, 'creatureTypes'] = kindred_tags
logging.info(f'Creature types from text set in {color}_cards.csv.\n')
# Overwrite file with creature type tags
columns_to_keep = ['name', 'faceName','edhrecRank', 'colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', 'power', 'toughness', 'keywords', 'layout', 'side']
df = df[columns_to_keep]
df.to_csv(f'{csv_directory}/{color}_cards.csv', index=False)
logging.info(f'\nCreature types tagged on {color}_cards.csv.\n')
def create_theme_tags(df: pd.DataFrame, color: str) -> None:
"""
Initialize the themeTags column and reorganize dataframe columns.
Args: Args:
df (pd.DataFrame): The card dataframe to process df: DataFrame containing card data
color (str): The color being processed for logging color: Color identifier for logging
""" """
logging.info(f'Creating theme/effect tag column on {color}_cards.csv.') start_time = pd.Timestamp.now()
logging.info(f'Setting creature type tags on {color}_cards.csv')
# Initialize themeTags column efficiently try:
df['themeTags'] = pd.Series([[] for _ in range(len(df))]) # Initialize creatureTypes column vectorized
df['creatureTypes'] = pd.Series([[] for _ in range(len(df))])
# Define column order # Detect creature types using mask
columns_to_keep = [ creature_mask = utility.create_type_mask(df, 'Creature')
'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors', if creature_mask.any():
'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', creature_rows = df[creature_mask]
'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side' for idx, row in creature_rows.iterrows():
types = utility.extract_creature_types(
row['type'],
settings.creature_types,
settings.non_creature_types
)
if types:
df.at[idx, 'creatureTypes'] = types
creature_time = pd.Timestamp.now()
logging.info(f'Creature type detection completed in {(creature_time - start_time).total_seconds():.2f}s')
print('==========\n')
# Process outlaw types
outlaws = settings.OUTLAW_TYPES
df['creatureTypes'] = df.apply(
lambda row: utility.add_outlaw_type(row['creatureTypes'], outlaws)
if isinstance(row['creatureTypes'], list) else row['creatureTypes'],
axis=1
)
outlaw_time = pd.Timestamp.now()
logging.info(f'Outlaw type processing completed in {(outlaw_time - creature_time).total_seconds():.2f}s')
# Find creature types in text
logging.info('Checking for creature types in card text')
# Check for creature types in text (i.e. how 'Voja, Jaws of the Conclave' cares about Elves)
logging.info(f'Checking for and setting creature types found in the text of cards in {color}_cards.csv')
ignore_list = [
'Elite Inquisitor', 'Breaker of Armies',
'Cleopatra, Exiled Pharaoh', 'Nath\'s Buffoon'
] ]
# Reorder and select columns for idx, row in df.iterrows():
if row['name'] not in ignore_list:
text_types = utility.find_types_in_text(
row['text'],
row['name'],
settings.creature_types
)
if text_types:
current_types = row['creatureTypes']
if isinstance(current_types, list):
df.at[idx, 'creatureTypes'] = sorted(
list(set(current_types + text_types))
)
text_time = pd.Timestamp.now()
logging.info(f'Text-based type detection completed in {(text_time - outlaw_time).total_seconds():.2f}s')
# Save results
try: try:
columns_to_keep = [
'name', 'faceName', 'edhrecRank', 'colorIdentity',
'colors', 'manaCost', 'manaValue', 'type',
'creatureTypes', 'text', 'power', 'toughness',
'keywords', 'layout', 'side'
]
df = df[columns_to_keep] df = df[columns_to_keep]
except KeyError as e: df.to_csv(f'{settings.csv_directory}/{color}_cards.csv', index=False)
logging.warning(f"Warning: Missing expected column {e}") total_time = pd.Timestamp.now() - start_time
# Continue with available columns logging.info(f'Creature type tagging completed in {total_time.total_seconds():.2f}s')
existing_cols = [col for col in columns_to_keep if col in df.columns]
df = df[existing_cols]
# Save dataframe except Exception as e:
df.to_csv(f'{csv_directory}/{color}_cards.csv', index=False) logging.error(f'Error saving results: {e}')
logging.info(f'Theme/effect tag column created on {color}_cards.csv.\n')
def tag_for_card_types(df, color): # Overwrite file with creature type tags
# Iterate through each {color}_cards.csv file to find artifact cards except Exception as e:
# Also check for cards that care about artifacts logging.error(f'Error in kindred_tagging: {e}')
logging.info(f'Settings card type tags on {color}_cards.csv.\n\n') raise
# Define the card types
card_types = ['Artifact', 'Creature', 'Enchantment', 'Land', 'Instant', 'Sorcery', 'Planeswalker', 'Battle']
# Tag for artifacts def create_theme_tags(df: pd.DataFrame, color: str) -> None:
for card_type in card_types: """Initialize and configure theme tags for a card DataFrame.
logging.info(f'Tagging cards in {color}_cards.csv that have the "{card_type}" type.')
for index, row in df.iterrows():
theme_tags = row['themeTags']
if card_type in row['type']:
tag_type = []
# Tagging for artifacts, enchantments, and lands matter This function initializes the themeTags column, validates the DataFrame structure,
if card_type in ['Artifact', 'Battle', 'Enchantment', 'Land']: and reorganizes columns in an efficient manner. It uses vectorized operations
tag_type.extend([f'{card_type}s Matter']) for better performance.
# Tagging for spellslinger/spells matter Args:
if card_type in ['Instant', 'Sorcery']: df: DataFrame containing card data to process
tag_type.extend(['Spells Matter', 'Spellslinger']) color: Color identifier for logging purposes (e.g. 'white', 'blue')
# Tagging for Superfriends Returns:
if card_type == 'Planeswalker': The processed DataFrame with initialized theme tags and reorganized columns
tag_type.extend(['Superfriends'])
# Adding the tags Raises:
for tag in tag_type: ValueError: If required columns are missing or color is invalid
if tag not in theme_tags: TypeError: If inputs are not of correct type
theme_tags.extend([tag]) """
df.at[index, 'themeTags'] = theme_tags start_time = pd.Timestamp.now()
logging.info(f'Cards with the "{card_type}" type in {color}_cards.csv have been tagged.\n') logging.info('Initializing theme tags for %s cards', color)
print('=====\n')
# Validate inputs
if not isinstance(df, pd.DataFrame):
raise TypeError("df must be a pandas DataFrame")
if not isinstance(color, str):
raise TypeError("color must be a string")
if color not in settings.colors:
raise ValueError(f"Invalid color: {color}")
try:
# Initialize themeTags column using vectorized operation
df['themeTags'] = pd.Series([[] for _ in range(len(df))], index=df.index)
# Define expected columns
required_columns = {
'name', 'text', 'type', 'keywords',
'creatureTypes', 'power', 'toughness'
}
# Validate required columns
missing = required_columns - set(df.columns)
if missing:
raise ValueError(f"Missing required columns: {missing}")
# Define column order
columns_to_keep = settings.REQUIRED_COLUMNS
# Reorder columns efficiently
available_cols = [col for col in columns_to_keep if col in df.columns]
df = df.reindex(columns=available_cols)
# Save results
try:
df.to_csv(f'{settings.csv_directory}/{color}_cards.csv', index=False)
total_time = pd.Timestamp.now() - start_time
logging.info(f'Creature type tagging completed in {total_time.total_seconds():.2f}s')
# Log performance metrics
end_time = pd.Timestamp.now()
duration = (end_time - start_time).total_seconds()
logging.info('Theme tags initialized in %.2f seconds', duration)
except Exception as e:
logging.error(f'Error saving results: {e}')
except Exception as e:
logging.error('Error initializing theme tags: %s', str(e))
raise
def create_text_mask(df: pd.DataFrame, pattern: str) -> pd.Series:
    """Flag the rows whose ``text`` column matches a regular expression.

    Matching is case-insensitive, and rows with a missing ``text`` value
    are reported as non-matching.

    Args:
        df: DataFrame with a ``text`` column to search.
        pattern: Regular expression to look for.

    Returns:
        Boolean Series, aligned with ``df``, that is True where the
        pattern was found.
    """
    card_text = df['text']
    return card_text.str.contains(pattern, case=False, na=False, regex=True)
def create_keyword_mask(df: pd.DataFrame, keywords: Union[str, List[str]]) -> pd.Series:
    """Create a boolean mask for rows whose ``keywords`` contain any keyword.

    Each keyword is matched literally (regex metacharacters are escaped)
    and case-insensitively as a substring of the ``keywords`` column.
    Rows with a missing ``keywords`` value never match.

    Args:
        df: DataFrame with a ``keywords`` column to search.
        keywords: A single keyword or a list of keywords to look for.

    Returns:
        Boolean Series, aligned with ``df``, that is True where at least
        one keyword was found. All False when no (non-empty) keyword is
        given.
    """
    if isinstance(keywords, str):
        keywords = [keywords]

    # Drop empty entries: an empty alternative (or an empty pattern) matches
    # every non-null row, which would silently select the whole frame.
    terms = [keyword for keyword in keywords if keyword]
    if not terms:
        return pd.Series(False, index=df.index, dtype=bool)

    # Escape each keyword so characters such as '.', '+' or '(' are matched
    # literally instead of being interpreted as regex syntax.
    pattern = '|'.join(re.escape(term) for term in terms)
    return df['keywords'].str.contains(pattern, case=False, na=False, regex=True)
def tag_for_card_types(df: pd.DataFrame, color: str) -> None:
    """Apply the theme tags associated with each card type.

    For every entry in ``settings.TYPE_TAG_MAPPING`` a row mask is built
    with ``utility.create_type_mask`` and, when at least one row is
    selected, the entry's tags are handed to
    ``utility.apply_tag_vectorized`` together with that mask.

    Args:
        df: DataFrame containing card data; must have ``type`` and
            ``themeTags`` columns.
        color: Color identifier, used only in log messages.

    Raises:
        ValueError: If ``type`` or ``themeTags`` is missing from ``df``.
        Exception: Any other error raised while tagging is logged and
            re-raised unchanged.
    """
    started = pd.Timestamp.now()
    logging.info('Setting card type tags on %s_cards.csv', color)

    try:
        # Both columns are needed: 'type' to select rows, 'themeTags' to tag.
        absent = {'type', 'themeTags'} - set(df.columns)
        if absent:
            raise ValueError(f"Missing required columns: {absent}")

        for card_type, tags in settings.TYPE_TAG_MAPPING.items():
            type_mask = utility.create_type_mask(df, card_type)
            if not type_mask.any():
                # No card of this type in the frame; nothing to tag or log.
                continue
            utility.apply_tag_vectorized(df, type_mask, tags)
            logging.info('Tagged %d cards with %s type', type_mask.sum(), card_type)

        elapsed = (pd.Timestamp.now() - started).total_seconds()
        logging.info('Card type tagging completed in %.2fs', elapsed)

    except Exception as e:
        logging.error('Error in tag_for_card_types: %s', str(e))
        raise
# Overwrite file with artifact tag added # Overwrite file with artifact tag added
logging.info(f'Card type tags set on {color}_cards.csv.\n') logging.info(f'Card type tags set on {color}_cards.csv.\n')
## Add creature types to the theme tags ## Add creature types to the theme tags
def add_creatures_to_tags(df, color): def add_creatures_to_tags(df, color):
print(f'Adding creature types to theme tags in {color}_cards.csv.') logging.info(f'Adding creature types to theme tags in {color}_cards.csv.')
# Add kindred_tags to creatureTypes column # Add kindred_tags to creatureTypes column
for index, row in df.iterrows(): for index, row in df.iterrows():
@ -294,24 +408,7 @@ def add_creatures_to_tags(df, color):
df.at[index, 'themeTags'] = theme_tags df.at[index, 'themeTags'] = theme_tags
# Overwrite file with kindred tags added # Overwrite file with kindred tags added
print(f'Creature types added to theme tags in {color}_cards.csv.\n') logging.info(f'Creature types added to theme tags in {color}_cards.csv.')
print('==========\n')
# Set Kindred Support
print(f'Checking for and setting Kindred Support tag in {color}_cards.csv')
all_kindred = ['changeling', 'choose a creature type', 'shares a creature type',
'shares at least one creature type', 'you control of the chosen type']
for index, row in df.iterrows():
if pd.isna(row['text']):
continue
theme_tags = row['themeTags']
for item in all_kindred:
if item in row['text'].lower():
if 'Kindred Support' not in theme_tags:
theme_tags.extend(['Kindred Support'])
df.at[index, 'themeTags'] = theme_tags
print(f'"Kindred Support" tag set in {color}_cards.csv.\n')
## Add keywords to theme tags ## Add keywords to theme tags
def tag_for_keywords(df, color): def tag_for_keywords(df, color):
@ -336,7 +433,7 @@ def tag_for_keywords(df, color):
def sort_theme_tags(df, color): def sort_theme_tags(df, color):
print(f'Alphabetically sorting theme tags in {color}_cards.csv.') print(f'Alphabetically sorting theme tags in {color}_cards.csv.')
df['themeTags'] = df['themeTags'].apply(sort_list) df['themeTags'] = df['themeTags'].apply(utility.sort_list)
columns_to_keep = ['name', 'faceName','edhrecRank', 'colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', 'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side'] columns_to_keep = ['name', 'faceName','edhrecRank', 'colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', 'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side']
df = df[columns_to_keep] df = df[columns_to_keep]
@ -680,6 +777,62 @@ def tag_for_loot(df, color):
# Overwrite file with loot tag added # Overwrite file with loot tag added
print(f'Loot cards tagged in {color}_cards.csv.\n') print(f'Loot cards tagged in {color}_cards.csv.\n')
def tag_for_cantrips(df: pd.DataFrame, color: str) -> None:
    """Tag cards in the DataFrame as cantrips based on specific criteria.

    A card is tagged when its mana value is at most 2 and its text matches
    the ``draw`` pattern, unless it is a Land or Equipment, has one of the
    excluded keywords, already carries the 'Loot' theme tag, or is one of
    the explicitly excluded card names. Matching cards receive the tags in
    ``TAG_GROUPS['Cantrips']``.

    Side effect: the ``manaValue`` column is converted to numeric in place
    (unparseable values become NaN).

    NOTE(review): another ``tag_for_cantrips`` definition appears further
    down this module; if both remain, the later one replaces this one at
    import time -- confirm which is intended.

    Args:
        df: The DataFrame containing card data.
        color: The color identifier, used only in log messages.

    Raises:
        Exception: Any error raised while tagging is logged and re-raised.
    """
    logging.info('Tagging cantrips in %s_cards.csv', color)

    try:
        # Convert mana value to numeric
        df['manaValue'] = pd.to_numeric(df['manaValue'], errors='coerce')

        # Exclude by card TYPE, not by rules text: searching the text column
        # (case-insensitively) dropped every card that merely mentions
        # "land" and let actual Lands and Equipment through.
        excluded_types = df['type'].str.contains('Land|Equipment', na=False)
        excluded_keywords = create_keyword_mask(
            df, ['Channel', 'Cycling', 'Connive', 'Learn', 'Ravenous']
        )
        has_loot = df['themeTags'].apply(lambda x: 'Loot' in x)

        # Cards that match the criteria but are not considered cantrips
        EXCLUDED_NAMES = {
            'Archivist of Oghma', 'Argothian Enchantress', 'Audacity', 'Betrayal', 'Bequeathal',
            'Blood Scrivener', 'Brigon, Soldier of Meletis', 'Compost', 'Cryptbreaker', 'Curiosity',
            'Dakra Mystic', 'Dockside Chef', 'Dreamcatcher', 'Edgewall Innkeeper', 'Flumph',
            'Folk Hero', 'Goblin Artisans', 'Hatching Plans', 'Ideas Unbound', 'Kor Spiritdancer',
            'Mindblade Render', 'Puresteel Paladin', 'Read the Runes', 'Sram, Senior Edificer',
            'Sygg, River Cutthroat', 'Wall of Mulch', 'Waste Not'
        }
        excluded_names = df['name'].isin(EXCLUDED_NAMES)

        # Create cantrip condition masks
        has_draw = create_text_mask(df, PATTERN_GROUPS['draw'])
        # A missing mana value is treated as infinitely expensive, so such
        # cards are never considered low cost.
        low_cost = df['manaValue'].fillna(float('inf')) <= 2

        # Combine conditions
        cantrip_mask = (
            ~excluded_types &
            ~excluded_keywords &
            ~has_loot &
            ~excluded_names &
            has_draw &
            low_cost
        )

        # Apply tags
        utility.apply_tag_vectorized(df, cantrip_mask, TAG_GROUPS['Cantrips'])

        logging.info('Successfully tagged cantrips in %s_cards.csv', color)

    except Exception as e:
        logging.error('Error tagging cantrips in %s_cards.csv: %s', color, str(e))
        raise
## Connive Effects ## Connive Effects
def tag_for_connive(df, color): def tag_for_connive(df, color):
print(f'Checking {color}_cards.csv for cards that have or care about "Connive".') print(f'Checking {color}_cards.csv for cards that have or care about "Connive".')
@ -1509,9 +1662,8 @@ def tag_auras(df, color):
theme_tags.extend([tag]) theme_tags.extend([tag])
df.at[index, 'themeTags'] = theme_tags df.at[index, 'themeTags'] = theme_tags
# Overwrite file with aura tag added # Overwrite file with Aura tag added
#df.to_csv(f'{csv_directory}/{color}_cards.csv', index=False) logging.info(f'Cards that have or care about "Aura" in {color}_cards.csv have been tagged.')
print(f'Cards that have or care about "Auraa" in {color}_cards.csv have been tagged.\n')
## Constellation ## Constellation
def tag_constellation(df, color): def tag_constellation(df, color):
@ -2684,244 +2836,67 @@ def tag_for_magecraft(df, color):
## Cantrips ## Cantrips
def tag_for_cantrips(df, color): def tag_for_cantrips(df, color):
# Iterate through each {color}_cards.csv file to find spells matter cards """
print(f'Settings "Cantrip" tags on {color}_cards.csv.') Tag cards in the DataFrame as cantrips based on specific criteria.
df['manaValue'] = df['manaValue'].astype(int)
# Logic for Cantrip cards Cantrips are defined as low-cost spells (mana value <= 2) that draw cards.
print('Note: I am considering a cantrip to be a card that has a mana value of 0-2,\n' The function excludes certain card types, keywords, and specific named cards
'does some effect, and draws cards.\n\n' from being tagged as cantrips.
'This also includes activated abilities, that when the combined mana value\n'
'and ability cost are less than 2 mana.\n')
for index, row in df.iterrows():
theme_tags = row['themeTags']
# Take out Lands and Equipment
if ('Land' in row['type']
or 'Equipment' in row['type']):
continue
# Remove ones that have specific kewords Parameters:
if pd.notna(row['keywords']): df (pd.DataFrame): The DataFrame containing card data.
if ('Channel' in row['keywords'] color (str): The color identifier for logging purposes.
or 'Cycling' in row['keywords']
or 'Connive' in row['keywords']
or 'Learn' in row['keywords']
or 'Ravenous' in row['keywords']
):
continue
# Remove cards that loot or have loot effects Returns:
if ('Loot' in row['themeTags'] None: The function modifies the DataFrame in place by applying tags.
): """
continue logging.info('Tagging cantrips in %s_cards.csv', color)
# Exclude specific cards # Convert mana value to numeric
# By name df['manaValue'] = pd.to_numeric(df['manaValue'], errors='coerce')
if ('Archivist of Oghma' == row['name']
or 'Argothian Enchantress' == row['name']
or 'Audacity' == row['name']
or 'Betrayal' == row['name']
or 'Bequeathal' == row['name']
or 'Blood Scrivener' == row['name']
or 'Brigone, Soldier of Meletis' == row['name']
or 'compost' == row['name']
or 'Concealing Curtains // Revealing Eye' == row['name']
or 'Cryptbreaker' == row['name']
or 'Curiosity' == row['name']
or 'Curse of Vengenace' == row['name']
or 'Cryptex' == row['name']
or 'Dakra Mystic' == row['name']
or 'Dawn of a New Age' == row['name']
or 'Dockside Chef' == row['name']
or 'Dreamcatcher' == row['name']
or 'Edgewall Innkeeper' == row['name']
or 'Eidolon of Philosphy' == row['name']
or 'Evolveld Sleeper' == row['name']
or 'Femeref Enchantress' == row['name']
or 'Finneas, Ace Archer' == row['name']
or 'Flumph' == row['name']
or 'Folk Hero' == row['name']
or 'Frodo, Adventurous Hobbit' == row['name']
or 'Goblin Artisans' == row['name']
or 'Goldberry, River-Daughter' == row['name']
or 'Gollum, Scheming Guide' == row['name']
or 'Hatching Plans' == row['name']
or 'Ideas Unbound' == row['name']
or 'Ingenius Prodigy' == row['name']
or 'Ior Ruin Expedition' == row['name']
or 'Jace\'s Erasure' == row['name']
or 'Keeper of the Mind' == row['name']
or 'Kor Spiritdancer' == row['name']
or 'Lodestone Bauble' == row['name']
or 'Puresteel Paladin' == row['name']
or 'Jeweled Bird' == row['name']
or 'Mindblade Render' == row['name']
or 'Multani\'s Presence' == row['name']
or 'Nahiri\'s Lithoforming' == row['name']
or 'Ordeal of Thassa' == row['name']
or 'Pollywog Prodigy' == row['name']
or 'Priest of Forgotten Gods' == row['name']
or 'RAvenous Squirrel' == row['name']
or 'Read the Runes' == row['name']
or 'Red Death, Shipwrecker' == row['name']
or 'Roil Cartographer' == row['name']
or 'Sage of Lat-Nam' == row['name']
or 'Saprazzan Heir' == row['name']
or 'Scion of Halaster' == row['name']
or 'See Beyond' == row['name']
or 'Selhoff Entomber' == row['name']
or 'Shielded Aether Thief' == row['name']
or 'Shore Keeper' == row['name']
or 'Silverquill Silencer' == row['name']
or 'Soldevi Sage' == row['name']
or 'Soldevi Sentry' == row['name']
or 'Spiritual Focus' == row['name']
or 'Sram, Senior Edificer' == row['name']
or 'Staff of the Storyteller' == row['name']
or 'Stirge' == row['name']
or 'Sylvan Echoes' == row['name']
or 'Sythis, Harvest\'s Hand' == row['name']
or 'Sygg, River Cutthroat' == row['name']
or 'Tenuous Truce' == row['name']
or 'Test of Talents' == row['name']
or 'Thalakos Seer' == row['name']
or 'Tribute to Horobi // Echo of Death\'s Wail' == row['name']
or 'Vampire Gourmand' == row['name']
or 'Vampiric Rites' == row['name']
or 'Vampirism' == row['name']
or 'Vessel of Paramnesia' == row['name']
or 'Witch\'s Cauldron' == row['name']
or 'Wall of Mulch' == row['name']
or 'Waste Not' == row['name']
or 'Well Rested' == row['name']
):
continue
# Matching text or triggers # Define exclusion conditions
if pd.notna(row['text']): excluded_types = df['type'].str.contains('Land|Equipment', na=False)
if ('cast from exile, you draw a card' in row['text'] excluded_keywords = df['keywords'].str.contains('Channel|Cycling|Connive|Learn|Ravenous', na=False)
or 'commit a crime, draw a card' in row['text'] has_loot = df['themeTags'].apply(lambda x: 'Loot' in x)
or 'deals damage to an opponent' in row['text'].lower()
or 'deals combat damage to a player' in row['text'].lower()
or 'deals combat damage to a player, you may draw a card' in row['text'].lower()
or 'deals combat damage to a player, draw a card' in row['text'].lower()
or 'deals combat damage to an opponent' in row['text'].lower()
or 'first time this turn, draw' in row['text'].lower()
or 'Gift a card' in row['text']
or 'give a gift' in row['text'].lower()
or 'then draw a card if it has' in row['text']
or 'target of a spell, draw' in row['text']
or 'target of a spell you control, draw' in row['text']
or 'unless that player pays' in row['text']
# Matches relating to skipping draws # Define name exclusions
or 'draw step, instead you may skip' in row['text'].lower() EXCLUDED_NAMES = {
or 'skip that draw' in row['text'].lower() 'Archivist of Oghma', 'Argothian Enchantress', 'Audacity', 'Betrayal', 'Bequeathal', 'Blood Scrivener', 'Brigon, Soldier of Meletis',
): 'Compost', 'Concealing curtains // Revealing Eye', 'Cryptbreaker', 'Curiosity', 'Cuse of Vengeance', 'Cryptek', 'Dakra Mystic',
continue 'Dawn of a New Age', 'Dockside Chef', 'Dreamcatcher', 'Edgewall Innkeeper', 'Eidolon of Philosophy', 'Evolved Sleeper',
'Femeref Enchantress', 'Finneas, Ace Archer', 'Flumph', 'Folk Hero', 'Frodo, Adventurous Hobbit', 'Goblin Artisans',
'Goldberry, River-Daughter', 'Gollum, Scheming Guide', 'Hatching Plans', 'Ideas Unbound', 'Ingenius Prodigy', 'Ior Ruin Expedition',
"Jace's Erasure", 'Keeper of the Mind', 'Kor Spiritdancer', 'Lodestone Bauble', 'Puresteel Paladin', 'Jeweled Bird', 'Mindblade Render',
"Multani's Presence", "Nahiri's Lithoforming", 'Ordeal of Thassa', 'Pollywog Prodigy', 'Priest of Forgotten Gods', 'Ravenous Squirrel',
'Read the Runes', 'Red Death, Shipwrecker', 'Roil Cartographer', 'Sage of Lat-Name', 'Saprazzan Heir', 'Scion of Halaster', 'See Beyond',
'Selhoff Entomber', 'Shielded Aether Theif', 'Shore Keeper', 'silverquill Silencer', 'Soldevi Sage', 'Soldevi Sentry', 'Spiritual Focus',
'Sram, Senior Edificer', 'Staff of the Storyteller', 'Stirge', 'Sylvan Echoes', "Sythis Harvest's Hand", 'Sygg, River Cutthroat',
'Tenuous Truce', 'Test of Talents', 'Thalakos seer', "Tribute to Horobi // Echo of Deaths Wail", 'Vampire Gourmand', 'Vampiric Rites',
'Vampirism', 'Vessel of Paramnesia', "Witch's Caultron", 'Wall of Mulch', 'Waste Not', 'Well Rested'
# Add other excluded names here
}
excluded_names = df['name'].isin(EXCLUDED_NAMES)
else: # Define cantrip conditions with enhanced pattern matching
if (row['manaValue'] == 0 draw_patterns = r'draw[s]? a card|draw[s]? one card'
or row['manaValue'] == 1 has_draw = df['text'].str.contains(draw_patterns, case=False, na=False)
or row['manaValue'] == 2 low_cost = df['manaValue'].fillna(float('inf')) <= 2 # Handle NaN values safely
):
if pd.notna(row['text']):
if ('draw a card' in row['text'].lower()
or 'draw a card.' in row['text'].lower()
or 'draw two cards' in row['text'].lower()
or 'draw three cards' in row['text'].lower()
or 'draw x cards' in row['text'].lower()
or 'draws a card' in row['text'].lower()
):
if ('enters, draw a card' in row['text']
or 'enters, you draw a card' in row['text']
or 'enters, you may draw a card' in row['text']
# Specific cards # Combine all conditions
or 'Cling to Dust' == row['name'] cantrip_mask = (
or 'Deduce' == row['name'] ~excluded_types &
or 'Everdream' == row['name'] ~excluded_keywords &
or 'Inverted Iceberg' == row['name'] ~has_loot &
or 'Lunar Rejection' == row['name'] ~excluded_names &
or 'Open of the Sea' == row['name'] has_draw &
or 'Pawpatch Formation' == row['name'] low_cost
or 'Scour All Possibilities' == row['name'] )
or 'Sleight of Hand' == row['name']
or 'Think Twice' == row['name']
or 'Train of Thought' == row['name']
or 'Whispers of the Muse' == row['name']
):
tag_type = ['Cantrips', 'Spellslinger', 'Spells Matter']
for tag in tag_type:
if tag not in theme_tags:
theme_tags.extend([tag])
df.at[index, 'themeTags'] = theme_tags
elif ('{T}: Draw a card' in row['text']
or '{T}: Draw' in row['text']
or 'another legendary creature, draw a card' in row['text'].lower()
or 'artifact or land: draw' in row['text'].lower()
or 'Blood token' in row['text']
or 'creature you control, draw' in row['text'].lower()
or 'creature\'s toughness' in row['text'].lower()
or 'Clue' in row['type']
or 'dies, draw' in row['text']
or 'dies, choose one' in row['text']
or 'dies, you draw a card' in row['text']
or 'discard' in row['text'].lower()
or 'discard a card' in row['text'].lower()
or 'discard your hand' in row['text'].lower()
or 'each player may draw' in row['text'].lower()
or 'each other player' in row['text']
or 'each opponent. draw' in row['text'].lower()
or 'flip a coin' in row['text']
or 'if a player would draw' in row['text'].lower()
or 'if an opponent would draw' in row['text'].lower()
or 'if you would draw' in row['text'].lower()
or 'sacrifice a land: draw' in row['text'].lower()
or 'each player may draw' in row['text'].lower()
or 'opponent controls, draw' in row['text'].lower()
or 'opponent controls, you may draw' in row['text'].lower()
or 'or greater, draw a card' in row['text'].lower()
or 'this turn, draw a card' in row['text'].lower()
or 'turned face up, draw a card' in row['text'].lower()
or 'upkeep, each player draws' in row['text'].lower()
or 'you countrol: draw a card' in row['text'].lower()
or 'you may pay' in row['text']
or 'whenever an opponent draws a card' in row['text'].lower()
or f'{{1}}, Sacrifice {row['name']}: Draw a card' in row['text']
or f'{row['name']} dies' in row['text']
or f'{row['name']} dies, draw a card' in row['text']
or f'{row['name']} dies, you may draw a card' in row['text']
):
continue
elif ('{1}' in row['text']
or '{2}' in row['text']
or '{3}' in row['text']
or '{4}' in row['text']
or '{5}' in row['text']
):
ability_costs = [1, 2, 3]
for i in ability_costs:
if (f'{{{i}}}' in row['text']
or f'pay {i} life: draw' in row['text'].lower()
):
if i + row['manaValue'] >= 3:
continue
else:
tag_type = ['Cantrips', 'Spellslinger', 'Spells Matter']
for tag in tag_type:
if tag not in theme_tags:
theme_tags.extend([tag])
df.at[index, 'themeTags'] = theme_tags
else:
tag_type = ['Cantrips', 'Spellslinger', 'Spells Matter']
for tag in tag_type:
if tag not in theme_tags:
theme_tags.extend([tag])
df.at[index, 'themeTags'] = theme_tags
# Overwrite file with Spells Matter tag added # Apply tags using vectorized operation
print(f'"Cantrip" themed cards in {color}_cards.csv have been tagged.\n') utility.apply_tag_vectorized(df, cantrip_mask, TAG_GROUPS['Cantrips'])
logging.info('Finished tagging cantrips in %s_cards.csv', color)
## Spell Copy ## Spell Copy
def tag_for_spell_copy(df, color): def tag_for_spell_copy(df, color):
@ -4904,5 +4879,6 @@ def tag_for_removal(df, color):
#regenerate_csv_by_color('colorless') #regenerate_csv_by_color('colorless')
for color in colors: #for color in colors:
load_dataframe(color) # load_dataframe(color)
load_dataframe('colorless')

View file

@ -1,61 +0,0 @@
def tag_for_cantrips(df, color):
    """Tag cards in the DataFrame as cantrips based on specific criteria.

    A card is tagged when its mana value is at most 2 and its text contains
    'draw a card' (case-insensitive), unless it is a Land or Equipment, has
    one of the excluded keywords, already carries the 'Loot' theme tag, or
    is listed by name in ``EXCLUDED_NAMES``.

    Args:
        df (pd.DataFrame): Card data. Reads the 'manaValue', 'type',
            'keywords', 'themeTags', 'name' and 'text' columns.
        color (str): The color identifier, used only in log messages.

    Returns:
        None: ``df`` is modified in place ('manaValue' is coerced to numeric
        and 'themeTags' receives the cantrip tag group).
    """
    logging.info('Tagging cantrips in %s_cards.csv', color)

    # Non-numeric mana values become NaN, which never satisfies `<= 2` below.
    df['manaValue'] = pd.to_numeric(df['manaValue'], errors='coerce')

    # Exclusion conditions
    excluded_types = df['type'].str.contains('Land|Equipment', na=False)
    excluded_keywords = df['keywords'].str.contains(
        'Channel|Cycling|Connive|Learn|Ravenous', na=False
    )
    has_loot = df['themeTags'].apply(lambda x: 'Loot' in x)

    # Name exclusions. `isin` is an exact, case-sensitive comparison, so each
    # entry has to match the card name character for character.
    EXCLUDED_NAMES = {
        'Archivist of Oghma', 'Argothian Enchantress', 'Audacity', 'Betrayal', 'Bequeathal',
        'Blood Scrivener', 'Brigon, Soldier of Meletis', 'Compost',
        'Concealing Curtains // Revealing Eye', 'Cryptbreaker', 'Curiosity',
        'Curse of Vengeance', 'Cryptek', 'Dakra Mystic', 'Dawn of a New Age', 'Dockside Chef',
        'Dreamcatcher', 'Edgewall Innkeeper', 'Eidolon of Philosophy', 'Evolved Sleeper',
        'Femeref Enchantress', 'Finneas, Ace Archer', 'Flumph', 'Folk Hero',
        'Frodo, Adventurous Hobbit', 'Goblin Artisans', 'Goldberry, River-Daughter',
        'Gollum, Scheming Guide', 'Hatching Plans', 'Ideas Unbound', 'Ingenious Prodigy',
        'Ior Ruin Expedition', "Jace's Erasure", 'Keeper of the Mind', 'Kor Spiritdancer',
        'Lodestone Bauble', 'Puresteel Paladin', 'Jeweled Bird', 'Mindblade Render',
        "Multani's Presence", "Nahiri's Lithoforming", 'Ordeal of Thassa', 'Pollywog Prodigy',
        'Priest of Forgotten Gods', 'Ravenous Squirrel', 'Read the Runes',
        'Red Death, Shipwrecker', 'Roil Cartographer', 'Sage of Lat-Nam', 'Saprazzan Heir',
        'Scion of Halaster', 'See Beyond', 'Selhoff Entomber', 'Shielded Aether Thief',
        'Shore Keeper', 'Silverquill Silencer', 'Soldevi Sage', 'Soldevi Sentry',
        'Spiritual Focus', 'Sram, Senior Edificer', 'Staff of the Storyteller', 'Stirge',
        'Sylvan Echoes', "Sythis, Harvest's Hand", 'Sygg, River Cutthroat', 'Tenuous Truce',
        'Test of Talents', 'Thalakos Seer', "Tribute to Horobi // Echo of Death's Wail",
        'Vampire Gourmand', 'Vampiric Rites', 'Vampirism', 'Vessel of Paramnesia',
        "Witch's Cauldron", 'Wall of Mulch', 'Waste Not', 'Well Rested',
        # Add other excluded names here
    }
    excluded_names = df['name'].isin(EXCLUDED_NAMES)

    # Cantrip conditions
    has_draw = df['text'].str.contains('draw a card', case=False, na=False)
    low_cost = df['manaValue'] <= 2

    # Combine all conditions
    cantrip_mask = (
        ~excluded_types
        & ~excluded_keywords
        & ~has_loot
        & ~excluded_names
        & has_draw
        & low_cost
    )

    # Apply tags using vectorized operation
    apply_tag_vectorized(df, cantrip_mask, TAG_GROUPS['Cantrips'])
    logging.info('Finished tagging cantrips in %s_cards.csv', color)

View file

@ -1,4 +1,18 @@
def pluralize(word): from typing import Union, List
import pandas as pd
import re
import logging
from typing import Dict, Optional, Set
from time import perf_counter
def pluralize(word: str) -> str:
"""Convert a word to its plural form using basic English pluralization rules.
Args:
word: The singular word to pluralize
Returns:
The pluralized word
"""
if word.endswith('y'): if word.endswith('y'):
return word[:-1] + 'ies' return word[:-1] + 'ies'
elif word.endswith(('s', 'sh', 'ch', 'x', 'z')): elif word.endswith(('s', 'sh', 'ch', 'x', 'z')):
@ -8,10 +22,261 @@ def pluralize(word):
else: else:
return word + 's' return word + 's'
def sort_list(items: Union[List, pd.Series]) -> Union[List, pd.Series]:
    """Sort a list or pandas Series in ascending order.

    Args:
        items: List or Series to sort. Any other value is passed through.

    Returns:
        A new sorted list for list input, the result of ``sort_values()``
        for Series input, or ``items`` unchanged for anything else.
    """
    if isinstance(items, list):
        return sorted(items)
    if isinstance(items, pd.Series):
        return items.sort_values()
    return items
def create_regex_mask(df: pd.DataFrame, column: str, pattern: str) -> pd.Series:
    """Flag the rows whose value in ``column`` matches a regex.

    Matching is case-insensitive and missing values count as non-matches.

    Args:
        df: DataFrame to search
        column: Name of the string column to test
        pattern: Regular expression to look for

    Returns:
        Boolean Series with one entry per row of ``df``
    """
    target_column = df[column]
    matches = target_column.str.contains(pattern, case=False, na=False, regex=True)
    return matches
def combine_masks(masks: List[pd.Series], logical_operator: str = 'and') -> pd.Series:
    """Combine multiple boolean masks with a logical operator.

    Args:
        masks: List of boolean Series masks to combine
        logical_operator: Logical operator to use ('and' or 'or'),
            compared case-insensitively

    Returns:
        Combined boolean mask; an empty boolean Series when ``masks`` is empty

    Raises:
        ValueError: If ``logical_operator`` is neither 'and' nor 'or'
    """
    operator_name = str(logical_operator).lower()
    # Reject unknown operators explicitly: falling through to OR for a typo
    # such as 'adn' would silently widen every mask built on top of this.
    if operator_name not in ('and', 'or'):
        raise ValueError(
            f"logical_operator must be 'and' or 'or', got {logical_operator!r}"
        )

    if not masks:
        return pd.Series([], dtype=bool)

    use_and = operator_name == 'and'
    combined = masks[0]
    for mask in masks[1:]:
        combined = (combined & mask) if use_and else (combined | mask)
    return combined
def safe_str_contains(series: pd.Series, patterns: Union[str, List[str]], regex: bool = False) -> pd.Series:
    """Safely check if strings in a Series contain one or more patterns, handling NA values.

    Matching is case-insensitive. Missing values never match.

    Args:
        series: String Series to check
        patterns: String or list of strings to look for
        regex: Whether to treat patterns as regex expressions

    Returns:
        Boolean Series indicating which strings contain any of the patterns.
        An empty pattern list yields an all-False Series.
    """
    if isinstance(patterns, str):
        patterns = [patterns]

    # With nothing to look for, nothing matches. Without this guard the regex
    # branch would build '' (which matches every row) and the literal branch
    # would hand an empty list to pd.concat, which raises.
    if not patterns:
        return pd.Series(False, index=series.index, dtype=bool)

    text = series.fillna('')

    if regex:
        # Non-capturing groups keep each alternative isolated without
        # triggering pandas' "pattern has match groups" warning.
        combined_pattern = '|'.join(f'(?:{p})' for p in patterns)
        return text.str.contains(combined_pattern, case=False, na=False, regex=True)

    result = pd.Series(False, index=series.index, dtype=bool)
    for literal in patterns:
        result |= text.str.contains(literal, case=False, na=False, regex=False)
    return result
def create_type_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series:
    """Flag rows whose 'type' column matches at least one of the given patterns.

    Matching is case-insensitive and rows with a missing type never match.

    Args:
        df: DataFrame to search; must have a 'type' column
        type_text: A single pattern or a list of patterns
        regex: When True (default) the patterns are joined with '|' and
            evaluated as one regular expression; when False each pattern is
            matched as a literal substring

    Returns:
        Boolean Series indicating matching rows

    Raises:
        ValueError: If type_text is empty or None
        TypeError: If type_text is not a string or list of strings
    """
    if not type_text:
        raise ValueError("type_text cannot be empty or None")

    if isinstance(type_text, str):
        candidates = [type_text]
    elif isinstance(type_text, list):
        candidates = type_text
    else:
        raise TypeError("type_text must be a string or list of strings")

    type_column = df['type']

    if not regex:
        # One literal-substring mask per pattern, OR-ed together row-wise.
        literal_hits = [
            type_column.str.contains(candidate, case=False, na=False, regex=False)
            for candidate in candidates
        ]
        return pd.concat(literal_hits, axis=1).any(axis=1)

    alternation = '|'.join(f'{candidate}' for candidate in candidates)
    return type_column.str.contains(alternation, case=False, na=False, regex=True)
def create_combined_type_mask(df: pd.DataFrame, type_patterns: Dict[str, List[str]], logical_operator: str = 'and') -> pd.Series:
    """Build one type mask per category and merge them into a single mask.

    Only the dictionary's values are used; each list of patterns is turned
    into a mask with ``create_type_mask`` and the resulting masks are merged
    with ``combine_masks``.

    Args:
        df: DataFrame to search
        type_patterns: Dictionary mapping type categories to lists of patterns
        logical_operator: How to combine masks ('and' or 'or')

    Returns:
        Combined boolean mask; all True when ``type_patterns`` is empty

    Example:
        patterns = {
            'creature': ['Creature', 'Artifact Creature'],
            'enchantment': ['Enchantment', 'Enchantment Creature']
        }
        mask = create_combined_type_mask(df, patterns, 'or')
    """
    if not type_patterns:
        # No constraints given: every row qualifies.
        return pd.Series(True, index=df.index)

    per_category = [
        create_type_mask(df, pattern_group)
        for pattern_group in type_patterns.values()
    ]
    return combine_masks(per_category, logical_operator)
def extract_creature_types(type_text: str, creature_types: List[str], non_creature_types: List[str]) -> List[str]:
    """Extract creature types from a type text string.

    The type line is split on whitespace and each token is kept when it is a
    known creature type and not a listed non-creature type. Tokens are
    returned in the order they appear.

    Args:
        type_text: The type line text to parse. A missing value (None, NaN
            or any other non-string) yields an empty list.
        creature_types: List of valid creature types
        non_creature_types: List of non-creature types to exclude

    Returns:
        List of extracted creature types
    """
    # Rows with no type line arrive as NaN/None from pandas; treat them as
    # having no creature types, the same way find_types_in_text treats
    # missing card text.
    if not isinstance(type_text, str):
        return []

    # str.split() with no argument already discards surrounding whitespace.
    return [
        token
        for token in type_text.split()
        if token in creature_types and token not in non_creature_types
    ]
def find_types_in_text(text: str, name: str, creature_types: List[str]) -> List[str]:
    """Find creature types mentioned in card text.

    Each whitespace-separated word is stripped of everything except ASCII
    letters and hyphens, then compared against ``creature_types``. A type
    that also occurs as a substring of the card name is ignored.

    Args:
        text: Card text to search
        name: Card name to exclude from search
        creature_types: List of valid creature types

    Returns:
        List of found creature types, without duplicates, in order of first
        appearance in ``text``. Empty when ``text`` is missing.
    """
    if pd.isna(text):
        return []

    # A missing name cannot contain any type, so nothing is excluded by it.
    card_name = name if isinstance(name, str) else ''

    # dict keys keep insertion order, so the result is de-duplicated AND
    # deterministic (list(set(...)) reorders strings between interpreter runs).
    found_types = {}
    for word in text.split():
        clean_word = re.sub(r'[^a-zA-Z-]', '', word)
        if clean_word in creature_types and clean_word not in card_name:
            found_types[clean_word] = None
    return list(found_types)
def add_outlaw_type(types: List[str], outlaw_types: List[str]) -> List[str]:
    """Append 'Outlaw' when the card carries any outlaw-qualifying type.

    Args:
        types: List of current types
        outlaw_types: List of types that qualify for Outlaw

    Returns:
        A new list with 'Outlaw' appended when at least one entry of
        ``types`` is in ``outlaw_types`` and 'Outlaw' is not already
        present; otherwise the ``types`` list itself, unchanged.
    """
    if 'Outlaw' in types:
        return types
    for current_type in types:
        if current_type in outlaw_types:
            return types + ['Outlaw']
    return types
def batch_update_types(df: pd.DataFrame, mask: pd.Series, new_types: List[str]) -> None:
    """Merge ``new_types`` into 'creatureTypes' for every selected row.

    For each row selected by ``mask`` the existing list is concatenated with
    ``new_types``, de-duplicated and stored back in sorted order. ``df`` is
    modified in place.

    Args:
        df: DataFrame to update
        mask: Boolean mask indicating which rows to update
        new_types: List of types to add
    """
    def _merged(existing_types):
        # Set union drops duplicates; sorted() returns the list to store.
        return sorted(set(existing_types + new_types))

    selected = df.loc[mask, 'creatureTypes']
    df.loc[mask, 'creatureTypes'] = selected.apply(_merged)
def create_tag_mask(df: pd.DataFrame, tag_patterns: Union[str, List[str]], column: str = 'themeTags') -> pd.Series:
    """Create a boolean mask for rows where tags match specified patterns.

    A row matches when any pattern occurs as a substring of any tag in the
    row's tag list.

    Args:
        df: DataFrame to search
        tag_patterns: String or list of strings to match against tags
        column: Column containing tags to search (default: 'themeTags')

    Returns:
        Boolean Series indicating matching rows, indexed like ``df``. All
        False when ``df`` is empty or no patterns are given.
    """
    if isinstance(tag_patterns, str):
        tag_patterns = [tag_patterns]

    # Nothing to search, or nothing to search for: no row can match. The
    # pattern guard also avoids handing an empty list to pd.concat, which
    # raises.
    if len(df) == 0 or not tag_patterns:
        return pd.Series(False, index=df.index, dtype=bool)

    def _row_matches(row_tags) -> bool:
        return any(pattern in tag for tag in row_tags for pattern in tag_patterns)

    # A single pass over the column covers every pattern.
    return df[column].apply(_row_matches).astype(bool)
def validate_dataframe_columns(df: pd.DataFrame, required_columns: Set[str]) -> None:
    """Validate that DataFrame contains all required columns.

    Args:
        df: DataFrame to validate
        required_columns: Column names that must be present. Any iterable of
            names is accepted (set, list, tuple, ...).

    Raises:
        ValueError: If any required columns are missing
    """
    # Coerce to a set first: a list minus a set raises TypeError, and the
    # REQUIRED_COLUMNS constant in settings is a list.
    missing = set(required_columns) - set(df.columns)
    if missing:
        # Sorted so the message is stable from run to run.
        raise ValueError(f"Missing required columns: {sorted(missing)}")
def apply_tag_vectorized(df: pd.DataFrame, mask: pd.Series, tags: List[str]) -> None:
    """Apply tags to rows in a dataframe based on a boolean mask.

    For every row selected by ``mask`` the new tags are merged into the
    row's 'themeTags' list, duplicates are dropped and the list is stored
    back in sorted order. ``df`` is modified in place.

    Args:
        df: The dataframe to modify
        mask: Boolean series indicating which rows to tag
        tags: Tags to apply. A single tag, or a list, tuple or set of tags.
    """
    if isinstance(tags, (tuple, set, frozenset)):
        # Unpack other collections; wrapping them whole would add the
        # collection itself as one "tag" and break the merge below.
        tags = list(tags)
    elif not isinstance(tags, list):
        tags = [tags]

    # Get current tags for masked rows
    current_tags = df.loc[mask, 'themeTags']

    # Merge, de-duplicate and sort the tags of each selected row
    df.loc[mask, 'themeTags'] = current_tags.apply(
        lambda existing: sorted(set(existing + tags))
    )
def log_performance_metrics(start_time: float, operation: str, df_size: int) -> None:
    """Log performance metrics for an operation.

    Args:
        start_time: Start time from perf_counter()
        operation: Description of the operation performed
        df_size: Size of the DataFrame processed. May be 0, in which case
            the per-row figure is reported as 0.00ms.
    """
    duration = perf_counter() - start_time
    # An empty DataFrame would otherwise divide by zero and turn a purely
    # informational log call into a crash.
    per_row_ms = duration / df_size * 1000 if df_size else 0.0
    logging.info(
        "%s completed in %.2fs for %s rows (%.2fms per row)",
        operation, duration, df_size, per_row_ms,
    )