mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-17 16:10:12 +01:00
Started work refactoring the tagging functions by using Traycer
This commit is contained in:
parent
e404515d91
commit
a4abea2c3c
4 changed files with 736 additions and 516 deletions
40
settings.py
40
settings.py
|
|
@ -32,6 +32,18 @@ board_wipe_tags = ['destroy all', 'destroy each', 'return all', 'return each', '
|
||||||
card_types = ['Artifact','Creature', 'Enchantment', 'Instant', 'Land', 'Planeswalker', 'Sorcery',
|
card_types = ['Artifact','Creature', 'Enchantment', 'Instant', 'Land', 'Planeswalker', 'Sorcery',
|
||||||
'Kindred', 'Dungeon', 'Battle']
|
'Kindred', 'Dungeon', 'Battle']
|
||||||
|
|
||||||
|
# Theme tags implied by each card type.  Keys are card-type names as they
# appear in a card's type line; values are the theme tags added to any card
# of that type.
# NOTE(review): the original carried a commented-out ``'Creature': []`` entry,
# so creature cards receive no type-based tag from this table -- confirm that
# is intended.
TYPE_TAG_MAPPING = dict(
    Artifact=['Artifacts Matter'],
    Battle=['Battles Matter'],
    Enchantment=['Enchantments Matter'],
    Instant=['Spells Matter', 'Spellslinger'],
    Land=['Lands Matter'],
    Planeswalker=['Superfriends'],
    Sorcery=['Spells Matter', 'Spellslinger'],
)
|
||||||
|
|
||||||
csv_directory = 'csv_files'
|
csv_directory = 'csv_files'
|
||||||
|
|
||||||
colors = ['colorless', 'white', 'blue', 'black', 'red', 'green',
|
colors = ['colorless', 'white', 'blue', 'black', 'red', 'green',
|
||||||
|
|
@ -149,3 +161,31 @@ targetted_removal_tags = ['exile target', 'destroy target', 'return target', 'sh
|
||||||
'deals damage to target', 'loses all abilities']
|
'deals damage to target', 'loses all abilities']
|
||||||
|
|
||||||
triggers = ['when', 'whenever', 'at']
|
triggers = ['when', 'whenever', 'at']
|
||||||
|
|
||||||
|
# Constants for DataFrame validation and processing

# Columns kept, in this order, when a card DataFrame is written back out.
REQUIRED_COLUMNS = (
    'name faceName edhrecRank colorIdentity colors '
    'manaCost manaValue type creatureTypes text '
    'power toughness keywords themeTags layout side'
).split()

# Known theme tags, in alphabetical order.
# NOTE(review): this list spells the planeswalker theme 'Super Friends' while
# TYPE_TAG_MAPPING uses 'Superfriends' -- confirm which spelling consumers expect.
DEFAULT_THEME_TAGS = [
    'Aggro', 'Aristocrats', 'Artifacts Matter',
    'Big Mana', 'Blink', 'Board Wipes', 'Burn',
    'Cantrips', 'Card Draw', 'Clones', 'Combat Matters', 'Control',
    'Counters Matter',
    'Energy', 'Enter the Battlefield', 'Equipment', 'Exile Matters',
    'Infect', 'Interaction',
    'Lands Matter', 'Leave the Battlefield', 'Legends Matter', 'Life Matters',
    'Mill', 'Monarch',
    'Protection',
    'Ramp', 'Reanimate', 'Removal',
    'Sacrifice Matters', 'Spellslinger', 'Stax', 'Super Friends',
    'Theft', 'Token Creation', 'Tokens Matter',
    'Voltron',
    'X Spells',
]

# Same columns as REQUIRED_COLUMNS, held in a separate list object so that
# mutating one does not affect the other.
COLUMN_ORDER = list(REQUIRED_COLUMNS)

# Constants for type detection and processing

# Creature types that also earn a card the collective 'Outlaw' type.
OUTLAW_TYPES = ['Assassin', 'Mercenary', 'Pirate', 'Rogue', 'Warlock']

# NOTE(review): not referenced in the visible code; presumably the number of
# rows handled per batch during type detection -- confirm against callers.
TYPE_DETECTION_BATCH_SIZE = 1_000
|
||||||
812
tagger.py
812
tagger.py
|
|
@ -2,17 +2,47 @@ from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
from typing import Dict, List, Optional, Set, Union
|
||||||
|
|
||||||
import pandas as pd # type: ignore
|
import pandas as pd # type: ignore
|
||||||
|
|
||||||
import settings
|
import settings
|
||||||
|
import utility
|
||||||
|
|
||||||
from settings import artifact_tokens, csv_directory, colors, counter_types, enchantment_tokens, multiple_copy_cards, num_to_search, triggers
|
from settings import artifact_tokens, csv_directory, colors, counter_types, enchantment_tokens, multiple_copy_cards, num_to_search, triggers
|
||||||
from setup import regenerate_csv_by_color
|
from setup import regenerate_csv_by_color
|
||||||
from utility import pluralize, sort_list
|
|
||||||
|
|
||||||
|
# Named bundles of theme tags that are applied together.  Each value is the
# full list of tags a card receives when it qualifies for that group.
TAG_GROUPS = dict(
    Cantrips=["Cantrips", "Card Draw", "Spellslinger", "Spells Matter"],
    Tokens=["Token Creation", "Tokens Matter"],
    Counters=["Counters Matter"],
    Combat=["Combat Matters", "Combat Tricks"],
    Artifacts=["Artifacts Matter", "Artifact Tokens"],
    Enchantments=["Enchantments Matter", "Enchantment Tokens"],
    Lands=["Lands Matter"],
    Spells=["Spellslinger", "Spells Matter"],
)
|
||||||
|
|
||||||
|
# Reusable regular expressions for recognising common effects in card text.
# They are written to be used as "contains" searches, so none are anchored.
PATTERN_GROUPS = dict(
    draw=r'draw[s]? a card|draw[s]? one card',
    combat=r'attack[s]?|block[s]?|combat damage',
    tokens=r'create[s]? .* token|put[s]? .* token',
    counters=r'\+1/\+1 counter|\-1/\-1 counter|loyalty counter',
    sacrifice=r'sacrifice[s]? .*|sacrificed',
    exile=r'exile[s]? .*|exiled',
)
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.INFO,
|
level=logging.INFO,
|
||||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
format='%(asctime)s - %(levelname)s - %(message)s',
|
||||||
|
handlers=[
|
||||||
|
logging.StreamHandler(),
|
||||||
|
logging.FileHandler('tagger.log', mode='w')
|
||||||
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
### Setup
|
### Setup
|
||||||
|
|
@ -70,10 +100,13 @@ def load_dataframe(color: str) -> pd.DataFrame:
|
||||||
|
|
||||||
except FileNotFoundError as e:
|
except FileNotFoundError as e:
|
||||||
logging.error(f'Error: {e}')
|
logging.error(f'Error: {e}')
|
||||||
except pd.errors.ParserError:
|
raise
|
||||||
logging.error('Error parsing the CSV file.')
|
except pd.errors.ParserError as e:
|
||||||
|
logging.error(f'Error parsing the CSV file: {e}')
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f'An unexpected error occurred: {e}')
|
logging.error(f'An unexpected error occurred: {e}')
|
||||||
|
raise
|
||||||
|
|
||||||
## Tag cards on a color-by-color basis
|
## Tag cards on a color-by-color basis
|
||||||
def tag_by_color(df, color):
|
def tag_by_color(df, color):
|
||||||
|
|
@ -90,41 +123,41 @@ def tag_by_color(df, color):
|
||||||
print('====================\n')
|
print('====================\n')
|
||||||
create_theme_tags(df, color)
|
create_theme_tags(df, color)
|
||||||
print('====================\n')
|
print('====================\n')
|
||||||
|
#
|
||||||
# Go through each type of tagging
|
## Go through each type of tagging
|
||||||
add_creatures_to_tags(df, color)
|
#add_creatures_to_tags(df, color)
|
||||||
print('====================\n')
|
#print('====================\n')
|
||||||
tag_for_card_types(df, color)
|
tag_for_card_types(df, color)
|
||||||
print('====================\n')
|
print('====================\n')
|
||||||
tag_for_keywords(df, color)
|
#tag_for_keywords(df, color)
|
||||||
print('====================\n')
|
#print('====================\n')
|
||||||
|
#
|
||||||
# Tag for various effects
|
## Tag for various effects
|
||||||
tag_for_cost_reduction(df, color)
|
#tag_for_cost_reduction(df, color)
|
||||||
print('====================\n')
|
#print('====================\n')
|
||||||
tag_for_card_draw(df, color)
|
#tag_for_card_draw(df, color)
|
||||||
print('====================\n')
|
#print('====================\n')
|
||||||
tag_for_artifacts(df, color)
|
#tag_for_artifacts(df, color)
|
||||||
print('====================\n')
|
#print('====================\n')
|
||||||
tag_for_enchantments(df, color)
|
#tag_for_enchantments(df, color)
|
||||||
print('====================\n')
|
#print('====================\n')
|
||||||
tag_for_exile_matters(df, color)
|
#tag_for_exile_matters(df, color)
|
||||||
print('====================\n')
|
#print('====================\n')
|
||||||
tag_for_tokens(df, color)
|
#tag_for_tokens(df, color)
|
||||||
print('====================\n')
|
#print('====================\n')
|
||||||
tag_for_life_matters(df, color)
|
#tag_for_life_matters(df, color)
|
||||||
print('====================\n')
|
#print('====================\n')
|
||||||
tag_for_counters(df, color)
|
#tag_for_counters(df, color)
|
||||||
print('====================\n')
|
#print('====================\n')
|
||||||
tag_for_voltron(df, color)
|
#tag_for_voltron(df, color)
|
||||||
print('====================\n')
|
#print('====================\n')
|
||||||
tag_for_spellslinger(df, color)
|
#tag_for_spellslinger(df, color)
|
||||||
print('====================\n')
|
#print('====================\n')
|
||||||
tag_for_ramp(df, color)
|
#tag_for_ramp(df, color)
|
||||||
print('====================\n')
|
#print('====================\n')
|
||||||
tag_for_themes(df, color)
|
#tag_for_themes(df, color)
|
||||||
print('====================\n')
|
#print('====================\n')
|
||||||
tag_for_interaction(df, color)
|
#tag_for_interaction(df, color)
|
||||||
|
|
||||||
# Lastly, sort all theme tags for easier reading
|
# Lastly, sort all theme tags for easier reading
|
||||||
sort_theme_tags(df, color)
|
sort_theme_tags(df, color)
|
||||||
|
|
@ -134,155 +167,236 @@ def tag_by_color(df, color):
|
||||||
#keyboard.wait('esc')
|
#keyboard.wait('esc')
|
||||||
|
|
||||||
## Determine any non-creature cards that have creature types mentioned
|
## Determine any non-creature cards that have creature types mentioned
|
||||||
def kindred_tagging(df, color):
|
def kindred_tagging(df: pd.DataFrame, color: str) -> None:
|
||||||
logging.info(f'Settings creature type tags on {color}_cards.csv.\n')
|
"""Tag cards with creature types and related types.
|
||||||
print('===============\n')
|
|
||||||
|
|
||||||
# Create new blank list column called 'creatureTypes
|
|
||||||
df['creatureTypes'] = [[] for _ in range(len(df))]
|
|
||||||
|
|
||||||
# Set creature types
|
|
||||||
logging.info(f'Checking for and setting creature types in {color}_cards.csv')
|
|
||||||
for index, row in df.iterrows():
|
|
||||||
if 'Creature' in row['type']:
|
|
||||||
kindred_tags = []
|
|
||||||
creature_types = row['type']
|
|
||||||
split_types = creature_types.split()
|
|
||||||
for creature_type in split_types:
|
|
||||||
# If the type is a non-creature type ignore it
|
|
||||||
if creature_type not in settings.non_creature_types:
|
|
||||||
if creature_type not in kindred_tags:
|
|
||||||
for creature_types_individual in settings.creature_types:
|
|
||||||
if creature_type == creature_types_individual:
|
|
||||||
kindred_tags.append(creature_type)
|
|
||||||
df.at[index, 'creatureTypes'] = kindred_tags
|
|
||||||
logging.info(f'Creature types set in {color}_cards.csv.\n')
|
|
||||||
print('==========\n')
|
|
||||||
|
|
||||||
# Set outlaws
|
|
||||||
logging.info(f'Checking for and setting Outlaw types in {color}_cards.csv')
|
|
||||||
outlaws = ['Assassin', 'Mercenary', 'Pirate', 'Rogue', 'Warlock']
|
|
||||||
for index, row in df.iterrows():
|
|
||||||
if 'Creature' in row['type']:
|
|
||||||
kindred_tags = row['creatureTypes']
|
|
||||||
creature_types = kindred_tags
|
|
||||||
for creature_type in creature_types:
|
|
||||||
if creature_type in outlaws:
|
|
||||||
if 'Outlaw' not in kindred_tags:
|
|
||||||
kindred_tags.append('Outlaw')
|
|
||||||
df.at[index, 'creatureTypes'] = kindred_tags
|
|
||||||
logging.info(f'Outlaw types set in {color}_cards.csv.\n')
|
|
||||||
print('==========\n')
|
|
||||||
|
|
||||||
# Check for creature types in text (i.e. how 'Voja, Jaws of the Conclave' cares about Elves)
|
|
||||||
logging.info(f'Checking for and setting creature types found in the text of cards in {color}_cards.csv')
|
|
||||||
for index, row in df.iterrows():
|
|
||||||
kindred_tags = row['creatureTypes']
|
|
||||||
if pd.isna(row['text']):
|
|
||||||
continue
|
|
||||||
split_text = row['text'].split()
|
|
||||||
ignore_list = ['Elite Inquisitor', 'Breaker of Armies', 'Cleopatra, Exiled Pharaoh', 'Nath\'s Buffoon']
|
|
||||||
for creature_type in settings.creature_types:
|
|
||||||
if row['name'] in ignore_list:
|
|
||||||
continue
|
|
||||||
if creature_type in row['name']:
|
|
||||||
continue
|
|
||||||
if pluralize(f'{creature_type}') in row['name']:
|
|
||||||
continue
|
|
||||||
for word in split_text:
|
|
||||||
if creature_type == word:
|
|
||||||
if creature_type not in row['name']:
|
|
||||||
if creature_type not in kindred_tags:
|
|
||||||
kindred_tags.append(creature_type)
|
|
||||||
df.at[index, 'creatureTypes'] = kindred_tags
|
|
||||||
|
|
||||||
# Tag for pluralized types (i.e. Elves, Wolves, etc...) in textbox
|
|
||||||
if pluralize(f'{creature_type}') == word:
|
|
||||||
if pluralize(f'{creature_type}') not in row['name']:
|
|
||||||
if creature_type not in kindred_tags:
|
|
||||||
kindred_tags.append(creature_type)
|
|
||||||
df.at[index, 'creatureTypes'] = kindred_tags
|
|
||||||
logging.info(f'Creature types from text set in {color}_cards.csv.\n')
|
|
||||||
|
|
||||||
# Overwrite file with creature type tags
|
|
||||||
columns_to_keep = ['name', 'faceName','edhrecRank', 'colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', 'power', 'toughness', 'keywords', 'layout', 'side']
|
|
||||||
df = df[columns_to_keep]
|
|
||||||
df.to_csv(f'{csv_directory}/{color}_cards.csv', index=False)
|
|
||||||
logging.info(f'\nCreature types tagged on {color}_cards.csv.\n')
|
|
||||||
|
|
||||||
def create_theme_tags(df: pd.DataFrame, color: str) -> None:
|
|
||||||
"""
|
|
||||||
Initialize the themeTags column and reorganize dataframe columns.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
df (pd.DataFrame): The card dataframe to process
|
df: DataFrame containing card data
|
||||||
color (str): The color being processed for logging
|
color: Color identifier for logging
|
||||||
"""
|
"""
|
||||||
logging.info(f'Creating theme/effect tag column on {color}_cards.csv.')
|
start_time = pd.Timestamp.now()
|
||||||
|
logging.info(f'Setting creature type tags on {color}_cards.csv')
|
||||||
|
|
||||||
# Initialize themeTags column efficiently
|
try:
|
||||||
df['themeTags'] = pd.Series([[] for _ in range(len(df))])
|
# Initialize creatureTypes column vectorized
|
||||||
|
df['creatureTypes'] = pd.Series([[] for _ in range(len(df))])
|
||||||
|
|
||||||
# Define column order
|
# Detect creature types using mask
|
||||||
columns_to_keep = [
|
creature_mask = utility.create_type_mask(df, 'Creature')
|
||||||
'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors',
|
if creature_mask.any():
|
||||||
'manaCost', 'manaValue', 'type', 'creatureTypes', 'text',
|
creature_rows = df[creature_mask]
|
||||||
'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side'
|
for idx, row in creature_rows.iterrows():
|
||||||
|
types = utility.extract_creature_types(
|
||||||
|
row['type'],
|
||||||
|
settings.creature_types,
|
||||||
|
settings.non_creature_types
|
||||||
|
)
|
||||||
|
if types:
|
||||||
|
df.at[idx, 'creatureTypes'] = types
|
||||||
|
|
||||||
|
creature_time = pd.Timestamp.now()
|
||||||
|
logging.info(f'Creature type detection completed in {(creature_time - start_time).total_seconds():.2f}s')
|
||||||
|
print('==========\n')
|
||||||
|
|
||||||
|
# Process outlaw types
|
||||||
|
outlaws = settings.OUTLAW_TYPES
|
||||||
|
df['creatureTypes'] = df.apply(
|
||||||
|
lambda row: utility.add_outlaw_type(row['creatureTypes'], outlaws)
|
||||||
|
if isinstance(row['creatureTypes'], list) else row['creatureTypes'],
|
||||||
|
axis=1
|
||||||
|
)
|
||||||
|
|
||||||
|
outlaw_time = pd.Timestamp.now()
|
||||||
|
logging.info(f'Outlaw type processing completed in {(outlaw_time - creature_time).total_seconds():.2f}s')
|
||||||
|
|
||||||
|
# Find creature types in text
|
||||||
|
logging.info('Checking for creature types in card text')
|
||||||
|
# Check for creature types in text (i.e. how 'Voja, Jaws of the Conclave' cares about Elves)
|
||||||
|
logging.info(f'Checking for and setting creature types found in the text of cards in {color}_cards.csv')
|
||||||
|
ignore_list = [
|
||||||
|
'Elite Inquisitor', 'Breaker of Armies',
|
||||||
|
'Cleopatra, Exiled Pharaoh', 'Nath\'s Buffoon'
|
||||||
]
|
]
|
||||||
|
|
||||||
# Reorder and select columns
|
for idx, row in df.iterrows():
|
||||||
|
if row['name'] not in ignore_list:
|
||||||
|
text_types = utility.find_types_in_text(
|
||||||
|
row['text'],
|
||||||
|
row['name'],
|
||||||
|
settings.creature_types
|
||||||
|
)
|
||||||
|
if text_types:
|
||||||
|
current_types = row['creatureTypes']
|
||||||
|
if isinstance(current_types, list):
|
||||||
|
df.at[idx, 'creatureTypes'] = sorted(
|
||||||
|
list(set(current_types + text_types))
|
||||||
|
)
|
||||||
|
|
||||||
|
text_time = pd.Timestamp.now()
|
||||||
|
logging.info(f'Text-based type detection completed in {(text_time - outlaw_time).total_seconds():.2f}s')
|
||||||
|
|
||||||
|
# Save results
|
||||||
try:
|
try:
|
||||||
|
columns_to_keep = [
|
||||||
|
'name', 'faceName', 'edhrecRank', 'colorIdentity',
|
||||||
|
'colors', 'manaCost', 'manaValue', 'type',
|
||||||
|
'creatureTypes', 'text', 'power', 'toughness',
|
||||||
|
'keywords', 'layout', 'side'
|
||||||
|
]
|
||||||
df = df[columns_to_keep]
|
df = df[columns_to_keep]
|
||||||
except KeyError as e:
|
df.to_csv(f'{settings.csv_directory}/{color}_cards.csv', index=False)
|
||||||
logging.warning(f"Warning: Missing expected column {e}")
|
total_time = pd.Timestamp.now() - start_time
|
||||||
# Continue with available columns
|
logging.info(f'Creature type tagging completed in {total_time.total_seconds():.2f}s')
|
||||||
existing_cols = [col for col in columns_to_keep if col in df.columns]
|
|
||||||
df = df[existing_cols]
|
|
||||||
|
|
||||||
# Save dataframe
|
except Exception as e:
|
||||||
df.to_csv(f'{csv_directory}/{color}_cards.csv', index=False)
|
logging.error(f'Error saving results: {e}')
|
||||||
logging.info(f'Theme/effect tag column created on {color}_cards.csv.\n')
|
|
||||||
|
|
||||||
def tag_for_card_types(df, color):
|
# Overwrite file with creature type tags
|
||||||
# Iterate through each {color}_cards.csv file to find artifact cards
|
except Exception as e:
|
||||||
# Also check for cards that care about artifacts
|
logging.error(f'Error in kindred_tagging: {e}')
|
||||||
logging.info(f'Settings card type tags on {color}_cards.csv.\n\n')
|
raise
|
||||||
# Define the card types
|
|
||||||
card_types = ['Artifact', 'Creature', 'Enchantment', 'Land', 'Instant', 'Sorcery', 'Planeswalker', 'Battle']
|
|
||||||
|
|
||||||
# Tag for artifacts
|
def create_theme_tags(df: pd.DataFrame, color: str) -> None:
|
||||||
for card_type in card_types:
|
"""Initialize and configure theme tags for a card DataFrame.
|
||||||
logging.info(f'Tagging cards in {color}_cards.csv that have the "{card_type}" type.')
|
|
||||||
for index, row in df.iterrows():
|
|
||||||
theme_tags = row['themeTags']
|
|
||||||
if card_type in row['type']:
|
|
||||||
tag_type = []
|
|
||||||
|
|
||||||
# Tagging for artifacts, enchantments, and lands matter
|
This function initializes the themeTags column, validates the DataFrame structure,
|
||||||
if card_type in ['Artifact', 'Battle', 'Enchantment', 'Land']:
|
and reorganizes columns in an efficient manner. It uses vectorized operations
|
||||||
tag_type.extend([f'{card_type}s Matter'])
|
for better performance.
|
||||||
|
|
||||||
# Tagging for spellslinger/spells matter
|
Args:
|
||||||
if card_type in ['Instant', 'Sorcery']:
|
df: DataFrame containing card data to process
|
||||||
tag_type.extend(['Spells Matter', 'Spellslinger'])
|
color: Color identifier for logging purposes (e.g. 'white', 'blue')
|
||||||
|
|
||||||
# Tagging for Superfriends
|
Returns:
|
||||||
if card_type == 'Planeswalker':
|
The processed DataFrame with initialized theme tags and reorganized columns
|
||||||
tag_type.extend(['Superfriends'])
|
|
||||||
|
|
||||||
# Adding the tags
|
Raises:
|
||||||
for tag in tag_type:
|
ValueError: If required columns are missing or color is invalid
|
||||||
if tag not in theme_tags:
|
TypeError: If inputs are not of correct type
|
||||||
theme_tags.extend([tag])
|
"""
|
||||||
df.at[index, 'themeTags'] = theme_tags
|
start_time = pd.Timestamp.now()
|
||||||
logging.info(f'Cards with the "{card_type}" type in {color}_cards.csv have been tagged.\n')
|
logging.info('Initializing theme tags for %s cards', color)
|
||||||
print('=====\n')
|
|
||||||
|
# Validate inputs
|
||||||
|
if not isinstance(df, pd.DataFrame):
|
||||||
|
raise TypeError("df must be a pandas DataFrame")
|
||||||
|
if not isinstance(color, str):
|
||||||
|
raise TypeError("color must be a string")
|
||||||
|
if color not in settings.colors:
|
||||||
|
raise ValueError(f"Invalid color: {color}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Initialize themeTags column using vectorized operation
|
||||||
|
df['themeTags'] = pd.Series([[] for _ in range(len(df))], index=df.index)
|
||||||
|
|
||||||
|
# Define expected columns
|
||||||
|
required_columns = {
|
||||||
|
'name', 'text', 'type', 'keywords',
|
||||||
|
'creatureTypes', 'power', 'toughness'
|
||||||
|
}
|
||||||
|
|
||||||
|
# Validate required columns
|
||||||
|
missing = required_columns - set(df.columns)
|
||||||
|
if missing:
|
||||||
|
raise ValueError(f"Missing required columns: {missing}")
|
||||||
|
|
||||||
|
# Define column order
|
||||||
|
columns_to_keep = settings.REQUIRED_COLUMNS
|
||||||
|
|
||||||
|
# Reorder columns efficiently
|
||||||
|
available_cols = [col for col in columns_to_keep if col in df.columns]
|
||||||
|
df = df.reindex(columns=available_cols)
|
||||||
|
|
||||||
|
# Save results
|
||||||
|
try:
|
||||||
|
df.to_csv(f'{settings.csv_directory}/{color}_cards.csv', index=False)
|
||||||
|
total_time = pd.Timestamp.now() - start_time
|
||||||
|
logging.info(f'Creature type tagging completed in {total_time.total_seconds():.2f}s')
|
||||||
|
|
||||||
|
# Log performance metrics
|
||||||
|
end_time = pd.Timestamp.now()
|
||||||
|
duration = (end_time - start_time).total_seconds()
|
||||||
|
logging.info('Theme tags initialized in %.2f seconds', duration)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f'Error saving results: {e}')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.error('Error initializing theme tags: %s', str(e))
|
||||||
|
raise
|
||||||
|
|
||||||
|
def create_text_mask(df: pd.DataFrame, pattern: str) -> pd.Series:
    """Flag the rows whose ``text`` column matches a regular expression.

    The search is case-insensitive and unanchored (a match anywhere in the
    text counts).  Rows with missing text are reported as non-matching.

    Args:
        df: Card data; must contain a ``text`` column.
        pattern: Regular expression to look for.

    Returns:
        A boolean Series aligned with ``df``: True where the text matches.
    """
    card_text = df['text']
    matches = card_text.str.contains(pattern, case=False, na=False, regex=True)
    return matches
|
||||||
|
|
||||||
|
def create_keyword_mask(df: pd.DataFrame, keywords: Union[str, List[str]]) -> pd.Series:
    """Flag the rows whose ``keywords`` column mentions any given keyword.

    Matching is a case-insensitive substring search; the keywords are joined
    with ``|`` and interpreted as a regular expression, so a keyword may
    itself be a regex.  Rows with missing keywords are reported as
    non-matching.

    Args:
        df: Card data; must contain a ``keywords`` column.
        keywords: A single keyword or a list of keywords.  Empty strings are
            ignored.

    Returns:
        A boolean Series aligned with ``df``: True where at least one keyword
        is present.  All False when no usable keyword is supplied.
    """
    if isinstance(keywords, str):
        keywords = [keywords]

    # An empty alternative ('' or a trailing '|') matches every string, which
    # would silently turn the mask into "all rows".  Drop empties, and treat
    # "nothing to look for" as "nothing matches".
    terms = [kw for kw in keywords if kw]
    if not terms:
        return pd.Series(False, index=df.index, dtype=bool)

    return df['keywords'].str.contains('|'.join(terms), case=False, na=False)
|
||||||
|
|
||||||
|
def tag_for_card_types(df: pd.DataFrame, color: str) -> None:
    """Apply type-based theme tags to the cards in ``df``.

    Every entry of ``settings.TYPE_TAG_MAPPING`` pairs a card type with the
    theme tags it implies.  For each type a row mask is built with
    ``utility.create_type_mask``; when at least one row matches, the tags are
    passed to ``utility.apply_tag_vectorized`` together with the mask.

    Args:
        df: Card data; must contain the ``type`` and ``themeTags`` columns.
        color: Colour name, used only in log messages.

    Raises:
        ValueError: If ``type`` or ``themeTags`` is missing from ``df``.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    began = pd.Timestamp.now()
    logging.info('Setting card type tags on %s_cards.csv', color)

    try:
        # Fail early, with the missing names spelled out, if the frame is
        # not in the expected shape.
        absent = {'type', 'themeTags'} - set(df.columns)
        if absent:
            raise ValueError(f"Missing required columns: {absent}")

        for card_type, tags in settings.TYPE_TAG_MAPPING.items():
            type_rows = utility.create_type_mask(df, card_type)
            if not type_rows.any():
                continue
            utility.apply_tag_vectorized(df, type_rows, tags)
            logging.info('Tagged %d cards with %s type', type_rows.sum(), card_type)

        elapsed = (pd.Timestamp.now() - began).total_seconds()
        logging.info('Card type tagging completed in %.2fs', elapsed)

    except Exception as e:
        logging.error('Error in tag_for_card_types: %s', str(e))
        raise

    logging.info(f'Card type tags set on {color}_cards.csv.\n')
|
||||||
|
|
||||||
## Add creature types to the theme tags
|
## Add creature types to the theme tags
|
||||||
def add_creatures_to_tags(df, color):
|
def add_creatures_to_tags(df, color):
|
||||||
print(f'Adding creature types to theme tags in {color}_cards.csv.')
|
logging.info(f'Adding creature types to theme tags in {color}_cards.csv.')
|
||||||
|
|
||||||
# Add kindred_tags to creatureTypes column
|
# Add kindred_tags to creatureTypes column
|
||||||
for index, row in df.iterrows():
|
for index, row in df.iterrows():
|
||||||
|
|
@ -294,24 +408,7 @@ def add_creatures_to_tags(df, color):
|
||||||
df.at[index, 'themeTags'] = theme_tags
|
df.at[index, 'themeTags'] = theme_tags
|
||||||
|
|
||||||
# Overwrite file with kindred tags added
|
# Overwrite file with kindred tags added
|
||||||
print(f'Creature types added to theme tags in {color}_cards.csv.\n')
|
logging.info(f'Creature types added to theme tags in {color}_cards.csv.')
|
||||||
print('==========\n')
|
|
||||||
|
|
||||||
# Set Kindred Support
|
|
||||||
print(f'Checking for and setting Kindred Support tag in {color}_cards.csv')
|
|
||||||
all_kindred = ['changeling', 'choose a creature type', 'shares a creature type',
|
|
||||||
'shares at least one creature type', 'you control of the chosen type']
|
|
||||||
|
|
||||||
for index, row in df.iterrows():
|
|
||||||
if pd.isna(row['text']):
|
|
||||||
continue
|
|
||||||
theme_tags = row['themeTags']
|
|
||||||
for item in all_kindred:
|
|
||||||
if item in row['text'].lower():
|
|
||||||
if 'Kindred Support' not in theme_tags:
|
|
||||||
theme_tags.extend(['Kindred Support'])
|
|
||||||
df.at[index, 'themeTags'] = theme_tags
|
|
||||||
print(f'"Kindred Support" tag set in {color}_cards.csv.\n')
|
|
||||||
|
|
||||||
## Add keywords to theme tags
|
## Add keywords to theme tags
|
||||||
def tag_for_keywords(df, color):
|
def tag_for_keywords(df, color):
|
||||||
|
|
@ -336,7 +433,7 @@ def tag_for_keywords(df, color):
|
||||||
def sort_theme_tags(df, color):
|
def sort_theme_tags(df, color):
|
||||||
print(f'Alphabetically sorting theme tags in {color}_cards.csv.')
|
print(f'Alphabetically sorting theme tags in {color}_cards.csv.')
|
||||||
|
|
||||||
df['themeTags'] = df['themeTags'].apply(sort_list)
|
df['themeTags'] = df['themeTags'].apply(utility.sort_list)
|
||||||
|
|
||||||
columns_to_keep = ['name', 'faceName','edhrecRank', 'colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', 'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side']
|
columns_to_keep = ['name', 'faceName','edhrecRank', 'colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', 'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side']
|
||||||
df = df[columns_to_keep]
|
df = df[columns_to_keep]
|
||||||
|
|
@ -680,6 +777,62 @@ def tag_for_loot(df, color):
|
||||||
# Overwrite file with loot tag added
|
# Overwrite file with loot tag added
|
||||||
print(f'Loot cards tagged in {color}_cards.csv.\n')
|
print(f'Loot cards tagged in {color}_cards.csv.\n')
|
||||||
|
|
||||||
|
def tag_for_cantrips(df: pd.DataFrame, color: str) -> None:
    """Tag cheap card-drawing spells as cantrips.

    A row is tagged when all of the following hold:

    * its mana value is 2 or less (missing or non-numeric values never qualify);
    * its text matches ``PATTERN_GROUPS['draw']``;
    * its type line contains neither "Land" nor "Equipment";
    * its keywords include none of Channel, Cycling, Connive, Learn, Ravenous;
    * it does not already carry the "Loot" theme tag;
    * its name is not in the hand-maintained exclusion set below.

    Matching rows are handed to ``utility.apply_tag_vectorized`` with the
    tags in ``TAG_GROUPS['Cantrips']``.

    Side effect: ``df['manaValue']`` is converted to a numeric column in
    place; values that cannot be parsed become NaN.

    NOTE(review): a second ``def tag_for_cantrips`` appears further down this
    file; if both remain, the later definition is the one callers get.

    Args:
        df: The DataFrame containing card data.
        color: The color identifier, used only in log messages.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    logging.info('Tagging cantrips in %s_cards.csv', color)

    try:
        # Convert mana value to numeric so it can be compared below.
        df['manaValue'] = pd.to_numeric(df['manaValue'], errors='coerce')

        # --- Exclusions ---------------------------------------------------
        # Lands and Equipment are identified by the *type line*.  Searching
        # the rules text instead (case-insensitively) would throw out any
        # draw spell that merely mentions "land" and would miss real
        # Lands/Equipment whose text never names their own type.
        excluded_types = df['type'].str.contains('Land|Equipment', na=False)
        excluded_keywords = create_keyword_mask(
            df, ['Channel', 'Cycling', 'Connive', 'Learn', 'Ravenous']
        )
        has_loot = df['themeTags'].apply(lambda x: 'Loot' in x)

        # Cards that satisfy the mechanical definition but are not cantrips.
        excluded_card_names = {
            'Archivist of Oghma', 'Argothian Enchantress', 'Audacity', 'Betrayal', 'Bequeathal',
            'Blood Scrivener', 'Brigon, Soldier of Meletis', 'Compost', 'Cryptbreaker', 'Curiosity',
            'Dakra Mystic', 'Dockside Chef', 'Dreamcatcher', 'Edgewall Innkeeper', 'Flumph',
            'Folk Hero', 'Goblin Artisans', 'Hatching Plans', 'Ideas Unbound', 'Kor Spiritdancer',
            'Mindblade Render', 'Puresteel Paladin', 'Read the Runes', 'Sram, Senior Edificer',
            'Sygg, River Cutthroat', 'Wall of Mulch', 'Waste Not'
        }
        excluded_names = df['name'].isin(excluded_card_names)

        # --- Qualifying conditions ----------------------------------------
        has_draw = create_text_mask(df, PATTERN_GROUPS['draw'])
        # NaN mana values are mapped to +inf so they can never count as cheap.
        low_cost = df['manaValue'].fillna(float('inf')) <= 2

        cantrip_mask = (
            ~excluded_types &
            ~excluded_keywords &
            ~has_loot &
            ~excluded_names &
            has_draw &
            low_cost
        )

        # Apply tags
        utility.apply_tag_vectorized(df, cantrip_mask, TAG_GROUPS['Cantrips'])

        logging.info('Successfully tagged cantrips in %s_cards.csv', color)

    except Exception as e:
        logging.error('Error tagging cantrips in %s_cards.csv: %s', color, str(e))
        raise
|
||||||
|
|
||||||
## Connive Effects
|
## Connive Effects
|
||||||
def tag_for_connive(df, color):
|
def tag_for_connive(df, color):
|
||||||
print(f'Checking {color}_cards.csv for cards that have or care about "Connive".')
|
print(f'Checking {color}_cards.csv for cards that have or care about "Connive".')
|
||||||
|
|
@ -1509,9 +1662,8 @@ def tag_auras(df, color):
|
||||||
theme_tags.extend([tag])
|
theme_tags.extend([tag])
|
||||||
df.at[index, 'themeTags'] = theme_tags
|
df.at[index, 'themeTags'] = theme_tags
|
||||||
|
|
||||||
# Overwrite file with aura tag added
|
# Overwrite file with Aura tag added
|
||||||
#df.to_csv(f'{csv_directory}/{color}_cards.csv', index=False)
|
logging.info(f'Cards that have or care about "Aura" in {color}_cards.csv have been tagged.')
|
||||||
print(f'Cards that have or care about "Auraa" in {color}_cards.csv have been tagged.\n')
|
|
||||||
|
|
||||||
## Constellation
|
## Constellation
|
||||||
def tag_constellation(df, color):
|
def tag_constellation(df, color):
|
||||||
|
|
@ -2684,244 +2836,67 @@ def tag_for_magecraft(df, color):
|
||||||
|
|
||||||
## Cantrips
|
## Cantrips
|
||||||
def tag_for_cantrips(df, color):
    """Tag low-cost card-drawing cards in ``df`` as cantrips.

    A row is tagged when its mana value is at most 2 and its text matches a
    "draw(s) a card" / "draw(s) one card" pattern, unless it is excluded by
    type (Land, Equipment), by keyword, by already carrying a 'Loot' theme
    tag, or by appearing in the explicit name exclusion list.

    Parameters:
        df (pd.DataFrame): Card data. Reads the 'type', 'keywords',
            'themeTags', 'name', 'text' and 'manaValue' columns.
        color (str): Color identifier, used only in log messages.

    Returns:
        None: ``df`` is modified in place ('manaValue' is coerced to numeric
        and the tags in ``TAG_GROUPS['Cantrips']`` are applied).
    """
    logging.info('Tagging cantrips in %s_cards.csv', color)

    # Convert mana value to numeric; unparsable values become NaN.
    df['manaValue'] = pd.to_numeric(df['manaValue'], errors='coerce')

    # Define exclusion conditions
    excluded_types = df['type'].str.contains('Land|Equipment', na=False)
    excluded_keywords = df['keywords'].str.contains('Channel|Cycling|Connive|Learn|Ravenous', na=False)
    has_loot = df['themeTags'].apply(lambda x: 'Loot' in x)

    # Name exclusions. `isin` is an exact, case- and punctuation-sensitive
    # match, so every entry must be spelled exactly as in the 'name' column.
    EXCLUDED_NAMES = {
        'Archivist of Oghma', 'Argothian Enchantress', 'Audacity', 'Betrayal', 'Bequeathal',
        'Blood Scrivener', 'Brigone, Soldier of Meletis', 'Compost',
        'Concealing Curtains // Revealing Eye', 'Cryptbreaker', 'Curiosity',
        'Curse of Vengeance', 'Cryptex', 'Dakra Mystic', 'Dawn of a New Age', 'Dockside Chef',
        'Dreamcatcher', 'Edgewall Innkeeper', 'Eidolon of Philosophy', 'Evolved Sleeper',
        'Femeref Enchantress', 'Finneas, Ace Archer', 'Flumph', 'Folk Hero',
        'Frodo, Adventurous Hobbit', 'Goblin Artisans', 'Goldberry, River-Daughter',
        'Gollum, Scheming Guide', 'Hatching Plans', 'Ideas Unbound', 'Ingenious Prodigy',
        'Ior Ruin Expedition', "Jace's Erasure", 'Keeper of the Mind', 'Kor Spiritdancer',
        'Lodestone Bauble', 'Puresteel Paladin', 'Jeweled Bird', 'Mindblade Render',
        "Multani's Presence", "Nahiri's Lithoforming", 'Ordeal of Thassa', 'Pollywog Prodigy',
        'Priest of Forgotten Gods', 'Ravenous Squirrel', 'Read the Runes',
        'Red Death, Shipwrecker', 'Roil Cartographer', 'Sage of Lat-Nam', 'Saprazzan Heir',
        'Scion of Halaster', 'See Beyond', 'Selhoff Entomber', 'Shielded Aether Thief',
        'Shore Keeper', 'Silverquill Silencer', 'Soldevi Sage', 'Soldevi Sentry',
        'Spiritual Focus', 'Sram, Senior Edificer', 'Staff of the Storyteller', 'Stirge',
        'Sylvan Echoes', "Sythis, Harvest's Hand", 'Sygg, River Cutthroat', 'Tenuous Truce',
        'Test of Talents', 'Thalakos Seer', "Tribute to Horobi // Echo of Death's Wail",
        'Vampire Gourmand', 'Vampiric Rites', 'Vampirism', 'Vessel of Paramnesia',
        "Witch's Cauldron", 'Wall of Mulch', 'Waste Not', 'Well Rested',
        # Add other excluded names here
    }
    excluded_names = df['name'].isin(EXCLUDED_NAMES)

    # Define cantrip conditions
    draw_patterns = r'draw[s]? a card|draw[s]? one card'
    has_draw = df['text'].str.contains(draw_patterns, case=False, na=False)
    # NaN mana values are treated as infinitely expensive so they never qualify.
    low_cost = df['manaValue'].fillna(float('inf')) <= 2

    # Combine all conditions
    cantrip_mask = (
        ~excluded_types &
        ~excluded_keywords &
        ~has_loot &
        ~excluded_names &
        has_draw &
        low_cost
    )

    # Apply tags using vectorized operation
    utility.apply_tag_vectorized(df, cantrip_mask, TAG_GROUPS['Cantrips'])

    logging.info('Finished tagging cantrips in %s_cards.csv', color)
|
||||||
|
|
||||||
## Spell Copy
|
## Spell Copy
|
||||||
def tag_for_spell_copy(df, color):
|
def tag_for_spell_copy(df, color):
|
||||||
|
|
@ -4904,5 +4879,6 @@ def tag_for_removal(df, color):
|
||||||
|
|
||||||
|
|
||||||
#regenerate_csv_by_color('colorless')
|
#regenerate_csv_by_color('colorless')
|
||||||
for color in colors:
|
#for color in colors:
|
||||||
load_dataframe(color)
|
# load_dataframe(color)
|
||||||
|
load_dataframe('colorless')
|
||||||
|
|
@ -1,61 +0,0 @@
|
||||||
def tag_for_cantrips(df, color):
    """Tag cards in the DataFrame as cantrips based on specific criteria.

    Cantrips are defined as low-cost spells (mana value <= 2) whose text
    contains "draw a card". Lands, Equipment, cards with certain keywords,
    cards already tagged 'Loot', and specific named cards are excluded.

    Parameters:
        df (pd.DataFrame): The DataFrame containing card data.
        color (str): The color identifier for logging purposes.

    Returns:
        None: The function modifies the DataFrame in place by applying tags.
    """
    logging.info('Tagging cantrips in %s_cards.csv', color)

    # Convert mana value to numeric; unparsable values become NaN.
    df['manaValue'] = pd.to_numeric(df['manaValue'], errors='coerce')

    # Define exclusion conditions
    excluded_types = df['type'].str.contains('Land|Equipment', na=False)
    excluded_keywords = df['keywords'].str.contains('Channel|Cycling|Connive|Learn|Ravenous', na=False)
    has_loot = df['themeTags'].apply(lambda x: 'Loot' in x)

    # Name exclusions. `isin` is an exact, case- and punctuation-sensitive
    # match, so every entry must be spelled exactly as in the 'name' column.
    EXCLUDED_NAMES = {
        'Archivist of Oghma', 'Argothian Enchantress', 'Audacity', 'Betrayal', 'Bequeathal',
        'Blood Scrivener', 'Brigone, Soldier of Meletis', 'Compost',
        'Concealing Curtains // Revealing Eye', 'Cryptbreaker', 'Curiosity',
        'Curse of Vengeance', 'Cryptex', 'Dakra Mystic', 'Dawn of a New Age', 'Dockside Chef',
        'Dreamcatcher', 'Edgewall Innkeeper', 'Eidolon of Philosophy', 'Evolved Sleeper',
        'Femeref Enchantress', 'Finneas, Ace Archer', 'Flumph', 'Folk Hero',
        'Frodo, Adventurous Hobbit', 'Goblin Artisans', 'Goldberry, River-Daughter',
        'Gollum, Scheming Guide', 'Hatching Plans', 'Ideas Unbound', 'Ingenious Prodigy',
        'Ior Ruin Expedition', "Jace's Erasure", 'Keeper of the Mind', 'Kor Spiritdancer',
        'Lodestone Bauble', 'Puresteel Paladin', 'Jeweled Bird', 'Mindblade Render',
        "Multani's Presence", "Nahiri's Lithoforming", 'Ordeal of Thassa', 'Pollywog Prodigy',
        'Priest of Forgotten Gods', 'Ravenous Squirrel', 'Read the Runes',
        'Red Death, Shipwrecker', 'Roil Cartographer', 'Sage of Lat-Nam', 'Saprazzan Heir',
        'Scion of Halaster', 'See Beyond', 'Selhoff Entomber', 'Shielded Aether Thief',
        'Shore Keeper', 'Silverquill Silencer', 'Soldevi Sage', 'Soldevi Sentry',
        'Spiritual Focus', 'Sram, Senior Edificer', 'Staff of the Storyteller', 'Stirge',
        'Sylvan Echoes', "Sythis, Harvest's Hand", 'Sygg, River Cutthroat', 'Tenuous Truce',
        'Test of Talents', 'Thalakos Seer', "Tribute to Horobi // Echo of Death's Wail",
        'Vampire Gourmand', 'Vampiric Rites', 'Vampirism', 'Vessel of Paramnesia',
        "Witch's Cauldron", 'Wall of Mulch', 'Waste Not', 'Well Rested',
        # Add other excluded names here
    }
    excluded_names = df['name'].isin(EXCLUDED_NAMES)

    # Define cantrip conditions
    has_draw = df['text'].str.contains('draw a card', case=False, na=False)
    # Comparisons against NaN are False, so rows without a mana value never qualify.
    low_cost = df['manaValue'] <= 2

    # Combine all conditions
    cantrip_mask = (
        ~excluded_types &
        ~excluded_keywords &
        ~has_loot &
        ~excluded_names &
        has_draw &
        low_cost
    )

    # Apply tags using vectorized operation
    apply_tag_vectorized(df, cantrip_mask, TAG_GROUPS['Cantrips'])

    logging.info('Finished tagging cantrips in %s_cards.csv', color)
|
|
||||||
279
utility.py
279
utility.py
|
|
@ -1,4 +1,18 @@
|
||||||
def pluralize(word):
|
from typing import Union, List
|
||||||
|
import pandas as pd
|
||||||
|
import re
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Optional, Set
|
||||||
|
from time import perf_counter
|
||||||
|
def pluralize(word: str) -> str:
|
||||||
|
"""Convert a word to its plural form using basic English pluralization rules.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
word: The singular word to pluralize
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The pluralized word
|
||||||
|
"""
|
||||||
if word.endswith('y'):
|
if word.endswith('y'):
|
||||||
return word[:-1] + 'ies'
|
return word[:-1] + 'ies'
|
||||||
elif word.endswith(('s', 'sh', 'ch', 'x', 'z')):
|
elif word.endswith(('s', 'sh', 'ch', 'x', 'z')):
|
||||||
|
|
@ -8,10 +22,261 @@ def pluralize(word):
|
||||||
else:
|
else:
|
||||||
return word + 's'
|
return word + 's'
|
||||||
|
|
||||||
def sort_list(items: Union[List, pd.Series]) -> Union[List, pd.Series]:
    """Return ``items`` sorted in ascending order.

    Args:
        items: A list or pandas Series to sort. Any other value is passed
            through untouched.

    Returns:
        A new sorted list for list input, the result of ``sort_values()``
        for Series input, or ``items`` itself for anything else.
    """
    # Plain lists go through the builtin, which returns a fresh list.
    if isinstance(items, list):
        return sorted(items)
    # Series are sorted by value; the original index labels travel with them.
    if isinstance(items, pd.Series):
        return items.sort_values()
    return items
|
||||||
|
|
||||||
|
def create_regex_mask(df: pd.DataFrame, column: str, pattern: str) -> pd.Series:
    """Build a boolean mask of rows whose ``column`` value matches ``pattern``.

    Args:
        df: DataFrame to search
        column: Name of the string column to test
        pattern: Regular expression, matched case-insensitively anywhere in
            the value

    Returns:
        Boolean Series aligned with ``df``; missing values count as
        non-matches.
    """
    column_values = df[column]
    matches = column_values.str.contains(pattern, case=False, na=False, regex=True)
    return matches
|
||||||
|
|
||||||
|
def combine_masks(masks: List[pd.Series], logical_operator: str = 'and') -> pd.Series:
    """Combine multiple boolean masks with a logical operator.

    Args:
        masks: List of boolean Series masks to combine
        logical_operator: Logical operator to use, 'and' or 'or'
            (case-insensitive)

    Returns:
        Combined boolean mask. An empty ``masks`` list yields an empty
        boolean Series.

    Raises:
        ValueError: If ``logical_operator`` is neither 'and' nor 'or'.
    """
    operator_name = str(logical_operator).strip().lower()
    # Reject unknown operators explicitly instead of silently OR-ing the masks.
    if operator_name not in ('and', 'or'):
        raise ValueError(
            f"logical_operator must be 'and' or 'or', got {logical_operator!r}"
        )

    if not masks:
        return pd.Series([], dtype=bool)

    use_and = operator_name == 'and'
    result = masks[0]
    for mask in masks[1:]:
        result = (result & mask) if use_and else (result | mask)
    return result
|
||||||
|
|
||||||
|
def safe_str_contains(series: pd.Series, patterns: Union[str, List[str]], regex: bool = False) -> pd.Series:
    """Safely check if strings in a Series contain one or more patterns, handling NA values.

    Matching is case-insensitive. Missing values never match.

    Args:
        series: String Series to check
        patterns: String or list of strings to look for
        regex: Whether to treat patterns as regex expressions

    Returns:
        Boolean Series (aligned with ``series``) indicating which strings
        contain any of the patterns. An empty pattern list matches nothing.
    """
    if isinstance(patterns, str):
        patterns = [patterns]
    else:
        patterns = list(patterns)

    # Nothing to look for: no row can match (and pd.concat([]) would raise).
    if not patterns:
        return pd.Series(False, index=series.index, dtype=bool)

    filled = series.fillna('')

    if regex:
        # Non-capturing groups keep each alternative isolated without
        # triggering the pandas "pattern has match groups" warning.
        pattern = '|'.join(f'(?:{p})' for p in patterns)
        return filled.str.contains(pattern, case=False, na=False, regex=True)

    # Literal mode: OR the per-pattern masks together.
    result = pd.Series(False, index=series.index, dtype=bool)
    for p in patterns:
        result = result | filled.str.contains(p, case=False, na=False, regex=False)
    return result
|
||||||
|
|
||||||
|
def create_type_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series:
    """Build a boolean mask of rows whose 'type' value matches any given pattern.

    Matching is case-insensitive; rows with a missing type never match.

    Args:
        df: DataFrame to search (must have a 'type' column)
        type_text: A single pattern or a list of patterns
        regex: When True (default) patterns are joined with ``|`` and
            matched as one regular expression; when False each pattern is
            matched as a literal substring

    Returns:
        Boolean Series indicating matching rows

    Raises:
        ValueError: If type_text is empty or None
        TypeError: If type_text is not a string or list of strings
    """
    if not type_text:
        raise ValueError("type_text cannot be empty or None")

    if isinstance(type_text, str):
        candidates = [type_text]
    elif isinstance(type_text, list):
        candidates = type_text
    else:
        raise TypeError("type_text must be a string or list of strings")

    type_column = df['type']

    if not regex:
        # One literal-substring mask per pattern, OR-ed across columns.
        per_pattern = [
            type_column.str.contains(candidate, case=False, na=False, regex=False)
            for candidate in candidates
        ]
        return pd.concat(per_pattern, axis=1).any(axis=1)

    alternation = '|'.join(f'{candidate}' for candidate in candidates)
    return type_column.str.contains(alternation, case=False, na=False, regex=True)
|
||||||
|
|
||||||
|
def create_combined_type_mask(df: pd.DataFrame, type_patterns: Dict[str, List[str]], logical_operator: str = 'and') -> pd.Series:
    """Combine per-category type masks into a single boolean mask.

    Each dictionary value is turned into a mask with ``create_type_mask``
    (the keys are only labels), and the resulting masks are merged with
    ``combine_masks``.

    Args:
        df: DataFrame to search
        type_patterns: Dictionary mapping type categories to lists of patterns
        logical_operator: How to combine the category masks ('and' or 'or')

    Returns:
        Combined boolean mask; all True when ``type_patterns`` is empty.

    Example:
        patterns = {
            'creature': ['Creature', 'Artifact Creature'],
            'enchantment': ['Enchantment', 'Enchantment Creature']
        }
        mask = create_combined_type_mask(df, patterns, 'or')
    """
    if type_patterns:
        per_category = [
            create_type_mask(df, category_patterns)
            for category_patterns in type_patterns.values()
        ]
        return combine_masks(per_category, logical_operator)

    # No constraints supplied: every row passes.
    return pd.Series(True, index=df.index)
|
||||||
|
|
||||||
|
def extract_creature_types(type_text: str, creature_types: List[str], non_creature_types: List[str]) -> List[str]:
    """Extract creature types from a type text string.

    The type line is split on whitespace and each token is kept when it is
    a known creature type and not a listed non-creature type. Tokens are
    returned in the order they appear (duplicates preserved).

    Args:
        type_text: The type line text to parse. Non-string values (e.g. the
            NaN pandas uses for missing data) yield an empty list.
        creature_types: List of valid creature types
        non_creature_types: List of non-creature types to exclude

    Returns:
        List of extracted creature types
    """
    # Missing type lines arrive as NaN/None from pandas; nothing to extract.
    if not isinstance(type_text, str):
        return []

    # Sets give O(1) membership instead of scanning the lists per token.
    allowed = set(creature_types)
    excluded = set(non_creature_types)
    return [token for token in type_text.split() if token in allowed and token not in excluded]
|
||||||
|
|
||||||
|
def find_types_in_text(text: str, name: str, creature_types: List[str]) -> List[str]:
    """Find creature types mentioned in card text.

    Each whitespace-separated word is stripped of every character other
    than ASCII letters and hyphens, then kept when it is a known creature
    type that does not occur as a substring of the card name.

    Args:
        text: Card text to search. Non-string values (NaN/None) yield [].
        name: Card name to exclude from search. A non-string name is
            treated as empty, i.e. it excludes nothing.
        creature_types: List of valid creature types

    Returns:
        Alphabetically sorted list of the distinct creature types found, so
        the result is stable between runs.
    """
    if not isinstance(text, str):
        return []

    card_name = name if isinstance(name, str) else ''
    known_types = set(creature_types)
    found_types = set()

    for word in text.split():
        # Drop punctuation/digits glued to the word ("Goblin," -> "Goblin").
        clean_word = re.sub(r'[^a-zA-Z-]', '', word)
        if not clean_word:
            continue
        if clean_word in known_types and clean_word not in card_name:
            found_types.add(clean_word)

    return sorted(found_types)
|
||||||
|
|
||||||
|
def add_outlaw_type(types: List[str], outlaw_types: List[str]) -> List[str]:
    """Append 'Outlaw' when any existing type qualifies as an outlaw type.

    Args:
        types: List of current types
        outlaw_types: List of types that qualify for Outlaw

    Returns:
        A new list with 'Outlaw' appended when at least one entry of
        ``types`` is in ``outlaw_types`` and 'Outlaw' is not already
        present; otherwise the original ``types`` object, unchanged.
    """
    # Already marked: nothing to add.
    if 'Outlaw' in types:
        return types

    for current_type in types:
        if current_type in outlaw_types:
            return types + ['Outlaw']

    return types
|
||||||
|
|
||||||
|
def batch_update_types(df: pd.DataFrame, mask: pd.Series, new_types: List[str]) -> None:
    """Merge ``new_types`` into the 'creatureTypes' list of every selected row.

    Args:
        df: DataFrame to update in place (needs a list-valued
            'creatureTypes' column)
        mask: Boolean mask indicating which rows to update
        new_types: List of types to add

    Returns:
        None. Each selected cell becomes the sorted, de-duplicated union of
        its previous types and ``new_types``.
    """
    def _merged(existing):
        # Concatenate, de-duplicate, then sort for a stable order.
        return sorted(set(existing + new_types))

    selected = df.loc[mask, 'creatureTypes']
    df.loc[mask, 'creatureTypes'] = selected.apply(_merged)
|
||||||
|
|
||||||
|
def create_tag_mask(df: pd.DataFrame, tag_patterns: Union[str, List[str]], column: str = 'themeTags') -> pd.Series:
    """Create a boolean mask for rows where tags match specified patterns.

    A row matches when any of its tags contains any pattern as a
    (case-sensitive) substring.

    Args:
        df: DataFrame to search
        tag_patterns: String or list of strings to match against tags
        column: Column containing the per-row tag collections to search
            (default: 'themeTags')

    Returns:
        Boolean Series aligned with ``df.index`` indicating matching rows.
        An empty pattern list matches nothing.
    """
    if isinstance(tag_patterns, str):
        patterns = [tag_patterns]
    else:
        patterns = list(tag_patterns)

    # Handle empty DataFrame case (the column is not required to exist).
    if len(df) == 0:
        return pd.Series([], index=df.index, dtype=bool)

    # Single pass over the column, testing every pattern against every tag.
    matches = [
        any(pattern in tag for tag in row_tags for pattern in patterns)
        for row_tags in df[column]
    ]
    return pd.Series(matches, index=df.index, dtype=bool)
|
||||||
|
|
||||||
|
def validate_dataframe_columns(df: pd.DataFrame, required_columns: Set[str]) -> None:
    """Validate that DataFrame contains all required columns.

    Args:
        df: DataFrame to validate
        required_columns: Column names that must be present. Any iterable
            of names is accepted (set, list, tuple, ...).

    Raises:
        ValueError: If any required columns are missing
    """
    # Coerce to a set first: plain lists do not support set difference.
    missing = set(required_columns) - set(df.columns)
    if missing:
        raise ValueError(f"Missing required columns: {missing}")
|
||||||
|
|
||||||
|
def apply_tag_vectorized(df: pd.DataFrame, mask: pd.Series, tags: List[str]) -> None:
    """Apply tags to rows in a dataframe based on a boolean mask.

    Each selected 'themeTags' cell is replaced by the sorted, de-duplicated
    union of its current tags and ``tags``.

    Args:
        df: The dataframe to modify in place (needs a 'themeTags' column)
        mask: Boolean series indicating which rows to tag
        tags: Tags to apply. A list, tuple or set is treated as several
            tags; any other single value is applied as one tag.
    """
    if isinstance(tags, (tuple, set, frozenset)):
        # Treat other collections as several tags rather than one odd tag.
        tags = list(tags)
    elif not isinstance(tags, list):
        tags = [tags]

    # Get current tags for masked rows
    current_tags = df.loc[mask, 'themeTags']
    if current_tags.empty:
        # No row selected: nothing to write back.
        return

    new_tags = set(tags)

    # Add new tags
    df.loc[mask, 'themeTags'] = current_tags.apply(
        lambda existing: sorted(new_tags.union(existing))
    )
|
||||||
|
|
||||||
|
def log_performance_metrics(start_time: float, operation: str, df_size: int) -> None:
    """Log performance metrics for an operation.

    Emits one INFO record with the total elapsed time and the average time
    per row.

    Args:
        start_time: Start time from perf_counter()
        operation: Description of the operation performed
        df_size: Size of the DataFrame processed. Zero is allowed; the
            per-row figure is then reported as 0.
    """
    duration = perf_counter() - start_time
    # Guard the average: an empty DataFrame must not raise ZeroDivisionError.
    per_row_ms = duration / df_size * 1000 if df_size else 0.0
    # Lazy %-style arguments: the message is only built if INFO is enabled.
    logging.info(
        "%s completed in %.2fs for %s rows (%.2fms per row)",
        operation, duration, df_size, per_row_ms,
    )
|
||||||
Loading…
Add table
Add a link
Reference in a new issue