Started changing print statements to logging statements in tagger.py

Refined the load dataframe section in tagger.py

Beginning work on changing the tagging functions to use vectorized inputs instead of iterrows
This commit is contained in:
mwisnowski 2024-12-31 10:48:11 -08:00
parent 5a92c04810
commit e404515d91
3 changed files with 178 additions and 38 deletions

View file

@ -429,7 +429,7 @@ class DeckBuilder:
['colorless', 'black', 'green', 'red', 'blue', 'white', 'golgari', 'rakdos',
'dimir', 'orzhov', 'gruul', 'simic', 'selesnya', 'izzet', 'boros', 'azorius',
'jund', 'sultai', 'abzan', 'grixis', 'mardu', 'esper', 'temur', 'naya',
'bant', 'jeska', 'glint', 'dune','witch', 'yore', 'ink', 'wubrg'])
'bant', 'jeskai', 'glint', 'dune','witch', 'yore', 'ink', 'wubrg'])
}
try:
@ -471,10 +471,48 @@ class DeckBuilder:
logging.error(f"Error in determine_color_identity: {e}")
raise
def read_csv(self, filename: str, converters: dict | None = None) -> pd.DataFrame:
    """Read ``{csv_directory}/{filename}_cards.csv`` with logging.

    Args:
        filename: Base name of the card file; ``_cards.csv`` is appended.
        converters: Per-column converters forwarded to ``pd.read_csv``.
            When omitted (``None``) the ``themeTags`` and ``creatureTypes``
            columns are parsed with ``pd.eval``. An explicit empty dict
            means "apply no converters".

    Returns:
        DataFrame loaded from the CSV file.

    Raises:
        FileNotFoundError: If the CSV file does not exist (logged, re-raised).
        Exception: Any other read/parse failure (logged, re-raised).
    """
    # Compare against None instead of relying on truthiness, so that an
    # explicitly passed empty dict is honoured rather than silently replaced.
    if converters is None:
        # NOTE(review): pd.eval evaluates the cell text as an expression;
        # this is only appropriate for CSV files the project generated itself.
        converters = {'themeTags': pd.eval, 'creatureTypes': pd.eval}

    filepath = f'{csv_directory}/{filename}_cards.csv'
    try:
        df = pd.read_csv(filepath, converters=converters)
    except FileNotFoundError as e:
        logging.error("File %s_cards.csv not found: %s", filename, e)
        raise
    except Exception as e:
        logging.error("Error reading %s_cards.csv: %s", filename, e)
        raise

    logging.debug("Successfully read %s_cards.csv", filename)
    return df
def write_csv(self, df: pd.DataFrame, filename: str) -> None:
    """Save a DataFrame as ``{csv_directory}/{filename}.csv`` with logging.

    The index is not written to the file.

    Args:
        df: DataFrame to persist.
        filename: Base name of the target file; ``.csv`` is appended.

    Raises:
        Exception: Any failure while writing is logged and re-raised.
    """
    try:
        destination = f'{csv_directory}/{filename}.csv'
        df.to_csv(destination, index=False)
    except Exception as e:
        logging.error(f"Error writing {filename}.csv: {e}")
        raise
    else:
        # Only reached when the write completed without raising.
        logging.debug(f"Successfully wrote {filename}.csv")
def setup_dataframes(self):
"""Initialize and setup all required DataFrames."""
all_df = []
for file in self.files_to_load:
df = pd.read_csv(f'{csv_directory}/{file}_cards.csv', converters={'themeTags': pd.eval, 'creatureTypes': pd.eval})
df = self.read_csv(file)
all_df.append(df)
self.full_df = pd.concat(all_df, ignore_index=True)
self.full_df.sort_values(by='edhrecRank', inplace=True)

103
tagger.py
View file

@ -1,5 +1,6 @@
from __future__ import annotations
import logging
import os
import pandas as pd # type: ignore
@ -9,17 +10,36 @@ from settings import artifact_tokens, csv_directory, colors, counter_types, ench
from setup import regenerate_csv_by_color
from utility import pluralize, sort_list
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
### Setup
## Load the dataframe
def load_dataframe(color):
"""Load and validate the card dataframe for a given color"""
def load_dataframe(color: str) -> pd.DataFrame:
"""
Load and validate the card dataframe for a given color.
Args:
color (str): The color of cards to load ('white', 'blue', etc)
Returns:
pd.DataFrame: The loaded and validated dataframe
Raises:
FileNotFoundError: If CSV file doesn't exist and can't be regenerated
ValueError: If required columns are missing
"""
try:
filepath = f'{csv_directory}/{color}_cards.csv'
# Check if file exists, regenerate if needed
if not os.path.exists(filepath):
print(f'{color}_cards.csv not found, regenerating it.')
logging.warning(f'{color}_cards.csv not found, regenerating it.')
regenerate_csv_by_color(color)
if not os.path.exists(filepath):
raise FileNotFoundError(f"Failed to generate {filepath}")
# Load initial dataframe for validation
check_df = pd.read_csv(filepath)
@ -30,12 +50,18 @@ def load_dataframe(color):
# Handle missing columns
if missing_columns:
print(f"Missing columns: {missing_columns}")
logging.warning(f"Missing columns: {missing_columns}")
if 'creatureTypes' not in check_df.columns:
kindred_tagging(check_df, color)
if 'themeTags' not in check_df.columns:
create_theme_tags(check_df, color)
# Verify columns were added successfully
check_df = pd.read_csv(filepath)
still_missing = [col for col in required_columns if col not in check_df.columns]
if still_missing:
raise ValueError(f"Failed to add required columns: {still_missing}")
# Load final dataframe with proper converters
df = pd.read_csv(filepath, converters={'themeTags': pd.eval, 'creatureTypes': pd.eval})
@ -43,11 +69,11 @@ def load_dataframe(color):
tag_by_color(df, color)
except FileNotFoundError as e:
print(f'Error: {e}')
logging.error(f'Error: {e}')
except pd.errors.ParserError:
print('Error parsing the CSV file.')
logging.error('Error parsing the CSV file.')
except Exception as e:
print(f'An unexpected error occurred: {e}')
logging.error(f'An unexpected error occurred: {e}')
## Tag cards on a color-by-color basis
def tag_by_color(df, color):
@ -109,14 +135,14 @@ def tag_by_color(df, color):
## Determine any non-creature cards that have creature types mentioned
def kindred_tagging(df, color):
print(f'Settings creature type tags on {color}_cards.csv.\n')
logging.info(f'Settings creature type tags on {color}_cards.csv.\n')
print('===============\n')
# Create new blank list column called 'creatureTypes
df['creatureTypes'] = [[] for _ in range(len(df))]
# Set creature types
print(f'Checking for and setting creature types in {color}_cards.csv')
logging.info(f'Checking for and setting creature types in {color}_cards.csv')
for index, row in df.iterrows():
if 'Creature' in row['type']:
kindred_tags = []
@ -130,11 +156,11 @@ def kindred_tagging(df, color):
if creature_type == creature_types_individual:
kindred_tags.append(creature_type)
df.at[index, 'creatureTypes'] = kindred_tags
print(f'Creature types set in {color}_cards.csv.\n')
logging.info(f'Creature types set in {color}_cards.csv.\n')
print('==========\n')
# Set outlaws
print(f'Checking for and setting Outlaw types in {color}_cards.csv')
logging.info(f'Checking for and setting Outlaw types in {color}_cards.csv')
outlaws = ['Assassin', 'Mercenary', 'Pirate', 'Rogue', 'Warlock']
for index, row in df.iterrows():
if 'Creature' in row['type']:
@ -145,11 +171,11 @@ def kindred_tagging(df, color):
if 'Outlaw' not in kindred_tags:
kindred_tags.append('Outlaw')
df.at[index, 'creatureTypes'] = kindred_tags
print(f'Outlaw types set in {color}_cards.csv.\n')
logging.info(f'Outlaw types set in {color}_cards.csv.\n')
print('==========\n')
# Check for creature types in text (i.e. how 'Voja, Jaws of the Conclave' cares about Elves)
print(f'Checking for and setting creature types found in the text of cards in {color}_cards.csv')
logging.info(f'Checking for and setting creature types found in the text of cards in {color}_cards.csv')
for index, row in df.iterrows():
kindred_tags = row['creatureTypes']
if pd.isna(row['text']):
@ -176,42 +202,57 @@ def kindred_tagging(df, color):
if creature_type not in kindred_tags:
kindred_tags.append(creature_type)
df.at[index, 'creatureTypes'] = kindred_tags
print(f'Creature types from text set in {color}_cards.csv.\n')
logging.info(f'Creature types from text set in {color}_cards.csv.\n')
# Overwrite file with creature type tags
columns_to_keep = ['name', 'faceName','edhrecRank', 'colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', 'power', 'toughness', 'keywords', 'layout', 'side']
df = df[columns_to_keep]
df.to_csv(f'{csv_directory}/{color}_cards.csv', index=False)
print(f'\nCreature types tagged on {color}_cards.csv.\n')
logging.info(f'\nCreature types tagged on {color}_cards.csv.\n')
## Create theme tags (i.e.)
def create_theme_tags(df: pd.DataFrame, color: str) -> None:
    """Initialize the ``themeTags`` column and save the reordered dataframe.

    The ``themeTags`` column is added to ``df`` in place. The column
    selection/reordering is applied to a separate frame, which is what gets
    written to ``{csv_directory}/{color}_cards.csv``.

    Args:
        df (pd.DataFrame): The card dataframe to process.
        color (str): The color being processed; used for the file name and
            log messages.
    """
    logging.info(f'Creating theme/effect tag column on {color}_cards.csv.')

    # One distinct empty list per row, assigned positionally. Wrapping this in
    # a default-indexed pd.Series would make pandas align on index labels and
    # leave NaN in every row whose label is not in 0..len(df)-1.
    df['themeTags'] = [[] for _ in range(len(df))]

    # Define column order
    columns_to_keep = [
        'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors',
        'manaCost', 'manaValue', 'type', 'creatureTypes', 'text',
        'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side'
    ]

    # Reorder and select columns
    try:
        ordered_df = df[columns_to_keep]
    except KeyError as e:
        logging.warning(f"Warning: Missing expected column {e}")
        # Continue with whichever expected columns are actually present
        existing_cols = [col for col in columns_to_keep if col in df.columns]
        ordered_df = df[existing_cols]

    # Overwrite the original file with the reordered frame
    ordered_df.to_csv(f'{csv_directory}/{color}_cards.csv', index=False)
    logging.info(f'Theme/effect tag column created on {color}_cards.csv.\n')
## Add card types to the tags
def tag_for_card_types(df, color):
# Iterate through each {color}_cards.csv file to find artifact cards
# Also check for cards that care about artifacts
print(f'Settings card type tags on {color}_cards.csv.\n\n')
logging.info(f'Settings card type tags on {color}_cards.csv.\n\n')
# Define the card types
card_types = ['Artifact', 'Creature', 'Enchantment', 'Land', 'Instant', 'Sorcery', 'Planeswalker', 'Battle']
# Tag for artifacts
for card_type in card_types:
print(f'Tagging cards in {color}_cards.csv that have the "{card_type}" type.')
logging.info(f'Tagging cards in {color}_cards.csv that have the "{card_type}" type.')
for index, row in df.iterrows():
theme_tags = row['themeTags']
if card_type in row['type']:
@ -234,10 +275,10 @@ def tag_for_card_types(df, color):
if tag not in theme_tags:
theme_tags.extend([tag])
df.at[index, 'themeTags'] = theme_tags
print(f'Cards with the "{card_type}" type in {color}_cards.csv have been tagged.\n')
logging.info(f'Cards with the "{card_type}" type in {color}_cards.csv have been tagged.\n')
print('=====\n')
# Overwrite file with artifact tag added
print(f'Card type tags set on {color}_cards.csv.\n')
logging.info(f'Card type tags set on {color}_cards.csv.\n')
## Add creature types to the theme tags
def add_creatures_to_tags(df, color):

61
traycer_testing.py Normal file
View file

@ -0,0 +1,61 @@
def tag_for_cantrips(df, color):
    """
    Tag cards in the DataFrame as cantrips based on specific criteria.

    A card is tagged when its text contains "draw a card" (case-insensitive)
    and its mana value is <= 2, unless it is excluded by type, keyword,
    an existing 'Loot' theme tag, or by name.

    Parameters:
        df (pd.DataFrame): The DataFrame containing card data. The
            'manaValue' column is converted to numeric in place.
        color (str): The color identifier for logging purposes.

    Returns:
        None: Tags are applied through apply_tag_vectorized.
    """
    # NOTE(review): logging, pd, apply_tag_vectorized and TAG_GROUPS are not
    # defined in the visible part of this file - confirm they are in scope.
    logging.info('Tagging cantrips in %s_cards.csv', color)

    # Convert mana value to numeric; unparseable values become NaN and
    # therefore fail the `<= 2` comparison below.
    df['manaValue'] = pd.to_numeric(df['manaValue'], errors='coerce')

    # Define exclusion conditions
    excluded_types = df['type'].str.contains('Land|Equipment', na=False)
    excluded_keywords = df['keywords'].str.contains('Channel|Cycling|Connive|Learn|Ravenous', na=False)
    # assumes each themeTags cell is a list of tag strings - TODO confirm
    has_loot = df['themeTags'].apply(lambda x: 'Loot' in x)

    # Define name exclusions. Matching below is exact (isin), so every entry
    # must be spelled and capitalised exactly like the card's name.
    EXCLUDED_NAMES = {
        'Archivist of Oghma', 'Argothian Enchantress', 'Audacity', 'Betrayal', 'Bequeathal', 'Blood Scrivener', 'Brigone, Soldier of Meletis',
        'Compost', 'Concealing Curtains // Revealing Eye', 'Cryptbreaker', 'Curiosity', 'Curse of Vengeance', 'Cryptek', 'Dakra Mystic',
        'Dawn of a New Age', 'Dockside Chef', 'Dreamcatcher', 'Edgewall Innkeeper', 'Eidolon of Philosophy', 'Evolved Sleeper',
        'Femeref Enchantress', 'Finneas, Ace Archer', 'Flumph', 'Folk Hero', 'Frodo, Adventurous Hobbit', 'Goblin Artisans',
        'Goldberry, River-Daughter', 'Gollum, Scheming Guide', 'Hatching Plans', 'Ideas Unbound', 'Ingenious Prodigy', 'Ior Ruin Expedition',
        "Jace's Erasure", 'Keeper of the Mind', 'Kor Spiritdancer', 'Lodestone Bauble', 'Puresteel Paladin', 'Jeweled Bird', 'Mindblade Render',
        "Multani's Presence", "Nahiri's Lithoforming", 'Ordeal of Thassa', 'Pollywog Prodigy', 'Priest of Forgotten Gods', 'Ravenous Squirrel',
        'Read the Runes', 'Red Death, Shipwrecker', 'Roil Cartographer', 'Sage of Lat-Nam', 'Saprazzan Heir', 'Scion of Halaster', 'See Beyond',
        'Selhoff Entomber', 'Shielded Aether Thief', 'Shore Keeper', 'Silverquill Silencer', 'Soldevi Sage', 'Soldevi Sentry', 'Spiritual Focus',
        'Sram, Senior Edificer', 'Staff of the Storyteller', 'Stirge', 'Sylvan Echoes', "Sythis, Harvest's Hand", 'Sygg, River Cutthroat',
        'Tenuous Truce', 'Test of Talents', 'Thalakos Seer', "Tribute to Horobi // Echo of Death's Wail", 'Vampire Gourmand', 'Vampiric Rites',
        'Vampirism', 'Vessel of Paramnesia', "Witch's Cauldron", 'Wall of Mulch', 'Waste Not', 'Well Rested'
        # Add other excluded names here
    }
    excluded_names = df['name'].isin(EXCLUDED_NAMES)

    # Define cantrip conditions
    has_draw = df['text'].str.contains('draw a card', case=False, na=False)
    low_cost = df['manaValue'] <= 2

    # Combine all conditions
    cantrip_mask = (
        ~excluded_types &
        ~excluded_keywords &
        ~has_loot &
        ~excluded_names &
        has_draw &
        low_cost
    )

    # Apply tags using vectorized operation
    apply_tag_vectorized(df, cantrip_mask, TAG_GROUPS['Cantrips'])
    logging.info('Finished tagging cantrips in %s_cards.csv', color)