Started changing print statements to logging statements in tagger.py

Refined the load dataframe section in tagger.py. Began work on changing the tagging functions to use vectorized inputs instead of iterrows.
2025-09-22 04:50:46 +02:00 · 2024-12-31 10:48:11 -08:00 · 2024-12-31 10:48:11 -08:00 · e404515d91
commit e404515d91
parent 5a92c04810
3 changed files with 178 additions and 38 deletions
--- a/deck_builder.py
+++ b/deck_builder.py
@ -429,7 +429,7 @@ class DeckBuilder:
                              ['colorless', 'black', 'green', 'red', 'blue', 'white', 'golgari', 'rakdos',
                               'dimir', 'orzhov', 'gruul', 'simic', 'selesnya', 'izzet', 'boros', 'azorius',
                               'jund', 'sultai', 'abzan', 'grixis', 'mardu', 'esper', 'temur', 'naya',
-                               'bant', 'jeska', 'glint', 'dune','witch', 'yore', 'ink', 'wubrg'])
+                               'bant', 'jeskai', 'glint', 'dune','witch', 'yore', 'ink', 'wubrg'])
        }
        
        try:
@ -471,12 +471,50 @@ class DeckBuilder:
            logging.error(f"Error in determine_color_identity: {e}")
            raise
    
+    def read_csv(self, filename: str, converters: dict | None = None) -> pd.DataFrame:
+        """Read CSV file with error handling and logging.
+        
+        Args:
+            filename: Name of the CSV file without extension
+            converters: Dictionary of converters for specific columns
+        
+        Returns:
+            DataFrame from CSV file
+        """
+        try:
+            filepath = f'{csv_directory}/{filename}_cards.csv'
+            df = pd.read_csv(filepath, converters=converters or {'themeTags': pd.eval, 'creatureTypes': pd.eval})
+            logging.debug(f"Successfully read {filename}_cards.csv")
+            return df
+        except FileNotFoundError as e:
+            logging.error(f"File {filename}_cards.csv not found: {e}")
+            raise
+        except Exception as e:
+            logging.error(f"Error reading {filename}_cards.csv: {e}")
+            raise
+
+    def write_csv(self, df: pd.DataFrame, filename: str) -> None:
+        """Write DataFrame to CSV with error handling and logging.
+        
+        Args:
+            df: DataFrame to write
+            filename: Name of the CSV file without extension
+        """
+        try:
+            filepath = f'{csv_directory}/{filename}.csv'
+            df.to_csv(filepath, index=False)
+            logging.debug(f"Successfully wrote {filename}.csv")
+        except Exception as e:
+            logging.error(f"Error writing {filename}.csv: {e}")
+            raise
+
    def setup_dataframes(self):
+        """Initialize and setup all required DataFrames."""
        all_df = []
        for file in self.files_to_load:
-            df = pd.read_csv(f'{csv_directory}/{file}_cards.csv', converters={'themeTags': pd.eval, 'creatureTypes': pd.eval})
+            df = self.read_csv(file)
            all_df.append(df)
-        self.full_df = pd.concat(all_df,ignore_index=True)
+        self.full_df = pd.concat(all_df, ignore_index=True)
        self.full_df.sort_values(by='edhrecRank', inplace=True)
        
        self.land_df = self.full_df[self.full_df['type'].str.contains('Land')].copy()
--- a/tagger.py
+++ b/tagger.py
@ -1,5 +1,6 @@
 from __future__ import annotations

+import logging
 import os
 import pandas as pd # type: ignore

@ -9,32 +10,57 @@ from settings import artifact_tokens, csv_directory, colors, counter_types, ench
 from setup import regenerate_csv_by_color
 from utility import pluralize, sort_list

+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+
 ### Setup
 ## Load the dataframe
-def load_dataframe(color):
-    """Load and validate the card dataframe for a given color"""
+def load_dataframe(color: str) -> pd.DataFrame:
+    """
+    Load and validate the card dataframe for a given color.
+    
+    Args:
+        color (str): The color of cards to load ('white', 'blue', etc)
+    
+    Returns:
+        pd.DataFrame: The loaded and validated dataframe
+    
+    Raises:
+        FileNotFoundError: If CSV file doesn't exist and can't be regenerated
+        ValueError: If required columns are missing
+    """
    try:
        filepath = f'{csv_directory}/{color}_cards.csv'
        
        # Check if file exists, regenerate if needed
        if not os.path.exists(filepath):
-            print(f'{color}_cards.csv not found, regenerating it.')
+            logging.warning(f'{color}_cards.csv not found, regenerating it.')
            regenerate_csv_by_color(color)
+            if not os.path.exists(filepath):
+                raise FileNotFoundError(f"Failed to generate {filepath}")
        
        # Load initial dataframe for validation
        check_df = pd.read_csv(filepath)
        
        # Validate required columns
-        required_columns = ['creatureTypes', 'themeTags']
+        required_columns = ['creatureTypes', 'themeTags'] 
        missing_columns = [col for col in required_columns if col not in check_df.columns]
        
        # Handle missing columns
        if missing_columns:
-            print(f"Missing columns: {missing_columns}")
+            logging.warning(f"Missing columns: {missing_columns}")
            if 'creatureTypes' not in check_df.columns:
                kindred_tagging(check_df, color)
            if 'themeTags' not in check_df.columns:
                create_theme_tags(check_df, color)
+                
+            # Verify columns were added successfully
+            check_df = pd.read_csv(filepath)
+            still_missing = [col for col in required_columns if col not in check_df.columns]
+            if still_missing:
+                raise ValueError(f"Failed to add required columns: {still_missing}")
        
        # Load final dataframe with proper converters
        df = pd.read_csv(filepath, converters={'themeTags': pd.eval, 'creatureTypes': pd.eval})
@ -43,11 +69,11 @@ def load_dataframe(color):
        tag_by_color(df, color)
        
    except FileNotFoundError as e:
-        print(f'Error: {e}')
+        logging.error(f'Error: {e}')
    except pd.errors.ParserError:
-        print('Error parsing the CSV file.')
+        logging.error('Error parsing the CSV file.')
    except Exception as e:
-        print(f'An unexpected error occurred: {e}')
+        logging.error(f'An unexpected error occurred: {e}')

 ## Tag cards on a color-by-color basis
 def tag_by_color(df, color):
@ -109,14 +135,14 @@ def tag_by_color(df, color):

 ## Determine any non-creature cards that have creature types mentioned
 def kindred_tagging(df, color):
-    print(f'Settings creature type tags on {color}_cards.csv.\n')
+    logging.info(f'Settings creature type tags on {color}_cards.csv.\n')
    print('===============\n')
    
    # Create new blank list column called 'creatureTypes
    df['creatureTypes'] = [[] for _ in range(len(df))]
    
    # Set creature types
-    print(f'Checking for and setting creature types in {color}_cards.csv')
+    logging.info(f'Checking for and setting creature types in {color}_cards.csv')
    for index, row in df.iterrows():
        if 'Creature' in row['type']:
            kindred_tags = []
@ -130,11 +156,11 @@ def kindred_tagging(df, color):
                            if creature_type == creature_types_individual:
                                kindred_tags.append(creature_type)
                                df.at[index, 'creatureTypes'] = kindred_tags
-    print(f'Creature types set in {color}_cards.csv.\n')
+    logging.info(f'Creature types set in {color}_cards.csv.\n')
    print('==========\n')
    
    # Set outlaws
-    print(f'Checking for and setting Outlaw types in {color}_cards.csv')
+    logging.info(f'Checking for and setting Outlaw types in {color}_cards.csv')
    outlaws = ['Assassin', 'Mercenary', 'Pirate', 'Rogue', 'Warlock']
    for index, row in df.iterrows():
        if 'Creature' in row['type']:
@ -145,11 +171,11 @@ def kindred_tagging(df, color):
                    if 'Outlaw' not in kindred_tags:
                        kindred_tags.append('Outlaw')
                        df.at[index, 'creatureTypes'] = kindred_tags
-    print(f'Outlaw types set in {color}_cards.csv.\n')
+    logging.info(f'Outlaw types set in {color}_cards.csv.\n')
    print('==========\n')
    
    # Check for creature types in text (i.e. how 'Voja, Jaws of the Conclave' cares about Elves)
-    print(f'Checking for and setting creature types found in the text of cards in {color}_cards.csv')
+    logging.info(f'Checking for and setting creature types found in the text of cards in {color}_cards.csv')
    for index, row in df.iterrows():
        kindred_tags = row['creatureTypes']
        if pd.isna(row['text']):
@ -176,42 +202,57 @@ def kindred_tagging(df, color):
                        if creature_type not in kindred_tags:
                            kindred_tags.append(creature_type)
                            df.at[index, 'creatureTypes'] = kindred_tags
-    print(f'Creature types from text set in {color}_cards.csv.\n')
+    logging.info(f'Creature types from text set in {color}_cards.csv.\n')
    
    # Overwrite file with creature type tags
    columns_to_keep = ['name', 'faceName','edhrecRank', 'colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', 'power', 'toughness', 'keywords', 'layout', 'side']
    df = df[columns_to_keep]
    df.to_csv(f'{csv_directory}/{color}_cards.csv', index=False)
-    print(f'\nCreature types tagged on {color}_cards.csv.\n')
-
-## Create theme tags (i.e.)
-def create_theme_tags(df, color):
-    # Create a blank column for theme/effect tags
-    # This will setup a basis for future tagging to automate deck building
-    print(f'Creating theme/effect tag column on {color}_cards.csv.')
+    logging.info(f'\nCreature types tagged on {color}_cards.csv.\n')
    
-    # Create new blank list column called 'themeTags
-    df['themeTags'] = [[] for _ in range(len(df))]
+def create_theme_tags(df: pd.DataFrame, color: str) -> None:
+    """
+    Initialize the themeTags column and reorganize dataframe columns.
    
-    # Organize it's location
-    columns_to_keep = ['name', 'faceName','edhrecRank', 'colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', 'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side']
-    df = df[columns_to_keep]
+    Args:
+        df (pd.DataFrame): The card dataframe to process
+        color (str): The color being processed for logging
+    """
+    logging.info(f'Creating theme/effect tag column on {color}_cards.csv.')
    
-    # Overwrite original file
+    # Initialize themeTags column efficiently
+    df['themeTags'] = pd.Series([[] for _ in range(len(df))])
+    
+    # Define column order
+    columns_to_keep = [
+        'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors',
+        'manaCost', 'manaValue', 'type', 'creatureTypes', 'text',
+        'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side'
+    ]
+    
+    # Reorder and select columns
+    try:
+        df = df[columns_to_keep]
+    except KeyError as e:
+        logging.warning(f"Warning: Missing expected column {e}")
+        # Continue with available columns
+        existing_cols = [col for col in columns_to_keep if col in df.columns]
+        df = df[existing_cols]
+    
+    # Save dataframe
    df.to_csv(f'{csv_directory}/{color}_cards.csv', index=False)
-    print(f'Theme/effect tag column created on {color}_cards.csv.\n')
-
-## Add card types to the tags
+    logging.info(f'Theme/effect tag column created on {color}_cards.csv.\n')
+    
 def tag_for_card_types(df, color):
    # Iterate through each {color}_cards.csv file to find artifact cards
    # Also check for cards that care about artifacts
-    print(f'Settings card type tags on {color}_cards.csv.\n\n')
+    logging.info(f'Settings card type tags on {color}_cards.csv.\n\n')
    # Define the card types
    card_types = ['Artifact', 'Creature', 'Enchantment', 'Land', 'Instant', 'Sorcery', 'Planeswalker', 'Battle']
    
    # Tag for artifacts
    for card_type in card_types:
-        print(f'Tagging cards in {color}_cards.csv that have the "{card_type}" type.')
+        logging.info(f'Tagging cards in {color}_cards.csv that have the "{card_type}" type.')
        for index, row in df.iterrows():
            theme_tags = row['themeTags']
            if card_type in row['type']:
@ -234,10 +275,10 @@ def tag_for_card_types(df, color):
                    if tag not in theme_tags:
                        theme_tags.extend([tag])
                        df.at[index, 'themeTags'] = theme_tags
-        print(f'Cards with the "{card_type}" type in {color}_cards.csv have been tagged.\n')
+        logging.info(f'Cards with the "{card_type}" type in {color}_cards.csv have been tagged.\n')
        print('=====\n')
    # Overwrite file with artifact tag added
-    print(f'Card type tags set on {color}_cards.csv.\n')
+    logging.info(f'Card type tags set on {color}_cards.csv.\n')

 ## Add creature types to the theme tags
 def add_creatures_to_tags(df, color):
--- a/traycer_testing.py
+++ b/traycer_testing.py
@ -0,0 +1,61 @@
+def tag_for_cantrips(df, color):
+    """
+    Tag cards in the DataFrame as cantrips based on specific criteria.
+    
+    Cantrips are defined as low-cost spells (mana value <= 2) that draw cards.
+    The function excludes certain card types, keywords, and specific named cards
+    from being tagged as cantrips.
+    
+    Parameters:
+        df (pd.DataFrame): The DataFrame containing card data.
+        color (str): The color identifier for logging purposes.
+    
+    Returns:
+        None: The function modifies the DataFrame in place by applying tags.
+    """
+    logging.info('Tagging cantrips in %s_cards.csv', color)
+
+    # Convert mana value to numeric
+    df['manaValue'] = pd.to_numeric(df['manaValue'], errors='coerce')
+
+    # Define exclusion conditions
+    excluded_types = df['type'].str.contains('Land|Equipment', na=False)
+    excluded_keywords = df['keywords'].str.contains('Channel|Cycling|Connive|Learn|Ravenous', na=False)
+    has_loot = df['themeTags'].apply(lambda x: 'Loot' in x)
+
+    # Define name exclusions
+    EXCLUDED_NAMES = {
+        'Archivist of Oghma', 'Argothian Enchantress', 'Audacity', 'Betrayal', 'Bequeathal', 'Blood Scrivener', 'Brigon, Soldier of Meletis',
+        'Compost', 'Concealing curtains // Revealing Eye', 'Cryptbreaker', 'Curiosity', 'Cuse of Vengeance', 'Cryptek', 'Dakra Mystic',
+        'Dawn of a New Age', 'Dockside Chef', 'Dreamcatcher', 'Edgewall Innkeeper', 'Eidolon of Philosophy', 'Evolved Sleeper',
+        'Femeref Enchantress', 'Finneas, Ace Archer', 'Flumph', 'Folk Hero', 'Frodo, Adventurous Hobbit', 'Goblin Artisans',
+        'Goldberry, River-Daughter', 'Gollum, Scheming Guide', 'Hatching Plans', 'Ideas Unbound', 'Ingenius Prodigy', 'Ior Ruin Expedition',
+        "Jace's Erasure", 'Keeper of the Mind', 'Kor Spiritdancer', 'Lodestone Bauble', 'Puresteel Paladin', 'Jeweled Bird', 'Mindblade Render',
+        "Multani's Presence", "Nahiri's Lithoforming", 'Ordeal of Thassa', 'Pollywog Prodigy', 'Priest of Forgotten Gods', 'Ravenous Squirrel',
+        'Read the Runes', 'Red Death, Shipwrecker', 'Roil Cartographer', 'Sage of Lat-Name', 'Saprazzan Heir', 'Scion of Halaster', 'See Beyond',
+        'Selhoff Entomber', 'Shielded Aether Theif', 'Shore Keeper', 'silverquill Silencer', 'Soldevi Sage', 'Soldevi Sentry', 'Spiritual Focus',
+        'Sram, Senior Edificer', 'Staff of the Storyteller', 'Stirge', 'Sylvan Echoes', "Sythis Harvest's Hand", 'Sygg, River Cutthroat',
+        'Tenuous Truce', 'Test of Talents', 'Thalakos seer', "Tribute to Horobi // Echo of Deaths Wail", 'Vampire Gourmand', 'Vampiric Rites',
+        'Vampirism', 'Vessel of Paramnesia', "Witch's Caultron", 'Wall of Mulch', 'Waste Not', 'Well Rested'
+        # Add other excluded names here
+    }
+    excluded_names = df['name'].isin(EXCLUDED_NAMES)
+
+    # Define cantrip conditions
+    has_draw = df['text'].str.contains('draw a card', case=False, na=False)
+    low_cost = df['manaValue'] <= 2
+
+    # Combine all conditions
+    cantrip_mask = (
+        ~excluded_types &
+        ~excluded_keywords &
+        ~has_loot &
+        ~excluded_names &
+        has_draw &
+        low_cost
+    )
+
+    # Apply tags using vectorized operation
+    apply_tag_vectorized(df, cantrip_mask, TAG_GROUPS['Cantrips'])
+
+    logging.info('Finished tagging cantrips in %s_cards.csv', color)