Started work refactoring the tagging functions by using Traycer

2025-12-17 16:10:12 +01:00 · 2024-12-31 12:04:10 -08:00 · 2024-12-31 12:04:10 -08:00 · a4abea2c3c
commit a4abea2c3c
parent e404515d91
4 changed files with 736 additions and 516 deletions
--- a/settings.py
+++ b/settings.py
@ -32,6 +32,18 @@ board_wipe_tags = ['destroy all', 'destroy each', 'return all', 'return each', '
 card_types = ['Artifact','Creature', 'Enchantment', 'Instant', 'Land', 'Planeswalker', 'Sorcery',
              'Kindred', 'Dungeon', 'Battle']
 # Mapping of card types to their corresponding theme tags.
 # Keys are card type names looked up in a card's type line; values are the
 # theme tags applied to every card of that type (see tagger.tag_for_card_types).
 TYPE_TAG_MAPPING = {
    'Artifact': ['Artifacts Matter'],
    'Battle': ['Battles Matter'],
    # 'Creature' is left unmapped, so creatures receive no type-based theme tag.
    #'Creature': [],
    'Enchantment': ['Enchantments Matter'],
    'Instant': ['Spells Matter', 'Spellslinger'],
    'Land': ['Lands Matter'],
    'Planeswalker': ['Superfriends'],
    'Sorcery': ['Spells Matter', 'Spellslinger']
 }
 csv_directory = 'csv_files'
 colors = ['colorless', 'white', 'blue', 'black', 'red', 'green',
@ -149,3 +161,31 @@ targetted_removal_tags = ['exile target', 'destroy target', 'return target', 'sh
                'deals damage to target', 'loses all abilities']
 triggers = ['when', 'whenever', 'at']
 # Constants for DataFrame validation and processing
 # Columns a card DataFrame is expected to carry; tagger.create_theme_tags
 # uses this list as the column order when reindexing before saving.
 REQUIRED_COLUMNS = [
    'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors',
    'manaCost', 'manaValue', 'type', 'creatureTypes', 'text',
    'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side'
 ]
 # Baseline list of theme tag names.
 # NOTE(review): 'Super Friends' here is spelled 'Superfriends' in
 # TYPE_TAG_MAPPING -- confirm which spelling is canonical.
 DEFAULT_THEME_TAGS = [
    'Aggro', 'Aristocrats', 'Artifacts Matter', 'Big Mana', 'Blink',
    'Board Wipes', 'Burn', 'Cantrips', 'Card Draw', 'Clones',
    'Combat Matters', 'Control', 'Counters Matter', 'Energy',
    'Enter the Battlefield', 'Equipment', 'Exile Matters', 'Infect',
    'Interaction', 'Lands Matter', 'Leave the Battlefield', 'Legends Matter',
    'Life Matters', 'Mill', 'Monarch', 'Protection', 'Ramp', 'Reanimate',
    'Removal', 'Sacrifice Matters', 'Spellslinger', 'Stax', 'Super Friends',
    'Theft', 'Token Creation', 'Tokens Matter', 'Voltron', 'X Spells'
 ]
 # Output column order.
 # NOTE(review): currently identical to REQUIRED_COLUMNS -- consider aliasing
 # one to the other so the two lists cannot drift apart.
 COLUMN_ORDER = [
    'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors',
    'manaCost', 'manaValue', 'type', 'creatureTypes', 'text',
    'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side'
 ]
 # Constants for type detection and processing
 # Creature types that additionally earn the 'Outlaw' creature-type tag
 # during kindred tagging.
 OUTLAW_TYPES = ['Assassin', 'Mercenary', 'Pirate', 'Rogue', 'Warlock']
 # NOTE(review): not referenced in the visible code; presumably a row batch
 # size for type detection -- confirm before relying on it.
 TYPE_DETECTION_BATCH_SIZE = 1000
--- a/tagger.py
+++ b/tagger.py
@ -2,17 +2,47 @@ from __future__ import annotations
 import logging
 import os
 import re
 from typing import Dict, List, Optional, Set, Union
 import pandas as pd # type: ignore
 import settings
 import utility
 from settings import artifact_tokens, csv_directory, colors, counter_types, enchantment_tokens, multiple_copy_cards, num_to_search, triggers
 from setup import regenerate_csv_by_color
-from utility import pluralize, sort_list
+
 # Constants for common tag groupings.
 # Each key names a tagging concern; the value is the full list of theme tags
 # applied together for it (e.g. tag_for_cantrips applies TAG_GROUPS['Cantrips']).
 TAG_GROUPS = {
    "Cantrips": ["Cantrips", "Card Draw", "Spellslinger", "Spells Matter"],
    "Tokens": ["Token Creation", "Tokens Matter"],
    "Counters": ["Counters Matter"],
    "Combat": ["Combat Matters", "Combat Tricks"],
    "Artifacts": ["Artifacts Matter", "Artifact Tokens"],
    "Enchantments": ["Enchantments Matter", "Enchantment Tokens"],
    "Lands": ["Lands Matter"],
    "Spells": ["Spellslinger", "Spells Matter"]
 }
 # Common regex patterns.
 # tag_for_cantrips passes PATTERN_GROUPS['draw'] to create_text_mask, which
 # searches the 'text' column case-insensitively, so the patterns are written
 # in lower case.
 PATTERN_GROUPS = {
    "draw": r"draw[s]? a card|draw[s]? one card",
    "combat": r"attack[s]?|block[s]?|combat damage",
    # `.*` is greedy: anything between the verb and "token" is accepted.
    "tokens": r"create[s]? .* token|put[s]? .* token",
    "counters": r"\+1/\+1 counter|\-1/\-1 counter|loyalty counter",
    "sacrifice": r"sacrifice[s]? .*|sacrificed",
    "exile": r"exile[s]? .*|exiled"
 }
 logging.basicConfig(
    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s'
+    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('tagger.log', mode='w')
    ]
 )
 ### Setup
@ -70,10 +100,13 @@ def load_dataframe(color: str) -> pd.DataFrame:
    except FileNotFoundError as e:
        logging.error(f'Error: {e}')
-    except pd.errors.ParserError:
+        raise
-        logging.error('Error parsing the CSV file.')
+    except pd.errors.ParserError as e:
        logging.error(f'Error parsing the CSV file: {e}')
        raise
    except Exception as e:
        logging.error(f'An unexpected error occurred: {e}')
        raise
 ## Tag cards on a color-by-color basis
 def tag_by_color(df, color):
@ -90,41 +123,41 @@ def tag_by_color(df, color):
    print('====================\n')
    create_theme_tags(df, color)
    print('====================\n')
-    
+    #
-    # Go through each type of tagging
+    ## Go through each type of tagging
-    add_creatures_to_tags(df, color)
+    #add_creatures_to_tags(df, color)
-    print('====================\n')
+    #print('====================\n')
    tag_for_card_types(df, color)
    print('====================\n')
-    tag_for_keywords(df, color)
+    #tag_for_keywords(df, color)
-    print('====================\n')
+    #print('====================\n')
-    
+    #
-    # Tag for various effects
+    ## Tag for various effects
-    tag_for_cost_reduction(df, color)
+    #tag_for_cost_reduction(df, color)
-    print('====================\n')
+    #print('====================\n')
-    tag_for_card_draw(df, color)
+    #tag_for_card_draw(df, color)
-    print('====================\n')
+    #print('====================\n')
-    tag_for_artifacts(df, color)
+    #tag_for_artifacts(df, color)
-    print('====================\n')
+    #print('====================\n')
-    tag_for_enchantments(df, color)
+    #tag_for_enchantments(df, color)
-    print('====================\n')
+    #print('====================\n')
-    tag_for_exile_matters(df, color)
+    #tag_for_exile_matters(df, color)
-    print('====================\n')
+    #print('====================\n')
-    tag_for_tokens(df, color)
+    #tag_for_tokens(df, color)
-    print('====================\n')
+    #print('====================\n')
-    tag_for_life_matters(df, color)
+    #tag_for_life_matters(df, color)
-    print('====================\n')
+    #print('====================\n')
-    tag_for_counters(df, color)
+    #tag_for_counters(df, color)
-    print('====================\n')
+    #print('====================\n')
-    tag_for_voltron(df, color)
+    #tag_for_voltron(df, color)
-    print('====================\n')
+    #print('====================\n')
-    tag_for_spellslinger(df, color)
+    #tag_for_spellslinger(df, color)
-    print('====================\n')
+    #print('====================\n')
-    tag_for_ramp(df, color)
+    #tag_for_ramp(df, color)
-    print('====================\n')
+    #print('====================\n')
-    tag_for_themes(df, color)
+    #tag_for_themes(df, color)
-    print('====================\n')
+    #print('====================\n')
-    tag_for_interaction(df, color)
+    #tag_for_interaction(df, color)
    # Lastly, sort all theme tags for easier reading
    sort_theme_tags(df, color)
@ -134,155 +167,236 @@ def tag_by_color(df, color):
    #keyboard.wait('esc')
 ## Determine any non-creature cards that have creature types mentioned
-def kindred_tagging(df, color):
+def kindred_tagging(df: pd.DataFrame, color: str) -> None:
-    logging.info(f'Settings creature type tags on {color}_cards.csv.\n')
+    """Tag cards with creature types and related types.
    print('===============\n')
    # Create new blank list column called 'creatureTypes
    df['creatureTypes'] = [[] for _ in range(len(df))]
    # Set creature types
    logging.info(f'Checking for and setting creature types in {color}_cards.csv')
    for index, row in df.iterrows():
        if 'Creature' in row['type']:
            kindred_tags = []
            creature_types = row['type']
            split_types = creature_types.split()
            for creature_type in split_types:    
                # If the type is a non-creature type ignore it
                if creature_type not in settings.non_creature_types:
                    if creature_type not in kindred_tags:
                        for creature_types_individual in settings.creature_types:
                            if creature_type == creature_types_individual:
                                kindred_tags.append(creature_type)
                                df.at[index, 'creatureTypes'] = kindred_tags
    logging.info(f'Creature types set in {color}_cards.csv.\n')
    print('==========\n')
    # Set outlaws
    logging.info(f'Checking for and setting Outlaw types in {color}_cards.csv')
    outlaws = ['Assassin', 'Mercenary', 'Pirate', 'Rogue', 'Warlock']
    for index, row in df.iterrows():
        if 'Creature' in row['type']:
            kindred_tags = row['creatureTypes']
            creature_types = kindred_tags
            for creature_type in creature_types:
                if creature_type in outlaws:
                    if 'Outlaw' not in kindred_tags:
                        kindred_tags.append('Outlaw')
                        df.at[index, 'creatureTypes'] = kindred_tags
    logging.info(f'Outlaw types set in {color}_cards.csv.\n')
    print('==========\n')
    # Check for creature types in text (i.e. how 'Voja, Jaws of the Conclave' cares about Elves)
    logging.info(f'Checking for and setting creature types found in the text of cards in {color}_cards.csv')
    for index, row in df.iterrows():
        kindred_tags = row['creatureTypes']
        if pd.isna(row['text']):
            continue
        split_text = row['text'].split()
        ignore_list = ['Elite Inquisitor', 'Breaker of Armies', 'Cleopatra, Exiled Pharaoh', 'Nath\'s Buffoon']
        for creature_type in settings.creature_types:
            if row['name'] in ignore_list:
                continue
            if creature_type in row['name']:
                continue
            if pluralize(f'{creature_type}') in row['name']:
                continue
            for word in split_text:
                if creature_type == word:
                    if creature_type not in row['name']:
                        if creature_type not in kindred_tags:
                            kindred_tags.append(creature_type)
                            df.at[index, 'creatureTypes'] = kindred_tags
                # Tag for pluralized types (i.e. Elves, Wolves, etc...) in textbox
                if pluralize(f'{creature_type}') == word:
                    if pluralize(f'{creature_type}') not in row['name']:
                        if creature_type not in kindred_tags:
                            kindred_tags.append(creature_type)
                            df.at[index, 'creatureTypes'] = kindred_tags
    logging.info(f'Creature types from text set in {color}_cards.csv.\n')
    # Overwrite file with creature type tags
    columns_to_keep = ['name', 'faceName','edhrecRank', 'colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', 'power', 'toughness', 'keywords', 'layout', 'side']
    df = df[columns_to_keep]
    df.to_csv(f'{csv_directory}/{color}_cards.csv', index=False)
    logging.info(f'\nCreature types tagged on {color}_cards.csv.\n')
 def create_theme_tags(df: pd.DataFrame, color: str) -> None:
    """
    Initialize the themeTags column and reorganize dataframe columns.
    Args:
-        df (pd.DataFrame): The card dataframe to process
+        df: DataFrame containing card data
-        color (str): The color being processed for logging
+        color: Color identifier for logging
    """
-    logging.info(f'Creating theme/effect tag column on {color}_cards.csv.')
+    start_time = pd.Timestamp.now()
    logging.info(f'Setting creature type tags on {color}_cards.csv')
-    # Initialize themeTags column efficiently
+    try:
-    df['themeTags'] = pd.Series([[] for _ in range(len(df))])
+        # Initialize creatureTypes column vectorized
        df['creatureTypes'] = pd.Series([[] for _ in range(len(df))])
-    # Define column order
+        # Detect creature types using mask
-    columns_to_keep = [
+        creature_mask = utility.create_type_mask(df, 'Creature')
-        'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors',
+        if creature_mask.any():
-        'manaCost', 'manaValue', 'type', 'creatureTypes', 'text',
+            creature_rows = df[creature_mask]
-        'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side'
+            for idx, row in creature_rows.iterrows():
                types = utility.extract_creature_types(
                    row['type'],
                    settings.creature_types,
                    settings.non_creature_types
                )
                if types:
                    df.at[idx, 'creatureTypes'] = types
        creature_time = pd.Timestamp.now()
        logging.info(f'Creature type detection completed in {(creature_time - start_time).total_seconds():.2f}s')
        print('==========\n')
        # Process outlaw types
        outlaws = settings.OUTLAW_TYPES
        df['creatureTypes'] = df.apply(
            lambda row: utility.add_outlaw_type(row['creatureTypes'], outlaws)
            if isinstance(row['creatureTypes'], list) else row['creatureTypes'],
            axis=1
        )
        outlaw_time = pd.Timestamp.now()
        logging.info(f'Outlaw type processing completed in {(outlaw_time - creature_time).total_seconds():.2f}s')
        # Find creature types in text
        logging.info('Checking for creature types in card text')
        # Check for creature types in text (i.e. how 'Voja, Jaws of the Conclave' cares about Elves)
        logging.info(f'Checking for and setting creature types found in the text of cards in {color}_cards.csv')
        ignore_list = [
            'Elite Inquisitor', 'Breaker of Armies',
            'Cleopatra, Exiled Pharaoh', 'Nath\'s Buffoon'
        ]
-    # Reorder and select columns
+        for idx, row in df.iterrows():
            if row['name'] not in ignore_list:
                text_types = utility.find_types_in_text(
                    row['text'],
                    row['name'], 
                    settings.creature_types
                )
                if text_types:
                    current_types = row['creatureTypes']
                    if isinstance(current_types, list):
                        df.at[idx, 'creatureTypes'] = sorted(
                            list(set(current_types + text_types))
                        )
        text_time = pd.Timestamp.now()
        logging.info(f'Text-based type detection completed in {(text_time - outlaw_time).total_seconds():.2f}s')
        # Save results
        try:
            columns_to_keep = [
                'name', 'faceName', 'edhrecRank', 'colorIdentity',
                'colors', 'manaCost', 'manaValue', 'type',
                'creatureTypes', 'text', 'power', 'toughness',
                'keywords', 'layout', 'side'
            ]
            df = df[columns_to_keep]
-    except KeyError as e:
+            df.to_csv(f'{settings.csv_directory}/{color}_cards.csv', index=False)
-        logging.warning(f"Warning: Missing expected column {e}")
+            total_time = pd.Timestamp.now() - start_time
-        # Continue with available columns
+            logging.info(f'Creature type tagging completed in {total_time.total_seconds():.2f}s')
        existing_cols = [col for col in columns_to_keep if col in df.columns]
        df = df[existing_cols]
-    # Save dataframe
+        except Exception as e:
-    df.to_csv(f'{csv_directory}/{color}_cards.csv', index=False)
+            logging.error(f'Error saving results: {e}')
    logging.info(f'Theme/effect tag column created on {color}_cards.csv.\n')
-def tag_for_card_types(df, color):
+    # Overwrite file with creature type tags
-    # Iterate through each {color}_cards.csv file to find artifact cards
+    except Exception as e:
-    # Also check for cards that care about artifacts
+        logging.error(f'Error in kindred_tagging: {e}')
-    logging.info(f'Settings card type tags on {color}_cards.csv.\n\n')
+        raise
    # Define the card types
    card_types = ['Artifact', 'Creature', 'Enchantment', 'Land', 'Instant', 'Sorcery', 'Planeswalker', 'Battle']
-    # Tag for artifacts
+def create_theme_tags(df: pd.DataFrame, color: str) -> None:
-    for card_type in card_types:
+    """Initialize and configure theme tags for a card DataFrame.
        logging.info(f'Tagging cards in {color}_cards.csv that have the "{card_type}" type.')
        for index, row in df.iterrows():
            theme_tags = row['themeTags']
            if card_type in row['type']:
                tag_type = []
-                # Tagging for artifacts, enchantments, and lands matter
+    This function initializes the themeTags column, validates the DataFrame structure,
-                if card_type in ['Artifact', 'Battle', 'Enchantment', 'Land']:
+    and reorganizes columns in an efficient manner. It uses vectorized operations
-                    tag_type.extend([f'{card_type}s Matter'])
+    for better performance.
-                # Tagging for spellslinger/spells matter
+    Args:
-                if card_type in ['Instant', 'Sorcery']:
+        df: DataFrame containing card data to process
-                    tag_type.extend(['Spells Matter', 'Spellslinger'])
+        color: Color identifier for logging purposes (e.g. 'white', 'blue')
-                # Tagging for Superfriends
+    Returns:
-                if card_type == 'Planeswalker':
+        The processed DataFrame with initialized theme tags and reorganized columns
                    tag_type.extend(['Superfriends'])
-                # Adding the tags
+    Raises:
-                for tag in tag_type:
+        ValueError: If required columns are missing or color is invalid
-                    if tag not in theme_tags:
+        TypeError: If inputs are not of correct type
-                        theme_tags.extend([tag])
+    """
-                        df.at[index, 'themeTags'] = theme_tags
+    start_time = pd.Timestamp.now()
-        logging.info(f'Cards with the "{card_type}" type in {color}_cards.csv have been tagged.\n')
+    logging.info('Initializing theme tags for %s cards', color)
-        print('=====\n')
+
    # Validate inputs
    if not isinstance(df, pd.DataFrame):
        raise TypeError("df must be a pandas DataFrame")
    if not isinstance(color, str):
        raise TypeError("color must be a string")
    if color not in settings.colors:
        raise ValueError(f"Invalid color: {color}")
    try:
        # Initialize themeTags column using vectorized operation
        df['themeTags'] = pd.Series([[] for _ in range(len(df))], index=df.index)
        # Define expected columns
        required_columns = {
            'name', 'text', 'type', 'keywords',
            'creatureTypes', 'power', 'toughness'
        }
        # Validate required columns
        missing = required_columns - set(df.columns)
        if missing:
            raise ValueError(f"Missing required columns: {missing}")
        # Define column order
        columns_to_keep = settings.REQUIRED_COLUMNS
        # Reorder columns efficiently
        available_cols = [col for col in columns_to_keep if col in df.columns]
        df = df.reindex(columns=available_cols)
        # Save results
        try:
            df.to_csv(f'{settings.csv_directory}/{color}_cards.csv', index=False)
            total_time = pd.Timestamp.now() - start_time
            logging.info(f'Creature type tagging completed in {total_time.total_seconds():.2f}s')
            # Log performance metrics
            end_time = pd.Timestamp.now()
            duration = (end_time - start_time).total_seconds()
            logging.info('Theme tags initialized in %.2f seconds', duration)
        except Exception as e:
            logging.error(f'Error saving results: {e}')
    except Exception as e:
        logging.error('Error initializing theme tags: %s', str(e))
        raise
 def create_text_mask(df: pd.DataFrame, pattern: str) -> pd.Series:
    """Build a boolean mask flagging rows whose ``text`` matches ``pattern``.

    The match is a case-insensitive regular-expression search; rows whose
    ``text`` is missing are reported as non-matching.

    Args:
        df: The dataframe to search; must contain a ``text`` column.
        pattern: Regex pattern to match.

    Returns:
        Boolean series indicating matching rows.
    """
    card_text = df['text']
    matches = card_text.str.contains(pattern, case=False, na=False, regex=True)
    return matches
 def create_keyword_mask(df: pd.DataFrame, keywords: Union[str, List[str]]) -> pd.Series:
    """Create a boolean mask for rows whose ``keywords`` cell matches any keyword.

    The keywords are joined with ``|`` and searched for as a case-insensitive
    regular expression in the ``keywords`` column; missing cells never match.

    Args:
        df: The dataframe to search; must contain a ``keywords`` column.
        keywords: Keyword or list of keywords to match.

    Returns:
        Boolean series, aligned to ``df.index``, indicating matching rows.
        All False when no non-empty keyword is supplied.
    """
    if isinstance(keywords, str):
        keywords = [keywords]
    # An empty alternative in the joined pattern matches every non-null cell,
    # so drop empty keywords; "no keywords" must select no rows.
    terms = [keyword for keyword in keywords if keyword]
    if not terms:
        return pd.Series(False, index=df.index, dtype=bool)
    return df['keywords'].str.contains('|'.join(terms), case=False, na=False)
 def tag_for_card_types(df: pd.DataFrame, color: str) -> None:
    """Apply type-based theme tags to every card in the DataFrame.

    For each card type in ``settings.TYPE_TAG_MAPPING`` a row mask is built
    with ``utility.create_type_mask`` and, when any row matches, the mapped
    tags are handed to ``utility.apply_tag_vectorized``.

    Args:
        df: DataFrame containing card data; needs 'type' and 'themeTags'.
        color: Color identifier, used only in log messages.

    Raises:
        ValueError: If 'type' or 'themeTags' is missing from ``df``.
    """
    began = pd.Timestamp.now()
    logging.info('Setting card type tags on %s_cards.csv', color)
    try:
        # Fail fast when the columns the tagging relies on are absent.
        needed = {'type', 'themeTags'}
        absent = needed - set(df.columns)
        if absent:
            raise ValueError(f"Missing required columns: {absent}")
        # Walk the type -> tags table, skipping types with no matching card.
        for card_type, tags in settings.TYPE_TAG_MAPPING.items():
            type_mask = utility.create_type_mask(df, card_type)
            if not type_mask.any():
                continue
            utility.apply_tag_vectorized(df, type_mask, tags)
            logging.info('Tagged %d cards with %s type', type_mask.sum(), card_type)
        elapsed = (pd.Timestamp.now() - began).total_seconds()
        logging.info('Card type tagging completed in %.2fs', elapsed)
    except Exception as e:
        logging.error('Error in tag_for_card_types: %s', str(e))
        raise
    # Nothing is written to disk here; the tags live on the DataFrame only.
    logging.info(f'Card type tags set on {color}_cards.csv.\n')
 ## Add creature types to the theme tags
 def add_creatures_to_tags(df, color):
-    print(f'Adding creature types to theme tags in {color}_cards.csv.')
+    logging.info(f'Adding creature types to theme tags in {color}_cards.csv.')
    # Add kindred_tags to creatureTypes column
    for index, row in df.iterrows():
@ -294,24 +408,7 @@ def add_creatures_to_tags(df, color):
                df.at[index, 'themeTags'] = theme_tags
    # Overwrite file with kindred tags added
-    print(f'Creature types added to theme tags in {color}_cards.csv.\n')
+    logging.info(f'Creature types added to theme tags in {color}_cards.csv.')
    print('==========\n')
    # Set Kindred Support
    print(f'Checking for and setting Kindred Support tag in {color}_cards.csv')
    all_kindred = ['changeling', 'choose a creature type', 'shares a creature type',
                   'shares at least one creature type', 'you control of the chosen type']
    for index, row in df.iterrows():
        if pd.isna(row['text']):
            continue
        theme_tags = row['themeTags']
        for item in all_kindred:
            if item in row['text'].lower():
                if 'Kindred Support' not in theme_tags:
                    theme_tags.extend(['Kindred Support'])
                    df.at[index, 'themeTags'] = theme_tags
    print(f'"Kindred Support" tag set in {color}_cards.csv.\n')
 ## Add keywords to theme tags
 def tag_for_keywords(df, color):
@ -336,7 +433,7 @@ def tag_for_keywords(df, color):
 def sort_theme_tags(df, color):
    print(f'Alphabetically sorting theme tags in {color}_cards.csv.')
-    df['themeTags'] = df['themeTags'].apply(sort_list)
+    df['themeTags'] = df['themeTags'].apply(utility.sort_list)
    columns_to_keep = ['name', 'faceName','edhrecRank', 'colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', 'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side']
    df = df[columns_to_keep]
@ -680,6 +777,62 @@ def tag_for_loot(df, color):
    # Overwrite file with loot tag added
    print(f'Loot cards tagged in {color}_cards.csv.\n')
 def tag_for_cantrips(df: pd.DataFrame, color: str) -> None:
    """Tag cards in the DataFrame as cantrips based on specific criteria.

    Cantrips are defined as low-cost spells (mana value <= 2) that draw cards.
    Lands and Equipment, cards with certain keywords, cards already tagged
    'Loot', and specific named cards are excluded.

    The 'manaValue' column is coerced to numeric in place, and the rows that
    qualify are passed to ``utility.apply_tag_vectorized`` together with
    ``TAG_GROUPS['Cantrips']``.

    Args:
        df: The DataFrame containing card data
        color: The color identifier for logging purposes

    Raises:
        Exception: Any error raised while tagging is logged and re-raised.
    """
    logging.info('Tagging cantrips in %s_cards.csv', color)
    try:
        # Convert mana value to numeric; unparseable values become NaN
        df['manaValue'] = pd.to_numeric(df['manaValue'], errors='coerce')
        # Create exclusion masks.
        # Lands and Equipment are identified by the type line, not the rules
        # text, so this mask must be built from the 'type' column.
        excluded_types = df['type'].str.contains(
            'Land|Equipment', case=False, na=False, regex=True
        )
        excluded_keywords = create_keyword_mask(df, ['Channel', 'Cycling', 'Connive', 'Learn', 'Ravenous'])
        has_loot = df['themeTags'].apply(lambda x: 'Loot' in x)
        # Define name exclusions
        EXCLUDED_NAMES = {
            'Archivist of Oghma', 'Argothian Enchantress', 'Audacity', 'Betrayal', 'Bequeathal',
            'Blood Scrivener', 'Brigon, Soldier of Meletis', 'Compost', 'Cryptbreaker', 'Curiosity',
            'Dakra Mystic', 'Dockside Chef', 'Dreamcatcher', 'Edgewall Innkeeper', 'Flumph',
            'Folk Hero', 'Goblin Artisans', 'Hatching Plans', 'Ideas Unbound', 'Kor Spiritdancer',
            'Mindblade Render', 'Puresteel Paladin', 'Read the Runes', 'Sram, Senior Edificer',
            'Sygg, River Cutthroat', 'Wall of Mulch', 'Waste Not'
        }
        excluded_names = df['name'].isin(EXCLUDED_NAMES)
        # Create cantrip condition masks
        has_draw = create_text_mask(df, PATTERN_GROUPS['draw'])
        # Missing mana values are treated as infinitely expensive so they
        # never count as low cost.
        low_cost = df['manaValue'].fillna(float('inf')) <= 2
        # Combine conditions
        cantrip_mask = (
            ~excluded_types &
            ~excluded_keywords &
            ~has_loot &
            ~excluded_names &
            has_draw &
            low_cost
        )
        # Apply tags
        utility.apply_tag_vectorized(df, cantrip_mask, TAG_GROUPS['Cantrips'])
        logging.info('Successfully tagged cantrips in %s_cards.csv', color)
    except Exception as e:
        logging.error('Error tagging cantrips in %s_cards.csv: %s', color, str(e))
        raise
 ## Connive Effects 
 def tag_for_connive(df, color):
    print(f'Checking {color}_cards.csv for cards that have or care about "Connive".')
@ -1509,9 +1662,8 @@ def tag_auras(df, color):
                    theme_tags.extend([tag])
                    df.at[index, 'themeTags'] = theme_tags
-    # Overwrite file with aura tag added
+    # Overwrite file with Aura tag added
-    #df.to_csv(f'{csv_directory}/{color}_cards.csv', index=False)
+    logging.info(f'Cards that have or care about "Aura" in {color}_cards.csv have been tagged.')
    print(f'Cards that have or care about "Auraa" in {color}_cards.csv have been tagged.\n')
 ## Constellation
 def tag_constellation(df, color):
@ -2684,244 +2836,67 @@ def tag_for_magecraft(df, color):
 ## Cantrips
 def tag_for_cantrips(df, color):
-    # Iterate through each {color}_cards.csv file to find spells matter cards
+    """
-    print(f'Settings "Cantrip" tags on {color}_cards.csv.')
+    Tag cards in the DataFrame as cantrips based on specific criteria.
    df['manaValue'] = df['manaValue'].astype(int)
-    # Logic for Cantrip cards
+    Cantrips are defined as low-cost spells (mana value <= 2) that draw cards.
-    print('Note: I am considering a cantrip to be a card that has a mana value of 0-2,\n'
+    The function excludes certain card types, keywords, and specific named cards
-            'does some effect, and draws cards.\n\n'
+    from being tagged as cantrips.
            'This also includes activated abilities, that when the combined mana value\n'
            'and ability cost are less than 2 mana.\n')
    for index, row in df.iterrows():
        theme_tags = row['themeTags']
        # Take out Lands and Equipment
        if ('Land' in row['type']
            or 'Equipment' in row['type']):
            continue
-        # Remove ones that have specific kewords
+    Parameters:
-        if pd.notna(row['keywords']):
+        df (pd.DataFrame): The DataFrame containing card data.
-            if ('Channel' in row['keywords']
+        color (str): The color identifier for logging purposes.
                or 'Cycling' in row['keywords']
                or 'Connive' in row['keywords']
                or 'Learn' in row['keywords']
                or 'Ravenous' in row['keywords']
                ):
                continue
-        # Remove cards that loot or have loot effects
+    Returns:
-        if ('Loot' in row['themeTags']
+        None: The function modifies the DataFrame in place by applying tags.
-            ):
+    """
-            continue
+    logging.info('Tagging cantrips in %s_cards.csv', color)
-        # Exclude specific cards
+    # Convert mana value to numeric
-        # By name
+    df['manaValue'] = pd.to_numeric(df['manaValue'], errors='coerce')
        if ('Archivist of Oghma' == row['name']
            or 'Argothian Enchantress' == row['name']
            or 'Audacity' == row['name']
            or 'Betrayal' == row['name']
            or 'Bequeathal' == row['name']
            or 'Blood Scrivener' == row['name']
            or 'Brigone, Soldier of Meletis' == row['name']
            or 'compost' == row['name']
            or 'Concealing Curtains // Revealing Eye' == row['name']
            or 'Cryptbreaker' == row['name']
            or 'Curiosity' == row['name']
            or 'Curse of Vengenace' == row['name']
            or 'Cryptex' == row['name']
            or 'Dakra Mystic' == row['name']
            or 'Dawn of a New Age' == row['name']
            or 'Dockside Chef' == row['name']
            or 'Dreamcatcher' == row['name']
            or 'Edgewall Innkeeper' == row['name']
            or 'Eidolon of Philosphy' == row['name']
            or 'Evolveld Sleeper' == row['name']
            or 'Femeref Enchantress' == row['name']
            or 'Finneas, Ace Archer' == row['name']
            or 'Flumph' == row['name']
            or 'Folk Hero' == row['name']
            or 'Frodo, Adventurous Hobbit' == row['name']
            or 'Goblin Artisans' == row['name']
            or 'Goldberry, River-Daughter' == row['name']
            or 'Gollum, Scheming Guide' == row['name']
            or 'Hatching Plans' == row['name']
            or 'Ideas Unbound' == row['name']
            or 'Ingenius Prodigy' == row['name']
            or 'Ior Ruin Expedition' == row['name']
            or 'Jace\'s Erasure' == row['name']
            or 'Keeper of the Mind' == row['name']
            or 'Kor Spiritdancer' == row['name']
            or 'Lodestone Bauble' == row['name']
            or 'Puresteel Paladin' == row['name']
            or 'Jeweled Bird' == row['name']
            or 'Mindblade Render' == row['name']
            or 'Multani\'s Presence' == row['name']
            or 'Nahiri\'s Lithoforming' == row['name']
            or 'Ordeal of Thassa' == row['name']
            or 'Pollywog Prodigy' == row['name']
            or 'Priest of Forgotten Gods' == row['name']
            or 'RAvenous Squirrel' == row['name']
            or 'Read the Runes' == row['name']
            or 'Red Death, Shipwrecker' == row['name']
            or 'Roil Cartographer' == row['name']
            or 'Sage of Lat-Nam' == row['name']
            or 'Saprazzan Heir' == row['name']
            or 'Scion of Halaster' == row['name']
            or 'See Beyond' == row['name']
            or 'Selhoff Entomber' == row['name']
            or 'Shielded Aether Thief' == row['name']
            or 'Shore Keeper' == row['name']
            or 'Silverquill Silencer' == row['name']
            or 'Soldevi Sage' == row['name']
            or 'Soldevi Sentry' == row['name']
            or 'Spiritual Focus' == row['name']
            or 'Sram, Senior Edificer' == row['name']
            or 'Staff of the Storyteller' == row['name']
            or 'Stirge' == row['name']
            or 'Sylvan Echoes' == row['name']
            or 'Sythis, Harvest\'s Hand' == row['name']
            or 'Sygg, River Cutthroat' == row['name']
            or 'Tenuous Truce' == row['name']
            or 'Test of Talents' == row['name']
            or 'Thalakos Seer' == row['name']
            or 'Tribute to Horobi // Echo of Death\'s Wail' == row['name']
            or 'Vampire Gourmand' == row['name']
            or 'Vampiric Rites' == row['name']
            or 'Vampirism' == row['name']
            or 'Vessel of Paramnesia' == row['name']
            or 'Witch\'s Cauldron' == row['name']
            or 'Wall of Mulch' == row['name']
            or 'Waste Not' == row['name']
            or 'Well Rested' == row['name']
            ):
            continue
-        # Matching text or triggers
+    # Define exclusion conditions
-        if pd.notna(row['text']):
+    excluded_types = df['type'].str.contains('Land|Equipment', na=False)
-            if ('cast from exile, you draw a card' in row['text']
+    excluded_keywords = df['keywords'].str.contains('Channel|Cycling|Connive|Learn|Ravenous', na=False)
-                or 'commit a crime, draw a card' in row['text']
+    has_loot = df['themeTags'].apply(lambda x: 'Loot' in x)
                or 'deals damage to an opponent' in row['text'].lower()
                or 'deals combat damage to a player' in row['text'].lower()
                or 'deals combat damage to a player, you may draw a card' in row['text'].lower()
                or 'deals combat damage to a player, draw a card' in row['text'].lower()
                or 'deals combat damage to an opponent' in row['text'].lower()
                or 'first time this turn, draw' in row['text'].lower()
                or 'Gift a card' in row['text']
                or 'give a gift' in row['text'].lower()
                or 'then draw a card if it has' in row['text']
                or 'target of a spell, draw' in row['text']
                or 'target of a spell you control, draw' in row['text']
                or 'unless that player pays' in row['text']
-                # Matches relating to skipping draws
+    # Define name exclusions
-                or 'draw step, instead you may skip' in row['text'].lower()
+    EXCLUDED_NAMES = {
-                or 'skip that draw' in row['text'].lower()
+        'Archivist of Oghma', 'Argothian Enchantress', 'Audacity', 'Betrayal', 'Bequeathal', 'Blood Scrivener', 'Brigon, Soldier of Meletis',
-                ): 
+        'Compost', 'Concealing curtains // Revealing Eye', 'Cryptbreaker', 'Curiosity', 'Cuse of Vengeance', 'Cryptek', 'Dakra Mystic',
-                continue
+        'Dawn of a New Age', 'Dockside Chef', 'Dreamcatcher', 'Edgewall Innkeeper', 'Eidolon of Philosophy', 'Evolved Sleeper',
        'Femeref Enchantress', 'Finneas, Ace Archer', 'Flumph', 'Folk Hero', 'Frodo, Adventurous Hobbit', 'Goblin Artisans',
        'Goldberry, River-Daughter', 'Gollum, Scheming Guide', 'Hatching Plans', 'Ideas Unbound', 'Ingenius Prodigy', 'Ior Ruin Expedition',
        "Jace's Erasure", 'Keeper of the Mind', 'Kor Spiritdancer', 'Lodestone Bauble', 'Puresteel Paladin', 'Jeweled Bird', 'Mindblade Render',
        "Multani's Presence", "Nahiri's Lithoforming", 'Ordeal of Thassa', 'Pollywog Prodigy', 'Priest of Forgotten Gods', 'Ravenous Squirrel',
        'Read the Runes', 'Red Death, Shipwrecker', 'Roil Cartographer', 'Sage of Lat-Name', 'Saprazzan Heir', 'Scion of Halaster', 'See Beyond',
        'Selhoff Entomber', 'Shielded Aether Theif', 'Shore Keeper', 'silverquill Silencer', 'Soldevi Sage', 'Soldevi Sentry', 'Spiritual Focus',
        'Sram, Senior Edificer', 'Staff of the Storyteller', 'Stirge', 'Sylvan Echoes', "Sythis Harvest's Hand", 'Sygg, River Cutthroat',
        'Tenuous Truce', 'Test of Talents', 'Thalakos seer', "Tribute to Horobi // Echo of Deaths Wail", 'Vampire Gourmand', 'Vampiric Rites',
        'Vampirism', 'Vessel of Paramnesia', "Witch's Caultron", 'Wall of Mulch', 'Waste Not', 'Well Rested'
        # Add other excluded names here
    }
    excluded_names = df['name'].isin(EXCLUDED_NAMES)
-        else:
+    # Define cantrip conditions with enhanced pattern matching
-            if (row['manaValue'] == 0
+    draw_patterns = r'draw[s]? a card|draw[s]? one card'
-            or row['manaValue'] == 1
+    has_draw = df['text'].str.contains(draw_patterns, case=False, na=False)
-            or row['manaValue'] == 2
+    low_cost = df['manaValue'].fillna(float('inf')) <= 2  # Handle NaN values safely
            ):
                if pd.notna(row['text']):
                    if ('draw a card' in row['text'].lower()
                        or 'draw a card.' in row['text'].lower()
                        or 'draw two cards' in row['text'].lower()
                        or 'draw three cards' in row['text'].lower()
                        or 'draw x cards' in row['text'].lower()
                        or 'draws a card' in row['text'].lower()
                        ):
                        if ('enters, draw a card' in row['text']
                            or 'enters, you draw a card' in row['text']
                            or 'enters, you may draw a card' in row['text']
-                            # Specific cards
+    # Combine all conditions
-                            or 'Cling to Dust' == row['name']
+    cantrip_mask = (
-                            or 'Deduce' == row['name']
+        ~excluded_types &
-                            or 'Everdream' == row['name']
+        ~excluded_keywords &
-                            or 'Inverted Iceberg' == row['name']
+        ~has_loot &
-                            or 'Lunar Rejection' == row['name']
+        ~excluded_names &
-                            or 'Open of the Sea' == row['name']
+        has_draw &
-                            or 'Pawpatch Formation' == row['name']
+        low_cost
-                            or 'Scour All Possibilities' == row['name']
+    )
                            or 'Sleight of Hand' == row['name']
                            or 'Think Twice' == row['name']
                            or 'Train of Thought' == row['name']
                            or 'Whispers of the Muse' == row['name']
                            ):
                            tag_type = ['Cantrips', 'Spellslinger', 'Spells Matter']
                            for tag in tag_type:
                                if tag not in theme_tags:
                                    theme_tags.extend([tag])
                                    df.at[index, 'themeTags'] = theme_tags
                        elif ('{T}: Draw a card' in row['text']
                            or '{T}: Draw' in row['text']
                            or 'another legendary creature, draw a card' in row['text'].lower()
                            or 'artifact or land: draw' in row['text'].lower()
                            or 'Blood token' in row['text']
                            or 'creature you control, draw' in row['text'].lower()
                            or 'creature\'s toughness' in row['text'].lower()
                            or 'Clue' in row['type']
                            or 'dies, draw' in row['text']
                            or 'dies, choose one' in row['text']
                            or 'dies, you draw a card' in row['text']
                            or 'discard' in row['text'].lower()
                            or 'discard a card' in row['text'].lower()
                            or 'discard your hand' in row['text'].lower()
                            or 'each player may draw' in row['text'].lower()
                            or 'each other player' in row['text']
                            or 'each opponent. draw' in row['text'].lower()
                            or 'flip a coin' in row['text']
                            or 'if a player would draw' in row['text'].lower()
                            or 'if an opponent would draw' in row['text'].lower()
                            or 'if you would draw' in row['text'].lower()
                            or 'sacrifice a land: draw' in row['text'].lower()
                            or 'each player may draw' in row['text'].lower()
                            or 'opponent controls, draw' in row['text'].lower()
                            or 'opponent controls, you may draw' in row['text'].lower()
                            or 'or greater, draw a card' in row['text'].lower()
                            or 'this turn, draw a card' in row['text'].lower()
                            or 'turned face up, draw a card' in row['text'].lower()
                            or 'upkeep, each player draws' in row['text'].lower()
                            or 'you countrol: draw a card' in row['text'].lower()
                            or 'you may pay' in row['text']
                            or 'whenever an opponent draws a card' in row['text'].lower()
                            or f'{{1}}, Sacrifice {row['name']}: Draw a card' in row['text']
                            or f'{row['name']} dies' in row['text']
                            or f'{row['name']} dies, draw a card' in row['text']
                            or f'{row['name']} dies, you may draw a card' in row['text']
                            ):
                            continue
                        elif ('{1}' in row['text']
                                or '{2}' in row['text']
                                or '{3}' in row['text']
                                or '{4}' in row['text']
                                or '{5}' in row['text']
                                ):
                                ability_costs = [1, 2, 3]
                                for i in ability_costs:
                                    if (f'{{{i}}}' in row['text']
                                        or f'pay {i} life: draw' in row['text'].lower()
                                        ):
                                        if i + row['manaValue'] >= 3:
                                            continue     
                                        else:
                                            tag_type = ['Cantrips', 'Spellslinger', 'Spells Matter']
                                            for tag in tag_type:
                                                if tag not in theme_tags:
                                                    theme_tags.extend([tag])
                                                    df.at[index, 'themeTags'] = theme_tags
                        else:
                            tag_type = ['Cantrips', 'Spellslinger', 'Spells Matter']
                            for tag in tag_type:
                                if tag not in theme_tags:
                                    theme_tags.extend([tag])
                                    df.at[index, 'themeTags'] = theme_tags
-    # Overwrite file with Spells Matter tag added
+    # Apply tags using vectorized operation
-    print(f'"Cantrip" themed cards in {color}_cards.csv have been tagged.\n')
+    utility.apply_tag_vectorized(df, cantrip_mask, TAG_GROUPS['Cantrips'])
    logging.info('Finished tagging cantrips in %s_cards.csv', color)
 ## Spell Copy
 def tag_for_spell_copy(df, color):
@ -4904,5 +4879,6 @@ def tag_for_removal(df, color):
 #regenerate_csv_by_color('colorless')
-for color in colors:
+#for color in colors:
-    load_dataframe(color)
+#    load_dataframe(color)
 load_dataframe('colorless')
--- a/traycer_testing.py
+++ b/traycer_testing.py
@ -1,61 +0,0 @@
 def tag_for_cantrips(df, color):
     """
     Tag cards in the DataFrame as cantrips based on specific criteria.

     A row is tagged when its text contains "draw a card" (case-insensitive)
     and its mana value is at most 2, unless it is excluded by type (Land or
     Equipment), by keyword (Channel, Cycling, Connive, Learn, Ravenous), by
     already carrying a 'Loot' theme tag, or by appearing in EXCLUDED_NAMES.

     Parameters:
         df (pd.DataFrame): The DataFrame containing card data. The columns
             'manaValue', 'type', 'keywords', 'themeTags', 'name' and 'text'
             are read.
         color (str): The color identifier, used only in log messages.

     Returns:
         None: The function modifies the DataFrame in place ('manaValue' is
         coerced to numeric and tags are applied to matching rows).
     """
     logging.info('Tagging cantrips in %s_cards.csv', color)

     # Convert mana value to numeric; unparseable values become NaN and
     # therefore never satisfy the `<= 2` comparison below.
     df['manaValue'] = pd.to_numeric(df['manaValue'], errors='coerce')

     # Define exclusion conditions
     excluded_types = df['type'].str.contains('Land|Equipment', na=False)
     excluded_keywords = df['keywords'].str.contains('Channel|Cycling|Connive|Learn|Ravenous', na=False)
     has_loot = df['themeTags'].apply(lambda x: 'Loot' in x)

     # Define name exclusions. `isin` is an exact, case-sensitive match, so
     # every entry must be spelled exactly like the card's 'name' value.
     EXCLUDED_NAMES = {
         'Archivist of Oghma', 'Argothian Enchantress', 'Audacity', 'Betrayal', 'Bequeathal', 'Blood Scrivener', 'Brigone, Soldier of Meletis',
         'Compost', 'Concealing Curtains // Revealing Eye', 'Cryptbreaker', 'Curiosity', 'Curse of Vengeance', 'Cryptex', 'Dakra Mystic',
         'Dawn of a New Age', 'Dockside Chef', 'Dreamcatcher', 'Edgewall Innkeeper', 'Eidolon of Philosophy', 'Evolved Sleeper',
         'Femeref Enchantress', 'Finneas, Ace Archer', 'Flumph', 'Folk Hero', 'Frodo, Adventurous Hobbit', 'Goblin Artisans',
         'Goldberry, River-Daughter', 'Gollum, Scheming Guide', 'Hatching Plans', 'Ideas Unbound', 'Ingenious Prodigy', 'Ior Ruin Expedition',
         "Jace's Erasure", 'Keeper of the Mind', 'Kor Spiritdancer', 'Lodestone Bauble', 'Puresteel Paladin', 'Jeweled Bird', 'Mindblade Render',
         "Multani's Presence", "Nahiri's Lithoforming", 'Ordeal of Thassa', 'Pollywog Prodigy', 'Priest of Forgotten Gods', 'Ravenous Squirrel',
         'Read the Runes', 'Red Death, Shipwrecker', 'Roil Cartographer', 'Sage of Lat-Nam', 'Saprazzan Heir', 'Scion of Halaster', 'See Beyond',
         'Selhoff Entomber', 'Shielded Aether Thief', 'Shore Keeper', 'Silverquill Silencer', 'Soldevi Sage', 'Soldevi Sentry', 'Spiritual Focus',
         'Sram, Senior Edificer', 'Staff of the Storyteller', 'Stirge', 'Sylvan Echoes', "Sythis, Harvest's Hand", 'Sygg, River Cutthroat',
         'Tenuous Truce', 'Test of Talents', 'Thalakos Seer', "Tribute to Horobi // Echo of Death's Wail", 'Vampire Gourmand', 'Vampiric Rites',
         'Vampirism', 'Vessel of Paramnesia', "Witch's Cauldron", 'Wall of Mulch', 'Waste Not', 'Well Rested'
         # Add other excluded names here
     }
     excluded_names = df['name'].isin(EXCLUDED_NAMES)

     # Define cantrip conditions
     has_draw = df['text'].str.contains('draw a card', case=False, na=False)
     low_cost = df['manaValue'] <= 2

     # Combine all conditions
     cantrip_mask = (
         ~excluded_types &
         ~excluded_keywords &
         ~has_loot &
         ~excluded_names &
         has_draw &
         low_cost
     )

     # Apply tags using vectorized operation
     apply_tag_vectorized(df, cantrip_mask, TAG_GROUPS['Cantrips'])
     logging.info('Finished tagging cantrips in %s_cards.csv', color)
--- a/utility.py
+++ b/utility.py
@ -1,4 +1,18 @@
-def pluralize(word):
+from typing import Union, List
 import pandas as pd
 import re
 import logging
 from typing import Dict, Optional, Set
 from time import perf_counter
 def pluralize(word: str) -> str:
    """Convert a word to its plural form using basic English pluralization rules.
    Args:
        word: The singular word to pluralize
    Returns:
        The pluralized word
    """
    if word.endswith('y'):
        return word[:-1] + 'ies'
    elif word.endswith(('s', 'sh', 'ch', 'x', 'z')):
@ -8,10 +22,261 @@ def pluralize(word):
    else:
        return word + 's'
-def sort_list(list_to_sort):
+def sort_list(items: Union[List, pd.Series]) -> Union[List, pd.Series]:
-    if isinstance(list_to_sort, list):
+    """Sort a list or pandas Series in ascending order.
        list_to_sort = sorted(list_to_sort)
        return list_to_sort
    else:
        return list_to_sort
    Args:
        items: List or Series to sort
    Returns:
        Sorted list or Series
    """
    if isinstance(items, (list, pd.Series)):
        return sorted(items) if isinstance(items, list) else items.sort_values()
    return items
 def create_regex_mask(df: pd.DataFrame, column: str, pattern: str) -> pd.Series:
     """Build a boolean mask of the rows whose ``column`` text matches ``pattern``.

     The pattern is searched as a case-insensitive regular expression, and
     missing values are reported as non-matching.

     Args:
         df: DataFrame to search
         column: Name of the string column to search in
         pattern: Regex pattern to look for

     Returns:
         Boolean Series, True where the pattern was found
     """
     column_text = df[column].str
     return column_text.contains(pattern, regex=True, na=False, case=False)
 def combine_masks(masks: List[pd.Series], logical_operator: str = 'and') -> pd.Series:
     """Fold several boolean masks into one.

     Args:
         masks: Boolean Series to combine, folded left to right
         logical_operator: 'and' intersects the masks; any other value
             unions them

     Returns:
         The combined mask, or an empty boolean Series when ``masks`` is empty
     """
     if not masks:
         return pd.Series([], dtype=bool)

     intersect = logical_operator == 'and'
     combined, *remaining = masks
     for current in remaining:
         combined = (combined & current) if intersect else (combined | current)
     return combined
 def safe_str_contains(series: pd.Series, patterns: Union[str, List[str]], regex: bool = False) -> pd.Series:
     """Safely check if strings in a Series contain one or more patterns, handling NA values.

     Matching is case-insensitive. Missing values are treated as empty
     strings and therefore only match an empty pattern.

     Args:
         series: String Series to check
         patterns: String or list of strings to look for
         regex: Whether to treat patterns as regex expressions

     Returns:
         Boolean Series indicating which strings contain any of the patterns.
         An empty pattern list matches nothing.
     """
     if isinstance(patterns, str):
         patterns = [patterns]

     # "Any of no patterns" is False for every row. Without this guard the
     # literal branch would fail inside pd.concat and the regex branch would
     # build the empty pattern, which matches every row.
     if not patterns:
         return pd.Series(False, index=series.index, dtype=bool)

     text = series.fillna('')

     if regex:
         # Non-capturing groups keep each alternative isolated without
         # triggering pandas' "pattern has match groups" UserWarning.
         pattern = '|'.join(f'(?:{p})' for p in patterns)
         return text.str.contains(pattern, case=False, na=False, regex=True)

     masks = [text.str.contains(p, case=False, na=False, regex=False) for p in patterns]
     return pd.concat(masks, axis=1).any(axis=1)
 def create_type_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series:
     """Build a boolean mask of rows whose 'type' column matches any given pattern.

     Matching is case-insensitive and rows with a missing type never match.

     Args:
         df: DataFrame to search; its 'type' column is read
         type_text: One pattern, or a list of patterns, to look for
         regex: When True (default) the patterns are joined with ``|`` and
             searched as one regular expression; when False each pattern is
             searched as a literal substring

     Returns:
         Boolean Series, True where at least one pattern matched

     Raises:
         ValueError: If type_text is empty or None
         TypeError: If type_text is not a string or list of strings
     """
     if not type_text:
         raise ValueError("type_text cannot be empty or None")

     if isinstance(type_text, str):
         candidates = [type_text]
     elif isinstance(type_text, list):
         candidates = type_text
     else:
         raise TypeError("type_text must be a string or list of strings")

     type_column = df['type']

     if not regex:
         # Literal search: one mask per pattern, OR-ed together row-wise.
         per_pattern = [
             type_column.str.contains(item, case=False, na=False, regex=False)
             for item in candidates
         ]
         return pd.concat(per_pattern, axis=1).any(axis=1)

     alternation = '|'.join(f'{item}' for item in candidates)
     return type_column.str.contains(alternation, case=False, na=False, regex=True)
 def create_combined_type_mask(df: pd.DataFrame, type_patterns: Dict[str, List[str]], logical_operator: str = 'and') -> pd.Series:
     """Build one mask per pattern category and merge them into a single mask.

     Each dictionary value is turned into a mask with ``create_type_mask``;
     the dictionary keys are not used. The per-category masks are then merged
     with ``combine_masks``.

     Args:
         df: DataFrame to search
         type_patterns: Mapping of category name to the list of type patterns
             for that category
         logical_operator: How to merge the category masks ('and' or 'or')

     Returns:
         Combined boolean mask; an all-True mask indexed like ``df`` when
         ``type_patterns`` is empty

     Example:
         patterns = {
             'creature': ['Creature', 'Artifact Creature'],
             'enchantment': ['Enchantment', 'Enchantment Creature']
         }
         mask = create_combined_type_mask(df, patterns, 'or')
     """
     if not type_patterns:
         return pd.Series(True, index=df.index)

     per_category = [create_type_mask(df, group) for group in type_patterns.values()]
     return combine_masks(per_category, logical_operator)
 def extract_creature_types(type_text: str, creature_types: List[str], non_creature_types: List[str]) -> List[str]:
     """Pick the creature types out of a type line.

     The type line is split on whitespace and each word is kept when it is a
     known creature type that is not also listed as a non-creature type.
     Order and duplicates from the type line are preserved.

     Args:
         type_text: The type line text to parse
         creature_types: Valid creature types
         non_creature_types: Types to exclude even if listed as creature types

     Returns:
         The matching words, in the order they appear in ``type_text``
     """
     extracted = []
     for raw_word in type_text.split():
         word = raw_word.strip()
         if word not in creature_types:
             continue
         if word in non_creature_types:
             continue
         extracted.append(word)
     return extracted
 def find_types_in_text(text: str, name: str, creature_types: List[str]) -> List[str]:
     """Find creature types mentioned in card text.

     Each whitespace-separated word is stripped of every character other than
     ASCII letters and hyphens, then kept when it is a known creature type and
     is not a substring of the card's own name.

     Args:
         text: Card text to search; a missing value (None/NaN) yields no types
         name: Card name; types that occur inside it are ignored
         creature_types: List of valid creature types

     Returns:
         The distinct creature types found, in order of first appearance
     """
     if pd.isna(text):
         return []

     # Build the lookup once instead of scanning the list for every word.
     valid_types = set(creature_types)

     # A dict keeps insertion order, so the result is de-duplicated yet
     # deterministic (a plain set would vary between interpreter runs).
     found_types = {}
     for word in text.split():
         clean_word = re.sub(r'[^a-zA-Z-]', '', word)
         if clean_word in valid_types and clean_word not in name:
             found_types.setdefault(clean_word, None)

     return list(found_types)
 def add_outlaw_type(types: List[str], outlaw_types: List[str]) -> List[str]:
     """Append 'Outlaw' when the card has a type that qualifies as an outlaw.

     Args:
         types: The card's current types
         outlaw_types: Types that qualify a card as an Outlaw

     Returns:
         A new list ending in 'Outlaw' when a qualifying type is present and
         'Outlaw' is not already listed; otherwise the original list unchanged
     """
     if 'Outlaw' in types:
         return types

     for current in types:
         if current in outlaw_types:
             return types + ['Outlaw']
     return types
 def batch_update_types(df: pd.DataFrame, mask: pd.Series, new_types: List[str]) -> None:
     """Merge ``new_types`` into 'creatureTypes' for every row selected by ``mask``.

     Each selected row's list is replaced with the sorted, de-duplicated union
     of its current types and ``new_types``. ``df`` is modified in place.

     Args:
         df: DataFrame to update
         mask: Boolean mask indicating which rows to update
         new_types: List of types to add
     """
     def _merged(existing):
         # Union of old and new types, de-duplicated and alphabetised.
         return sorted(set(existing + new_types))

     selected = df.loc[mask, 'creatureTypes']
     df.loc[mask, 'creatureTypes'] = selected.apply(_merged)
 def create_tag_mask(df: pd.DataFrame, tag_patterns: Union[str, List[str]], column: str = 'themeTags') -> pd.Series:
     """Create a boolean mask for rows where tags match specified patterns.

     A row matches when any pattern occurs as a substring of any of the row's
     tags (plain, case-sensitive ``in`` test).

     Args:
         df: DataFrame to search
         tag_patterns: String or list of strings to match against tags
         column: Column containing tags to search (default: 'themeTags');
             each cell is iterated as a collection of tag strings

     Returns:
         Boolean Series indexed like ``df`` indicating matching rows. An empty
         DataFrame or an empty pattern list yields no matches.
     """
     if isinstance(tag_patterns, str):
         tag_patterns = [tag_patterns]

     # One pass over the column. Building the Series from a list with an
     # explicit bool dtype also covers the two edge cases without special
     # handling: an empty frame gives an empty mask, and an empty pattern list
     # gives all-False instead of failing inside pd.concat.
     matches = [
         any(pattern in tag for tag in tags for pattern in tag_patterns)
         for tags in df[column]
     ]
     return pd.Series(matches, index=df.index, dtype=bool)
 def validate_dataframe_columns(df: pd.DataFrame, required_columns: Set[str]) -> None:
     """Validate that DataFrame contains all required columns.

     Args:
         df: DataFrame to validate
         required_columns: Column names that must be present. Any iterable of
             names is accepted (a list as well as a set).

     Raises:
         ValueError: If any required columns are missing
     """
     # Coerce to a set first: set difference is not defined for a list on the
     # left-hand side, so a list argument would otherwise raise TypeError.
     missing = set(required_columns) - set(df.columns)
     if missing:
         raise ValueError(f"Missing required columns: {missing}")
 def apply_tag_vectorized(df: pd.DataFrame, mask: pd.Series, tags: List[str]) -> None:
     """Merge ``tags`` into 'themeTags' for every row selected by ``mask``.

     Each selected row's tag list is replaced with the sorted, de-duplicated
     union of its current tags and ``tags``. ``df`` is modified in place.

     Args:
         df: The dataframe to modify
         mask: Boolean series indicating which rows to tag
         tags: List of tags to apply; a non-list value is treated as a single tag
     """
     tag_list = tags if isinstance(tags, list) else [tags]

     def _merged(existing):
         # Union of old and new tags, de-duplicated and alphabetised.
         return sorted(set(existing + tag_list))

     selected = df.loc[mask, 'themeTags']
     df.loc[mask, 'themeTags'] = selected.apply(_merged)
 def log_performance_metrics(start_time: float, operation: str, df_size: int) -> None:
     """Log performance metrics for an operation.

     Emits one INFO record with the elapsed time and, when the frame is not
     empty, the average time per row.

     Args:
         start_time: Start time from perf_counter()
         operation: Description of the operation performed
         df_size: Size of the DataFrame processed
     """
     duration = perf_counter() - start_time

     if df_size:
         logging.info(
             f"{operation} completed in {duration:.2f}s for {df_size} rows "
             f"({duration/df_size*1000:.2f}ms per row)"
         )
     else:
         # An empty frame has no per-row figure; dividing by zero here would
         # turn a harmless metrics call into a crash.
         logging.info(f"{operation} completed in {duration:.2f}s for {df_size} rows")