Moved the builder, tagger, and setup modules into their own folders, along with their constants, to improve clarity and readability. Also added a missing call to the tag_for_artifact_triggers() function

mwisnowski 2025-01-28 10:19:44 -08:00
parent 3a5beebfe2
commit dbbc8bc66e
20 changed files with 1525 additions and 1737 deletions

code/file_setup/__init__.py Normal file

@@ -0,0 +1,8 @@
"""Initialize the file_setup package."""
from .setup import setup, regenerate_csv_by_color
__all__ = [
'setup',
'regenerate_csv_by_color'
]
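# Illustrative usage from elsewhere in the project (a sketch assuming this
# package layout; not part of the package's own code):
#
#     from file_setup import setup, regenerate_csv_by_color
#     setup()  # launches the interactive setup menu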

code/file_setup/setup.py Normal file

@@ -0,0 +1,337 @@
"""MTG Python Deckbuilder setup module.
This module provides the main setup functionality for the MTG Python Deckbuilder
application. It handles initial setup tasks such as downloading card data,
creating color-filtered card lists, and generating commander-eligible card lists.
Key Features:
- Initial setup and configuration
- Card data download and processing
- Color-based card filtering
- Commander card list generation
- CSV file management and validation
The module works in conjunction with setup_utils.py for utility functions and
exceptions.py for error handling.
"""
from __future__ import annotations
# Standard library imports
import logging
from enum import Enum
import os
from pathlib import Path
from typing import Union, List, Dict, Any
# Third-party imports
import inquirer
import pandas as pd
# Local imports
import logging_util
from settings import CSV_DIRECTORY
from .setup_constants import BANNED_CARDS, SETUP_COLORS, COLOR_ABRV, MTGJSON_API_URL
from .setup_utils import (
download_cards_csv,
filter_by_color_identity,
filter_dataframe,
process_legendary_cards
)
from exceptions import (
CSVFileNotFoundError,
ColorFilterError,
CommanderValidationError,
DataFrameProcessingError,
MTGJSONDownloadError
)
# Create logger for this module
logger = logging_util.logging.getLogger(__name__)
logger.setLevel(logging_util.LOG_LEVEL)
logger.addHandler(logging_util.file_handler)
logger.addHandler(logging_util.stream_handler)
# Create CSV directory if it doesn't exist
os.makedirs(CSV_DIRECTORY, exist_ok=True)
def check_csv_exists(file_path: Union[str, Path]) -> bool:
"""Check if a CSV file exists at the specified path.
Args:
file_path: Path to the CSV file to check
Returns:
bool: True if file exists, False otherwise
Raises:
CSVFileNotFoundError: If there are issues accessing the file path
"""
try:
with open(file_path, 'r', encoding='utf-8'):
return True
except FileNotFoundError:
return False
except Exception as e:
raise CSVFileNotFoundError(f'Error checking CSV file: {str(e)}')
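# Usage sketch (illustrative; assumes CSV_DIRECTORY exists and uses the
# download helper imported from setup_utils above):
#
#     cards_file = f'{CSV_DIRECTORY}/cards.csv'
#     if not check_csv_exists(cards_file):
#         download_cards_csv(MTGJSON_API_URL, cards_file)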
def initial_setup() -> None:
"""Perform initial setup by downloading card data and creating filtered CSV files.
Downloads the latest card data from MTGJSON if needed, creates color-filtered CSV files,
and generates commander-eligible cards list. Uses utility functions from setup_utils.py
for file operations and data processing.
Raises:
CSVFileNotFoundError: If required CSV files cannot be found
MTGJSONDownloadError: If card data download fails
DataFrameProcessingError: If data processing fails
ColorFilterError: If color filtering fails
"""
logger.info('Checking for cards.csv file')
try:
cards_file = f'{CSV_DIRECTORY}/cards.csv'
try:
with open(cards_file, 'r', encoding='utf-8'):
logger.info('cards.csv exists')
except FileNotFoundError:
logger.info('cards.csv not found, downloading from mtgjson')
download_cards_csv(MTGJSON_API_URL, cards_file)
df = pd.read_csv(cards_file, low_memory=False)
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
logger.info('Checking for color identity sorted files')
for i in range(min(len(SETUP_COLORS), len(COLOR_ABRV))):
logger.info(f'Checking for {SETUP_COLORS[i]}_cards.csv')
try:
with open(f'{CSV_DIRECTORY}/{SETUP_COLORS[i]}_cards.csv', 'r', encoding='utf-8'):
logger.info(f'{SETUP_COLORS[i]}_cards.csv exists')
except FileNotFoundError:
logger.info(f'{SETUP_COLORS[i]}_cards.csv not found, creating one')
filter_by_color(df, 'colorIdentity', COLOR_ABRV[i], f'{CSV_DIRECTORY}/{SETUP_COLORS[i]}_cards.csv')
# Generate commander list
determine_commanders()
except Exception as e:
logger.error(f'Error during initial setup: {str(e)}')
raise
def filter_by_color(df: pd.DataFrame, column_name: str, value: str, new_csv_name: Union[str, Path]) -> None:
"""Filter DataFrame by color identity and save to CSV.
Args:
df: DataFrame to filter
column_name: Column to filter on (should be 'colorIdentity')
value: Color identity value to filter for
new_csv_name: Path to save filtered CSV
Raises:
ColorFilterError: If filtering fails
DataFrameProcessingError: If DataFrame processing fails
CSVFileNotFoundError: If CSV file operations fail
"""
try:
# Check if target CSV already exists
if check_csv_exists(new_csv_name):
logger.info(f'{new_csv_name} already exists, will be overwritten')
filtered_df = filter_by_color_identity(df, value)
filtered_df.to_csv(new_csv_name, index=False)
logger.info(f'Successfully created {new_csv_name}')
except (ColorFilterError, DataFrameProcessingError, CSVFileNotFoundError) as e:
logger.error(f'Failed to filter by color {value}: {str(e)}')
raise
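# Usage sketch (illustrative; assumes cards.csv has already been downloaded):
#
#     df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
#     df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
#     filter_by_color(df, 'colorIdentity', 'W', f'{CSV_DIRECTORY}/white_cards.csv')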
def determine_commanders() -> None:
"""Generate commander_cards.csv containing all cards eligible to be commanders.
This function processes the card database to identify and validate commander-eligible cards,
applying comprehensive validation steps and filtering criteria.
Raises:
CSVFileNotFoundError: If cards.csv is missing and cannot be downloaded
MTGJSONDownloadError: If downloading cards data fails
CommanderValidationError: If commander validation fails
DataFrameProcessingError: If data processing operations fail
"""
logger.info('Starting commander card generation process')
try:
# Check for cards.csv with progress tracking
cards_file = f'{CSV_DIRECTORY}/cards.csv'
if not check_csv_exists(cards_file):
logger.info('cards.csv not found, initiating download')
download_cards_csv(MTGJSON_API_URL, cards_file)
else:
logger.info('cards.csv found, proceeding with processing')
# Load and process cards data
logger.info('Loading card data from CSV')
df = pd.read_csv(cards_file, low_memory=False)
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
# Process legendary cards with validation
logger.info('Processing and validating legendary cards')
try:
filtered_df = process_legendary_cards(df)
except CommanderValidationError as e:
logger.error(f'Commander validation failed: {str(e)}')
raise
# Apply standard filters
logger.info('Applying standard card filters')
filtered_df = filter_dataframe(filtered_df, BANNED_CARDS)
# Save commander cards
logger.info('Saving validated commander cards')
filtered_df.to_csv(f'{CSV_DIRECTORY}/commander_cards.csv', index=False)
logger.info('Commander card generation completed successfully')
except (CSVFileNotFoundError, MTGJSONDownloadError) as e:
logger.error(f'File operation error: {str(e)}')
raise
except CommanderValidationError as e:
logger.error(f'Commander validation error: {str(e)}')
raise
except Exception as e:
logger.error(f'Unexpected error during commander generation: {str(e)}')
raise
def regenerate_csvs_all() -> None:
"""Regenerate all color-filtered CSV files from latest card data.
Downloads fresh card data and recreates all color-filtered CSV files.
Useful for updating the card database when new sets are released.
Raises:
MTGJSONDownloadError: If card data download fails
DataFrameProcessingError: If data processing fails
ColorFilterError: If color filtering fails
"""
try:
logger.info('Downloading latest card data from MTGJSON')
download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
logger.info('Loading and processing card data')
df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
logger.info('Regenerating color identity sorted files')
for i in range(min(len(SETUP_COLORS), len(COLOR_ABRV))):
color = SETUP_COLORS[i]
color_id = COLOR_ABRV[i]
logger.info(f'Processing {color} cards')
filter_by_color(df, 'colorIdentity', color_id, f'{CSV_DIRECTORY}/{color}_cards.csv')
logger.info('Regenerating commander cards')
determine_commanders()
logger.info('Card database regeneration complete')
except Exception as e:
logger.error(f'Failed to regenerate card database: {str(e)}')
raise
def regenerate_csv_by_color(color: str) -> None:
"""Regenerate CSV file for a specific color identity.
Args:
color: Color name to regenerate CSV for (e.g. 'white', 'blue')
Raises:
ValueError: If color is not valid
MTGJSONDownloadError: If card data download fails
DataFrameProcessingError: If data processing fails
ColorFilterError: If color filtering fails
"""
try:
if color not in SETUP_COLORS:
raise ValueError(f'Invalid color: {color}')
color_abv = COLOR_ABRV[SETUP_COLORS.index(color)]
logger.info(f'Downloading latest card data for {color} cards')
download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
logger.info('Loading and processing card data')
df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
logger.info(f'Regenerating {color} cards CSV')
filter_by_color(df, 'colorIdentity', color_abv, f'{CSV_DIRECTORY}/{color}_cards.csv')
logger.info(f'Successfully regenerated {color} cards database')
except Exception as e:
logger.error(f'Failed to regenerate {color} cards: {str(e)}')
raise
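# Usage sketch (illustrative; any entry from SETUP_COLORS is a valid argument):
#
#     regenerate_csv_by_color('azorius')  # rebuilds azorius_cards.csv ('U, W')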
class SetupOption(Enum):
"""Enum for setup menu options."""
INITIAL_SETUP = 'Initial Setup'
REGENERATE_CSV = 'Regenerate CSV Files'
BACK = 'Back'
def _display_setup_menu() -> SetupOption:
"""Display the setup menu and return the selected option.
Returns:
SetupOption: The selected menu option
"""
question: List[Dict[str, Any]] = [
inquirer.List(
'menu',
choices=[option.value for option in SetupOption],
carousel=True)]
answer = inquirer.prompt(question)
if answer is None:
# Prompt was aborted (e.g. Ctrl+C); treat it as choosing 'Back'
return SetupOption.BACK
return SetupOption(answer['menu'])
def setup() -> bool:
"""Run the setup process for the MTG Python Deckbuilder.
This function provides a menu-driven interface to:
1. Perform initial setup by downloading and processing card data
2. Regenerate CSV files with updated card data
The function handles errors gracefully and provides feedback through logging.
Returns:
bool: True if setup completed successfully, False otherwise
"""
try:
print('Which setup operation would you like to perform?\n'
'If this is your first time setting up, do the initial setup.\n'
'If you\'ve done the basic setup before, you can regenerate the CSV files\n')
choice = _display_setup_menu()
if choice == SetupOption.INITIAL_SETUP:
logger.info('Starting initial setup')
initial_setup()
logger.info('Initial setup completed successfully')
return True
elif choice == SetupOption.REGENERATE_CSV:
logger.info('Starting CSV regeneration')
regenerate_csvs_all()
logger.info('CSV regeneration completed successfully')
return True
elif choice == SetupOption.BACK:
logger.info('Setup cancelled by user')
return False
except Exception as e:
logger.error(f'Error during setup: {e}')
raise
return False
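# Usage sketch (illustrative; in the application this is normally invoked
# from the main menu rather than called directly):
#
#     if setup():
#         logger.info('Setup finished')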

code/file_setup/setup_constants.py Normal file

@@ -0,0 +1,118 @@
from typing import Dict, List
BANNED_CARDS: List[str] = [
# Banned in Commander
'Ancestral Recall', 'Balance', 'Biorhythm', 'Black Lotus',
'Braids, Cabal Minion', 'Chaos Orb', 'Coalition Victory',
'Channel', 'Dockside Extortionist', 'Emrakul, the Aeons Torn',
'Erayo, Soratami Ascendant', 'Falling Star', 'Fastbond',
'Flash', 'Gifts Ungiven', 'Golos, Tireless Pilgrim',
'Griselbrand', 'Hullbreacher', 'Iona, Shield of Emeria',
'Karakas', 'Jeweled Lotus', 'Leovold, Emissary of Trest',
'Library of Alexandria', 'Limited Resources', 'Lutri, the Spellchaser',
'Mana Crypt', 'Mox Emerald', 'Mox Jet', 'Mox Pearl', 'Mox Ruby',
'Mox Sapphire', 'Nadu, Winged Wisdom', 'Panoptic Mirror',
'Paradox Engine', 'Primeval Titan', 'Prophet of Kruphix',
'Recurring Nightmare', 'Rofellos, Llanowar Emissary', 'Shahrazad',
'Sundering Titan', 'Sway of the Stars', 'Sylvan Primordial',
'Time Vault', 'Time Walk', 'Tinker', 'Tolarian Academy',
'Trade Secrets', 'Upheaval', 'Yawgmoth\'s Bargain',
# Banned in all formats for offensive content
'Invoke Prejudice', 'Cleanse', 'Stone-Throwing Devils', 'Pradesh Gypsies',
'Jihad', 'Imprison', 'Crusade'
]
SETUP_COLORS: List[str] = ['colorless', 'white', 'blue', 'black', 'green', 'red',
'azorius', 'orzhov', 'selesnya', 'boros', 'dimir',
'simic', 'izzet', 'golgari', 'rakdos', 'gruul',
'bant', 'esper', 'grixis', 'jund', 'naya',
'abzan', 'jeskai', 'mardu', 'sultai', 'temur',
'dune', 'glint', 'ink', 'witch', 'yore', 'wubrg']
COLOR_ABRV: List[str] = ['Colorless', 'W', 'U', 'B', 'G', 'R',
'U, W', 'B, W', 'G, W', 'R, W', 'B, U',
'G, U', 'R, U', 'B, G', 'B, R', 'G, R',
'G, U, W', 'B, U, W', 'B, R, U', 'B, G, R', 'G, R, W',
'B, G, W', 'R, U, W', 'B, R, W', 'B, G, U', 'G, R, U',
'B, G, R, W', 'B, G, R, U', 'G, R, U, W', 'B, G, U, W',
'B, R, U, W', 'B, G, R, U, W']
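# The two lists above are parallel: SETUP_COLORS[i] names the output CSV while
# COLOR_ABRV[i] is the matching colorIdentity value. An equivalent lookup
# (illustrative only; the code indexes the lists directly):
#
#     COLOR_TO_ABRV = dict(zip(SETUP_COLORS, COLOR_ABRV))
#     COLOR_TO_ABRV['azorius']  # -> 'U, W'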
# Constants for setup and CSV processing
MTGJSON_API_URL: str = 'https://mtgjson.com/api/v5/csv/cards.csv'
LEGENDARY_OPTIONS: List[str] = [
'Legendary Creature',
'Legendary Artifact',
'Legendary Artifact Creature',
'Legendary Enchantment Creature',
'Legendary Planeswalker'
]
NON_LEGAL_SETS: List[str] = [
'PHTR', 'PH17', 'PH18', 'PH19', 'PH20', 'PH21',
'UGL', 'UND', 'UNH', 'UST'
]
CARD_TYPES_TO_EXCLUDE: List[str] = [
'Plane —',
'Conspiracy',
'Vanguard',
'Scheme',
'Phenomenon',
'Stickers',
'Attraction',
'Hero',
'Contraption'
]
# Columns to keep when processing CSV files
CSV_PROCESSING_COLUMNS: List[str] = [
'name', # Card name
'faceName', # Name of specific face for multi-faced cards
'edhrecRank', # Card's rank on EDHREC
'colorIdentity', # Color identity for Commander format
'colors', # Actual colors in card's mana cost
'manaCost', # Mana cost string
'manaValue', # Converted mana cost
'type', # Card type line
'layout', # Card layout (normal, split, etc)
'text', # Card text/rules
'power', # Power (for creatures)
'toughness', # Toughness (for creatures)
'keywords', # Card's keywords
'side' # Side identifier for multi-faced cards
]
# Configuration for DataFrame sorting operations
SORT_CONFIG = {
'columns': ['name', 'side'], # Columns to sort by
'case_sensitive': False # Ignore case when sorting
}
# Configuration for DataFrame filtering operations
FILTER_CONFIG: Dict[str, Dict[str, List[str]]] = {
'layout': {
'exclude': ['reversible_card']
},
'availability': {
'require': ['paper']
},
'promoTypes': {
'exclude': ['playtest']
},
'securityStamp': {
'exclude': ['Heart', 'Acorn']
}
}
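# Each FILTER_CONFIG entry maps a column to substring rules that
# setup_utils.filter_dataframe applies via str.contains. For example
# (illustrative), the 'availability' rule keeps only paper printings:
#
#     df = df[df['availability'].str.contains('paper', na=False)]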
COLUMN_ORDER: List[str] = [
'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors',
'manaCost', 'manaValue', 'type', 'creatureTypes', 'text',
'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side'
]
TAGGED_COLUMN_ORDER: List[str] = [
'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors',
'manaCost', 'manaValue', 'type', 'creatureTypes', 'text',
'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side'
]

code/file_setup/setup_utils.py Normal file

@@ -0,0 +1,408 @@
"""MTG Python Deckbuilder setup utilities.
This module provides utility functions for setting up and managing the MTG Python Deckbuilder
application. It handles tasks such as downloading card data, filtering cards by various criteria,
and processing legendary creatures for commander format.
Key Features:
- Card data download from MTGJSON
- DataFrame filtering and processing
- Color identity filtering
- Commander validation
- CSV file management
The module integrates with settings.py for configuration and exceptions.py for error handling.
"""
from __future__ import annotations
# Standard library imports
import logging
import os
from pathlib import Path
from typing import List, Optional, Union, TypedDict
# Third-party imports
import pandas as pd
import requests
from tqdm import tqdm
# Local application imports
from .setup_constants import (
CSV_PROCESSING_COLUMNS,
CARD_TYPES_TO_EXCLUDE,
NON_LEGAL_SETS,
LEGENDARY_OPTIONS,
SORT_CONFIG,
FILTER_CONFIG,
COLUMN_ORDER,
TAGGED_COLUMN_ORDER
)
from exceptions import (
MTGJSONDownloadError,
DataFrameProcessingError,
ColorFilterError,
CommanderValidationError
)
from type_definitions import CardLibraryDF
from settings import FILL_NA_COLUMNS
import logging_util
# Create logger for this module
logger = logging_util.logging.getLogger(__name__)
logger.setLevel(logging_util.LOG_LEVEL)
logger.addHandler(logging_util.file_handler)
logger.addHandler(logging_util.stream_handler)
# Type definitions
class FilterRule(TypedDict, total=False):
"""Type definition for filter rules configuration; both keys are optional."""
exclude: List[str]
require: List[str]
class FilterConfig(TypedDict):
"""Type definition for complete filter configuration."""
layout: FilterRule
availability: FilterRule
promoTypes: FilterRule
securityStamp: FilterRule
def download_cards_csv(url: str, output_path: Union[str, Path]) -> None:
"""Download cards data from MTGJSON and save to CSV.
Downloads card data from the specified MTGJSON URL and saves it to a local CSV file.
Shows a progress bar during download using tqdm.
Args:
url: URL to download cards data from (typically MTGJSON API endpoint)
output_path: Path where the downloaded CSV file will be saved
Raises:
MTGJSONDownloadError: If download fails due to network issues or invalid response
Example:
>>> download_cards_csv('https://mtgjson.com/api/v5/csv/cards.csv', 'cards.csv')
"""
try:
response = requests.get(url, stream=True)
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
with open(output_path, 'wb') as f:
with tqdm(total=total_size, unit='iB', unit_scale=True, desc='Downloading cards data') as pbar:
for chunk in response.iter_content(chunk_size=8192):
size = f.write(chunk)
pbar.update(size)
except requests.RequestException as e:
logger.error(f'Failed to download cards data from {url}')
raise MTGJSONDownloadError(
"Failed to download cards data",
url,
getattr(e.response, 'status_code', None) if hasattr(e, 'response') else None
) from e
def check_csv_exists(filepath: Union[str, Path]) -> bool:
"""Check if a CSV file exists at the specified path.
Verifies the existence of a CSV file at the given path. This function is used
to determine if card data needs to be downloaded or if it already exists locally.
Args:
filepath: Path to the CSV file to check
Returns:
bool: True if the file exists, False otherwise
Example:
>>> if not check_csv_exists('cards.csv'):
... download_cards_csv(MTGJSON_API_URL, 'cards.csv')
"""
return Path(filepath).is_file()
def filter_dataframe(df: pd.DataFrame, banned_cards: List[str]) -> pd.DataFrame:
"""Apply standard filters to the cards DataFrame using configuration from settings.
Applies a series of filters to the cards DataFrame based on configuration from settings.py.
This includes handling null values, applying basic filters, removing illegal sets and banned cards,
and processing special card types.
Args:
df: pandas DataFrame containing card data to filter
banned_cards: List of card names that are banned and should be excluded
Returns:
pd.DataFrame: A new DataFrame containing only the cards that pass all filters
Raises:
DataFrameProcessingError: If any filtering operation fails
Example:
>>> filtered_df = filter_dataframe(cards_df, ['Channel', 'Black Lotus'])
"""
try:
logger.info('Starting standard DataFrame filtering')
# Work on a copy so the caller's DataFrame is not mutated
df = df.copy()
# Fill null values according to configuration
for col, fill_value in FILL_NA_COLUMNS.items():
if col == 'faceName':
fill_value = df['name']
df[col] = df[col].fillna(fill_value)
logger.debug(f'Filled NA values in {col} with {fill_value}')
# Apply basic filters from configuration
filtered_df = df.copy()
filter_config: FilterConfig = FILTER_CONFIG # Type hint for configuration
for field, rules in filter_config.items():
for rule_type, values in rules.items():
if rule_type == 'exclude':
for value in values:
filtered_df = filtered_df[~filtered_df[field].str.contains(value, na=False)]
elif rule_type == 'require':
for value in values:
filtered_df = filtered_df[filtered_df[field].str.contains(value, na=False)]
logger.debug(f'Applied {rule_type} filter for {field}: {values}')
# Remove illegal sets
for set_code in NON_LEGAL_SETS:
filtered_df = filtered_df[~filtered_df['printings'].str.contains(set_code, na=False)]
logger.debug('Removed illegal sets')
# Remove banned cards by exact name match; str.contains would also drop
# cards whose names merely contain a banned name (e.g. 'Flash' vs 'Flashfreeze')
filtered_df = filtered_df[~filtered_df['name'].isin(banned_cards)]
logger.debug('Removed banned cards')
# Remove special card types
for card_type in CARD_TYPES_TO_EXCLUDE:
filtered_df = filtered_df[~filtered_df['type'].str.contains(card_type, na=False)]
logger.debug('Removed special card types')
# Select columns, sort, and drop duplicates
filtered_df = filtered_df[CSV_PROCESSING_COLUMNS]
filtered_df = filtered_df.sort_values(
by=SORT_CONFIG['columns'],
key=lambda col: col.str.lower() if not SORT_CONFIG['case_sensitive'] else col
)
filtered_df = filtered_df.drop_duplicates(subset='faceName', keep='first')
logger.info('Completed standard DataFrame filtering')
return filtered_df
except Exception as e:
logger.error(f'Failed to filter DataFrame: {str(e)}')
raise DataFrameProcessingError(
"Failed to filter DataFrame",
"standard_filtering",
str(e)
) from e
def filter_by_color_identity(df: pd.DataFrame, color_identity: str) -> pd.DataFrame:
"""Filter DataFrame by color identity with additional color-specific processing.
This function extends the base filter_dataframe functionality with color-specific
filtering logic. It is used by setup.py's filter_by_color function but provides
a more robust and configurable implementation.
Args:
df: DataFrame to filter
color_identity: Color identity to filter by (e.g., 'W', 'U,B', 'Colorless')
Returns:
DataFrame filtered by color identity
Raises:
ColorFilterError: If color identity is invalid or filtering fails
DataFrameProcessingError: If general filtering operations fail
"""
try:
logger.info(f'Filtering cards for color identity: {color_identity}')
# Validate color identity
with tqdm(total=1, desc='Validating color identity') as pbar:
if not isinstance(color_identity, str):
raise ColorFilterError(
"Invalid color identity type",
str(color_identity),
"Color identity must be a string"
)
pbar.update(1)
# Apply base filtering
with tqdm(total=1, desc='Applying base filtering') as pbar:
filtered_df = filter_dataframe(df, [])
pbar.update(1)
# Filter by color identity
with tqdm(total=1, desc='Filtering by color identity') as pbar:
filtered_df = filtered_df[filtered_df['colorIdentity'] == color_identity]
logger.debug(f'Applied color identity filter: {color_identity}')
pbar.update(1)
# Additional color-specific processing
with tqdm(total=1, desc='Performing color-specific processing') as pbar:
# Placeholder for future color-specific processing
pbar.update(1)
logger.info(f'Completed color identity filtering for {color_identity}')
return filtered_df
except DataFrameProcessingError as e:
raise ColorFilterError(
"Color filtering failed",
color_identity,
str(e)
) from e
except Exception as e:
raise ColorFilterError(
"Unexpected error during color filtering",
color_identity,
str(e)
) from e
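# Usage sketch (illustrative; hypothetical path, assumes a cards DataFrame
# with a 'colorIdentity' column):
#
#     df = pd.read_csv('csv_files/cards.csv', low_memory=False)
#     df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
#     blue_df = filter_by_color_identity(df, 'U')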
def process_legendary_cards(df: pd.DataFrame) -> pd.DataFrame:
"""Process and filter legendary cards for commander eligibility with comprehensive validation.
Args:
df: DataFrame containing all cards
Returns:
DataFrame containing only commander-eligible cards
Raises:
CommanderValidationError: If validation fails for legendary status, special cases, or set legality
DataFrameProcessingError: If general processing fails
"""
try:
logger.info('Starting commander validation process')
filtered_df = df.copy()
# Step 1: Check legendary status
try:
with tqdm(total=1, desc='Checking legendary status') as pbar:
mask = filtered_df['type'].str.contains('|'.join(LEGENDARY_OPTIONS), na=False)
if not mask.any():
raise CommanderValidationError(
"No legendary creatures found",
"legendary_check",
"DataFrame contains no cards matching legendary criteria"
)
filtered_df = filtered_df[mask].copy()
logger.debug(f'Found {len(filtered_df)} legendary cards')
pbar.update(1)
except Exception as e:
raise CommanderValidationError(
"Legendary status check failed",
"legendary_check",
str(e)
) from e
# Step 2: Validate special cases
try:
with tqdm(total=1, desc='Validating special cases') as pbar:
special_cases = df['text'].str.contains('can be your commander', na=False)
special_commanders = df[special_cases].copy()
filtered_df = pd.concat([filtered_df, special_commanders]).drop_duplicates()
logger.debug(f'Added {len(special_commanders)} special commander cards')
pbar.update(1)
except Exception as e:
raise CommanderValidationError(
"Special case validation failed",
"special_cases",
str(e)
) from e
# Step 3: Verify set legality
try:
with tqdm(total=1, desc='Verifying set legality') as pbar:
initial_count = len(filtered_df)
for set_code in NON_LEGAL_SETS:
filtered_df = filtered_df[
~filtered_df['printings'].str.contains(set_code, na=False)
]
removed_count = initial_count - len(filtered_df)
logger.debug(f'Removed {removed_count} cards from illegal sets')
pbar.update(1)
except Exception as e:
raise CommanderValidationError(
"Set legality verification failed",
"set_legality",
str(e)
) from e
logger.info(f'Commander validation complete. {len(filtered_df)} valid commanders found')
return filtered_df
except CommanderValidationError:
raise
except Exception as e:
raise DataFrameProcessingError(
"Failed to process legendary cards",
"commander_processing",
str(e)
) from e
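# Usage sketch (illustrative; mirrors how setup.determine_commanders chains
# these helpers; banned_cards is a list of card names):
#
#     commanders = process_legendary_cards(df)
#     commanders = filter_dataframe(commanders, banned_cards)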
def process_card_dataframe(df: CardLibraryDF, batch_size: int = 1000, columns_to_keep: Optional[List[str]] = None,
include_commander_cols: bool = False, skip_availability_checks: bool = False) -> CardLibraryDF:
"""Process DataFrame with common operations in batches.
Args:
df: DataFrame to process
batch_size: Size of batches for processing
columns_to_keep: List of columns to keep (default: TAGGED_COLUMN_ORDER)
include_commander_cols: Whether to include commander-specific columns
skip_availability_checks: Whether to skip availability and security checks (default: False)
Returns:
CardLibraryDF: Processed DataFrame with standardized structure
"""
logger.info("Processing card DataFrame...")
if columns_to_keep is None:
columns_to_keep = TAGGED_COLUMN_ORDER.copy()
if include_commander_cols:
commander_cols = ['printings', 'text', 'power', 'toughness', 'keywords']
columns_to_keep.extend(col for col in commander_cols if col not in columns_to_keep)
# Fill NA values on a copy so the caller's DataFrame is not mutated
df = df.copy()
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
df['faceName'] = df['faceName'].fillna(df['name'])
# Process in batches
# Ceiling division (minimum one batch) avoids processing an empty extra batch
total_batches = max(1, (len(df) + batch_size - 1) // batch_size)
processed_dfs = []
for i in tqdm(range(total_batches), desc="Processing batches"):
start_idx = i * batch_size
end_idx = min((i + 1) * batch_size, len(df))
batch = df.iloc[start_idx:end_idx].copy()
if not skip_availability_checks:
# Availability checks always use the untagged column set, overriding
# any columns_to_keep value passed in above
columns_to_keep = COLUMN_ORDER.copy()
logger.debug("Performing column checks...")
# Common processing steps
batch = batch[batch['availability'].str.contains('paper', na=False)]
batch = batch.loc[batch['layout'] != 'reversible_card']
batch = batch.loc[batch['promoTypes'] != 'playtest']
batch = batch.loc[batch['securityStamp'] != 'heart']
batch = batch.loc[batch['securityStamp'] != 'acorn']
# Keep only specified columns
batch = batch[columns_to_keep]
processed_dfs.append(batch)
else:
logger.debug("Skipping column checks...")
# Keep only specified columns
batch = batch[columns_to_keep]
processed_dfs.append(batch)
# Combine processed batches
result = pd.concat(processed_dfs, ignore_index=True)
# Final processing
result.drop_duplicates(subset='faceName', keep='first', inplace=True)
result.sort_values(by=['name', 'side'], key=lambda col: col.str.lower(), inplace=True)
logger.info("DataFrame processing completed")
return result
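# Usage sketch (illustrative; batch_size shown at its default, with
# commander-specific columns opted in):
#
#     processed = process_card_dataframe(df, batch_size=1000,
#                                        include_commander_cols=True)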