feat: migrate to unified Parquet format with instant GitHub setup and 4x faster tagging

This commit is contained in:
matt 2025-10-18 21:32:12 -07:00
parent e9e949aae3
commit 8435312c8f
58 changed files with 11921 additions and 3961 deletions

View file

@@ -1,8 +1,8 @@
"""Initialize the file_setup package."""
from .setup import setup, regenerate_csv_by_color
from .setup import initial_setup, regenerate_processed_parquet
__all__ = [
'setup',
'regenerate_csv_by_color'
'initial_setup',
'regenerate_processed_parquet'
]

View file

@@ -0,0 +1,338 @@
"""Data loader abstraction for CSV and Parquet formats.
This module provides a unified interface for reading and writing card data
in both CSV and Parquet formats. It handles format detection, conversion,
and schema validation.
Introduced in v3.0.0 as part of the Parquet migration.
"""
from __future__ import annotations
import os
from pathlib import Path
from typing import List, Optional
import pandas as pd
from logging_util import get_logger
from path_util import card_files_processed_dir
logger = get_logger(__name__)
# Required columns for deck building
REQUIRED_COLUMNS = [
"name",
"colorIdentity",
"type", # MTGJSON uses 'type' not 'types'
"keywords",
"manaValue",
"text",
"power",
"toughness",
]
def validate_schema(df: pd.DataFrame, required: Optional[List[str]] = None) -> None:
"""Validate that DataFrame contains required columns.
Args:
df: DataFrame to validate
required: List of required columns (uses REQUIRED_COLUMNS if None)
Raises:
ValueError: If required columns are missing
"""
required = required or REQUIRED_COLUMNS
missing = [col for col in required if col not in df.columns]
if missing:
raise ValueError(
f"Schema validation failed: missing required columns {missing}. "
f"Available columns: {list(df.columns)}"
)
logger.debug(f"✓ Schema validation passed ({len(required)} required columns present)")
class DataLoader:
"""Unified data loading interface supporting CSV and Parquet formats.
This class provides transparent access to card data regardless of the
underlying storage format. It automatically detects the format based on
file extensions and provides conversion utilities.
Examples:
>>> loader = DataLoader()
>>> df = loader.read_cards("card_files/processed/all_cards.parquet")
>>> loader.write_cards(df, "output.parquet")
>>> loader.convert("input.csv", "output.parquet")
"""
def __init__(self, format: str = "auto"):
"""Initialize the data loader.
Args:
format: Format preference - "csv", "parquet", or "auto" (default: auto)
"auto" detects format from file extension
"""
self.format = format.lower()
if self.format not in ("csv", "parquet", "auto"):
raise ValueError(f"Unsupported format: {format}. Use 'csv', 'parquet', or 'auto'.")
def read_cards(
self,
path: str,
columns: Optional[List[str]] = None,
format: Optional[str] = None
) -> pd.DataFrame:
"""Load card data from a file.
Args:
path: File path (e.g., "card_files/processed/all_cards.parquet")
columns: Optional list of columns to load (Parquet optimization)
format: Override format detection (uses self.format if None)
Returns:
DataFrame with card data
Raises:
FileNotFoundError: If the file doesn't exist
ValueError: If format is unsupported
"""
if not os.path.exists(path):
raise FileNotFoundError(f"Card data file not found: {path}")
detected_format = format or self._detect_format(path)
logger.debug(f"Loading card data from {path} (format: {detected_format})")
if detected_format == "csv":
return self._read_csv(path, columns)
elif detected_format == "parquet":
return self._read_parquet(path, columns)
else:
raise ValueError(f"Unsupported format: {detected_format}")
def write_cards(
self,
df: pd.DataFrame,
path: str,
format: Optional[str] = None,
index: bool = False
) -> None:
"""Save card data to a file.
Args:
df: DataFrame to save
path: Output file path
format: Force format (overrides auto-detection)
index: Whether to write DataFrame index (default: False)
Raises:
ValueError: If format is unsupported
"""
detected_format = format or self._detect_format(path)
# Ensure output directory exists
os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
logger.debug(f"Writing card data to {path} (format: {detected_format}, rows: {len(df)})")
if detected_format == "csv":
self._write_csv(df, path, index)
elif detected_format == "parquet":
self._write_parquet(df, path, index)
else:
raise ValueError(f"Unsupported format: {detected_format}")
def convert(
self,
src_path: str,
dst_path: str,
columns: Optional[List[str]] = None
) -> None:
"""Convert between CSV and Parquet formats.
Args:
src_path: Source file path
dst_path: Destination file path
columns: Optional list of columns to include (all if None)
Examples:
>>> loader.convert("cards.csv", "cards.parquet")
>>> loader.convert("cards.parquet", "cards.csv", columns=["name", "type"])
"""
logger.info(f"Converting {src_path}{dst_path}")
df = self.read_cards(src_path, columns=columns)
self.write_cards(df, dst_path)
logger.info(f"✓ Converted {len(df)} cards")
def _read_csv(self, path: str, columns: Optional[List[str]] = None) -> pd.DataFrame:
"""Read CSV file."""
try:
return pd.read_csv(path, usecols=columns, low_memory=False)
except Exception as e:
logger.error(f"Failed to read CSV from {path}: {e}")
raise
def _read_parquet(self, path: str, columns: Optional[List[str]] = None) -> pd.DataFrame:
"""Read Parquet file."""
try:
return pd.read_parquet(path, columns=columns)
except Exception as e:
logger.error(f"Failed to read Parquet from {path}: {e}")
raise
def _write_csv(self, df: pd.DataFrame, path: str, index: bool) -> None:
"""Write CSV file."""
try:
df.to_csv(path, index=index)
except Exception as e:
logger.error(f"Failed to write CSV to {path}: {e}")
raise
def _write_parquet(self, df: pd.DataFrame, path: str, index: bool) -> None:
"""Write Parquet file with Snappy compression."""
try:
df.to_parquet(path, index=index, compression="snappy", engine="pyarrow")
except Exception as e:
logger.error(f"Failed to write Parquet to {path}: {e}")
raise
def _detect_format(self, path: str) -> str:
"""Detect file format from extension.
Args:
path: File path to analyze
Returns:
Format string: "csv" or "parquet"
Raises:
ValueError: If format cannot be determined
"""
if self.format != "auto":
return self.format
# Check file extension
if path.endswith(".csv"):
return "csv"
elif path.endswith(".parquet"):
return "parquet"
# Try to infer from existing files (no extension provided)
if os.path.exists(f"{path}.parquet"):
return "parquet"
elif os.path.exists(f"{path}.csv"):
return "csv"
raise ValueError(
f"Cannot determine format for '{path}'. "
"Use .csv or .parquet extension, or specify format explicitly."
)
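# Illustrative behavior of _detect_format (examples, not in the original source):
#   _detect_format("cards.parquet") -> "parquet"
#   _detect_format("cards.csv")     -> "csv"
#   _detect_format("cards")         -> probes cards.parquet, then cards.csv on disk,
#                                      and raises ValueError if neither exists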
def write_batch_parquet(
self,
df: pd.DataFrame,
batch_id: int,
tag: str = "",
batches_dir: Optional[str] = None
) -> str:
"""Write a batch Parquet file (used during tagging).
Args:
df: DataFrame to save as a batch
batch_id: Unique batch identifier (e.g., 0, 1, 2...)
tag: Optional tag to include in filename (e.g., "white", "commander")
batches_dir: Directory for batch files (defaults to card_files/processed/batches)
Returns:
Path to the written batch file
Example:
>>> loader.write_batch_parquet(white_df, batch_id=0, tag="white")
'card_files/processed/batches/batch_0_white.parquet'
"""
if batches_dir is None:
batches_dir = os.path.join(card_files_processed_dir(), "batches")
os.makedirs(batches_dir, exist_ok=True)
# Build filename: batch_{id}_{tag}.parquet or batch_{id}.parquet
filename = f"batch_{batch_id}_{tag}.parquet" if tag else f"batch_{batch_id}.parquet"
path = os.path.join(batches_dir, filename)
logger.debug(f"Writing batch {batch_id} ({tag or 'no tag'}): {len(df)} cards → {path}")
self.write_cards(df, path, format="parquet")
return path
def merge_batches(
self,
output_path: Optional[str] = None,
batches_dir: Optional[str] = None,
cleanup: bool = True
) -> pd.DataFrame:
"""Merge all batch Parquet files into a single output file.
Args:
output_path: Path for merged output (defaults to card_files/processed/all_cards.parquet)
batches_dir: Directory containing batch files (defaults to card_files/processed/batches)
cleanup: Whether to delete batch files after merging (default: True)
Returns:
Merged DataFrame
Raises:
FileNotFoundError: If no batch files found
Example:
>>> loader.merge_batches() # Merges all batches → all_cards.parquet
"""
if batches_dir is None:
batches_dir = os.path.join(card_files_processed_dir(), "batches")
if output_path is None:
from path_util import get_processed_cards_path
output_path = get_processed_cards_path()
# Find all batch files
batch_files = sorted(Path(batches_dir).glob("batch_*.parquet"))
if not batch_files:
raise FileNotFoundError(f"No batch files found in {batches_dir}")
logger.info(f"Merging {len(batch_files)} batch files from {batches_dir}")
# Read and concatenate all batches
dfs = []
for batch_file in batch_files:
logger.debug(f"Reading batch: {batch_file.name}")
df = self.read_cards(str(batch_file), format="parquet")
dfs.append(df)
# Merge all batches
merged_df = pd.concat(dfs, ignore_index=True)
logger.info(f"Merged {len(merged_df)} total cards from {len(dfs)} batches")
# Write merged output
self.write_cards(merged_df, output_path, format="parquet")
logger.info(f"✓ Wrote merged data to {output_path}")
# Cleanup batch files if requested
if cleanup:
logger.debug(f"Cleaning up {len(batch_files)} batch files")
for batch_file in batch_files:
batch_file.unlink()
# Remove batches directory if empty
try:
Path(batches_dir).rmdir()
logger.debug(f"Removed empty batches directory: {batches_dir}")
except OSError:
pass # Directory not empty, keep it
return merged_df
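
Taken together, write_batch_parquet and merge_batches support a write-then-merge tagging flow. A minimal usage sketch follows; the import path, directories, and sample row are assumptions for illustration, not part of this commit:

import pandas as pd
from data_loader import DataLoader  # adjust to the package path in your tree

loader = DataLoader()  # "auto": format detected from the file extension

# A tiny stand-in frame with the required columns (illustrative data only).
df = pd.DataFrame({
    "name": ["Sol Ring"], "colorIdentity": [""], "type": ["Artifact"],
    "keywords": [""], "manaValue": [1.0], "text": ["{T}: Add {C}{C}."],
    "power": [None], "toughness": [None],
})

# Write one per-tag batch, then merge all batches into a single Parquet file.
loader.write_batch_parquet(df, batch_id=0, tag="colorless",
                           batches_dir="card_files/processed/batches")
merged = loader.merge_batches(
    output_path="card_files/processed/all_cards.parquet",
    batches_dir="card_files/processed/batches",
)  # batch files are removed after a successful merge (cleanup=True)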

View file

@@ -0,0 +1,362 @@
"""MTG Python Deckbuilder setup module.
This module provides the main setup functionality for the MTG Python Deckbuilder
application. It handles initial setup tasks such as downloading card data,
creating color-filtered card lists, and generating commander-eligible card lists.
Key Features:
- Initial setup and configuration
- Card data download and processing
- Color-based card filtering
- Commander card list generation
- CSV file management and validation
The module works in conjunction with setup_utils.py for utility functions and
exceptions.py for error handling.
"""
from __future__ import annotations
# Standard library imports
from enum import Enum
import os
from typing import List, Dict, Any
# Third-party imports (optional)
try:
import inquirer # type: ignore
except Exception:
inquirer = None # Fallback to simple input-based menu when unavailable
import pandas as pd
# Local imports
import logging_util
from settings import CSV_DIRECTORY
from .setup_constants import BANNED_CARDS, SETUP_COLORS, COLOR_ABRV, MTGJSON_API_URL
from .setup_utils import (
download_cards_csv,
filter_dataframe,
process_legendary_cards,
check_csv_exists,
save_color_filtered_csvs,
enrich_commander_rows_with_tags,
)
from exceptions import (
CSVFileNotFoundError,
CommanderValidationError,
MTGJSONDownloadError
)
from scripts import generate_background_cards as background_cards_script
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _generate_background_catalog(cards_path: str, output_path: str) -> None:
"""Regenerate ``background_cards.csv`` from the latest cards dataset."""
logger.info('Generating background cards catalog')
args = [
'--source', cards_path,
'--output', output_path,
]
try:
background_cards_script.main(args)
except Exception: # pragma: no cover - surfaced to caller/test
logger.exception('Failed to generate background catalog')
raise
else:
logger.info('Background cards catalog generated successfully')
# Create logger for this module
logger = logging_util.logging.getLogger(__name__)
logger.setLevel(logging_util.LOG_LEVEL)
logger.addHandler(logging_util.file_handler)
logger.addHandler(logging_util.stream_handler)
# Create CSV directory if it doesn't exist
if not os.path.exists(CSV_DIRECTORY):
os.makedirs(CSV_DIRECTORY)
## Note: using shared check_csv_exists from setup_utils to avoid duplication
def initial_setup() -> None:
"""Perform initial setup by downloading card data and creating filtered CSV files.
Downloads the latest card data from MTGJSON if needed, creates color-filtered CSV files,
and generates commander-eligible cards list. Uses utility functions from setup_utils.py
for file operations and data processing.
Raises:
CSVFileNotFoundError: If required CSV files cannot be found
MTGJSONDownloadError: If card data download fails
DataFrameProcessingError: If data processing fails
ColorFilterError: If color filtering fails
"""
logger.info('Checking for cards.csv file')
try:
cards_file = f'{CSV_DIRECTORY}/cards.csv'
try:
with open(cards_file, 'r', encoding='utf-8'):
logger.info('cards.csv exists')
except FileNotFoundError:
logger.info('cards.csv not found, downloading from mtgjson')
download_cards_csv(MTGJSON_API_URL, cards_file)
df = pd.read_csv(cards_file, low_memory=False)
logger.info('Checking for color identity sorted files')
# Generate color-identity filtered CSVs in one pass
save_color_filtered_csvs(df, CSV_DIRECTORY)
# Generate commander list
determine_commanders()
except Exception as e:
logger.error(f'Error during initial setup: {str(e)}')
raise
## Removed local filter_by_color in favor of setup_utils.save_color_filtered_csvs
def determine_commanders() -> None:
"""Generate commander_cards.csv containing all cards eligible to be commanders.
This function processes the card database to identify and validate commander-eligible cards,
applying comprehensive validation steps and filtering criteria.
Raises:
CSVFileNotFoundError: If cards.csv is missing and cannot be downloaded
MTGJSONDownloadError: If downloading cards data fails
CommanderValidationError: If commander validation fails
DataFrameProcessingError: If data processing operations fail
"""
logger.info('Starting commander card generation process')
try:
# Check for cards.csv with progress tracking
cards_file = f'{CSV_DIRECTORY}/cards.csv'
if not check_csv_exists(cards_file):
logger.info('cards.csv not found, initiating download')
download_cards_csv(MTGJSON_API_URL, cards_file)
else:
logger.info('cards.csv found, proceeding with processing')
# Load and process cards data
logger.info('Loading card data from CSV')
df = pd.read_csv(cards_file, low_memory=False)
# Process legendary cards with validation
logger.info('Processing and validating legendary cards')
try:
filtered_df = process_legendary_cards(df)
except CommanderValidationError as e:
logger.error(f'Commander validation failed: {str(e)}')
raise
# Apply standard filters
logger.info('Applying standard card filters')
filtered_df = filter_dataframe(filtered_df, BANNED_CARDS)
logger.info('Enriching commander metadata with theme and creature tags')
filtered_df = enrich_commander_rows_with_tags(filtered_df, CSV_DIRECTORY)
# Save commander cards
logger.info('Saving validated commander cards')
commander_path = f'{CSV_DIRECTORY}/commander_cards.csv'
filtered_df.to_csv(commander_path, index=False)
background_output = f'{CSV_DIRECTORY}/background_cards.csv'
_generate_background_catalog(cards_file, background_output)
logger.info('Commander card generation completed successfully')
except (CSVFileNotFoundError, MTGJSONDownloadError) as e:
logger.error(f'File operation error: {str(e)}')
raise
except CommanderValidationError as e:
logger.error(f'Commander validation error: {str(e)}')
raise
except Exception as e:
logger.error(f'Unexpected error during commander generation: {str(e)}')
raise
def regenerate_csvs_all() -> None:
"""Regenerate all color-filtered CSV files from latest card data.
Downloads fresh card data and recreates all color-filtered CSV files.
Useful for updating the card database when new sets are released.
Raises:
MTGJSONDownloadError: If card data download fails
DataFrameProcessingError: If data processing fails
ColorFilterError: If color filtering fails
"""
try:
logger.info('Downloading latest card data from MTGJSON')
download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
logger.info('Loading and processing card data')
try:
df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
except pd.errors.ParserError as e:
logger.warning(f'CSV parsing error encountered: {e}. Retrying with error handling...')
df = pd.read_csv(
f'{CSV_DIRECTORY}/cards.csv',
low_memory=False,
on_bad_lines='warn', # Warn about malformed rows but continue
encoding_errors='replace' # Replace bad encoding chars
)
logger.info(f'Successfully loaded card data with error handling (some rows may have been skipped)')
logger.info('Regenerating color identity sorted files')
save_color_filtered_csvs(df, CSV_DIRECTORY)
logger.info('Regenerating commander cards')
determine_commanders()
logger.info('Card database regeneration complete')
except Exception as e:
logger.error(f'Failed to regenerate card database: {str(e)}')
raise
# Once files are regenerated, create a new legendary list (already executed in try)
def regenerate_csv_by_color(color: str) -> None:
"""Regenerate CSV file for a specific color identity.
Args:
color: Color name to regenerate CSV for (e.g. 'white', 'blue')
Raises:
ValueError: If color is not valid
MTGJSONDownloadError: If card data download fails
DataFrameProcessingError: If data processing fails
ColorFilterError: If color filtering fails
"""
try:
if color not in SETUP_COLORS:
raise ValueError(f'Invalid color: {color}')
color_abv = COLOR_ABRV[SETUP_COLORS.index(color)]
logger.info(f'Downloading latest card data for {color} cards')
download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
logger.info('Loading and processing card data')
df = pd.read_csv(
f'{CSV_DIRECTORY}/cards.csv',
low_memory=False,
on_bad_lines='skip', # Skip malformed rows (MTGJSON CSV has escaping issues)
encoding_errors='replace' # Replace bad encoding chars
)
logger.info(f'Regenerating {color} cards CSV')
# Use shared utilities to base-filter once then slice color, honoring bans
base_df = filter_dataframe(df, BANNED_CARDS)
base_df[base_df['colorIdentity'] == color_abv].to_csv(
f'{CSV_DIRECTORY}/{color}_cards.csv', index=False
)
logger.info(f'Successfully regenerated {color} cards database')
except Exception as e:
logger.error(f'Failed to regenerate {color} cards: {str(e)}')
raise
class SetupOption(Enum):
"""Enum for setup menu options."""
INITIAL_SETUP = 'Initial Setup'
REGENERATE_CSV = 'Regenerate CSV Files'
BACK = 'Back'
def _display_setup_menu() -> SetupOption:
"""Display the setup menu and return the selected option.
Returns:
SetupOption: The selected menu option
"""
if inquirer is not None:
question: List[Dict[str, Any]] = [
inquirer.List(
'menu',
choices=[option.value for option in SetupOption],
carousel=True)]
answer = inquirer.prompt(question)
return SetupOption(answer['menu'])
# Simple fallback when inquirer isn't installed (e.g., headless/container)
options = list(SetupOption)
print("\nSetup Menu:")
for idx, opt in enumerate(options, start=1):
print(f" {idx}) {opt.value}")
while True:
try:
sel = input("Select an option [1]: ").strip() or "1"
i = int(sel)
if 1 <= i <= len(options):
return options[i - 1]
except KeyboardInterrupt:
print("")
return SetupOption.BACK
except Exception:
pass
print("Invalid selection. Please try again.")
def setup() -> bool:
"""Run the setup process for the MTG Python Deckbuilder.
This function provides a menu-driven interface to:
1. Perform initial setup by downloading and processing card data
2. Regenerate CSV files with updated card data
The function handles errors gracefully and provides feedback through logging.
Returns:
bool: True if setup completed successfully, False otherwise
"""
try:
print('Which setup operation would you like to perform?\n'
'If this is your first time setting up, do the initial setup.\n'
'If you\'ve done the basic setup before, you can regenerate the CSV files\n')
choice = _display_setup_menu()
if choice == SetupOption.INITIAL_SETUP:
logger.info('Starting initial setup')
initial_setup()
logger.info('Initial setup completed successfully')
return True
elif choice == SetupOption.REGENERATE_CSV:
logger.info('Starting CSV regeneration')
regenerate_csvs_all()
logger.info('CSV regeneration completed successfully')
return True
elif choice == SetupOption.BACK:
logger.info('Setup cancelled by user')
return False
except Exception as e:
logger.error(f'Error during setup: {e}')
raise
return False
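
When inquirer is unavailable or a script needs to bypass the menu entirely, the module functions can be called directly. A short sketch, assuming the file_setup package import path:

from file_setup.setup import initial_setup, regenerate_csv_by_color

initial_setup()                   # download cards.csv, write color CSVs and commander_cards.csv
regenerate_csv_by_color("white")  # refresh a single color's CSV in place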

View file

@@ -0,0 +1,114 @@
from typing import Dict, List
from settings import (
SETUP_COLORS,
COLOR_ABRV,
CARD_DATA_COLUMNS as COLUMN_ORDER, # backward compatible alias
CARD_DATA_COLUMNS as TAGGED_COLUMN_ORDER,
)
__all__ = [
'SETUP_COLORS', 'COLOR_ABRV', 'COLUMN_ORDER', 'TAGGED_COLUMN_ORDER',
'BANNED_CARDS', 'MTGJSON_API_URL', 'LEGENDARY_OPTIONS', 'NON_LEGAL_SETS',
'CARD_TYPES_TO_EXCLUDE', 'CSV_PROCESSING_COLUMNS', 'SORT_CONFIG',
'FILTER_CONFIG'
]
# Banned cards consolidated here (remains specific to setup concerns)
BANNED_CARDS: List[str] = [
# Commander banned list
'Ancestral Recall', 'Balance', 'Biorhythm', 'Black Lotus',
'Chaos Orb', 'Channel', 'Dockside Extortionist',
'Emrakul, the Aeons Torn',
'Erayo, Soratami Ascendant', 'Falling Star', 'Fastbond',
'Flash', 'Golos, Tireless Pilgrim',
'Griselbrand', 'Hullbreacher', 'Iona, Shield of Emeria',
'Karakas', 'Jeweled Lotus', 'Leovold, Emissary of Trest',
'Library of Alexandria', 'Limited Resources', 'Lutri, the Spellchaser',
'Mana Crypt', 'Mox Emerald', 'Mox Jet', 'Mox Pearl', 'Mox Ruby',
'Mox Sapphire', 'Nadu, Winged Wisdom',
'Paradox Engine', 'Primeval Titan', 'Prophet of Kruphix',
'Recurring Nightmare', 'Rofellos, Llanowar Emissary', 'Shahrazad',
'Sundering Titan', 'Sylvan Primordial',
'Time Vault', 'Time Walk', 'Tinker', 'Tolarian Academy',
'Trade Secrets', 'Upheaval', "Yawgmoth's Bargain",
# Problematic / culturally sensitive or banned in other formats
'Invoke Prejudice', 'Cleanse', 'Stone-Throwing Devils', 'Pradesh Gypsies',
'Jihad', 'Imprison', 'Crusade',
# Cards of the Hero type (non creature)
"The Protector", "The Hunter", "The Savant", "The Explorer",
"The Philosopher", "The Harvester", "The Tyrant", "The Vanquisher",
"The Avenger", "The Slayer", "The Warmonger", "The Destined",
"The Warrior", "The General", "The Provider", "The Champion",
# Hero Equipment
"Spear of the General", "Lash of the Tyrant", "Bow of the Hunter",
"Cloak of the Philosopher", "Axe of the Warmonger"
]
# Constants for setup and CSV processing
MTGJSON_API_URL: str = 'https://mtgjson.com/api/v5/csv/cards.csv'
LEGENDARY_OPTIONS: List[str] = [
'Legendary Creature',
'Legendary Artifact',
'Legendary Artifact Creature',
'Legendary Enchantment Creature',
'Legendary Planeswalker'
]
NON_LEGAL_SETS: List[str] = [
'PHTR', 'PH17', 'PH18', 'PH19', 'PH20', 'PH21',
'UGL', 'UND', 'UNH', 'UST'
]
CARD_TYPES_TO_EXCLUDE: List[str] = [
'Plane —',
'Conspiracy',
'Vanguard',
'Scheme',
'Phenomenon',
'Stickers',
'Attraction',
'Contraption'
]
# Columns to keep when processing CSV files
CSV_PROCESSING_COLUMNS: List[str] = [
'name', # Card name
'faceName', # Name of specific face for multi-faced cards
'edhrecRank', # Card's rank on EDHREC
'colorIdentity', # Color identity for Commander format
'colors', # Actual colors in card's mana cost
'manaCost', # Mana cost string
'manaValue', # Converted mana cost
'type', # Card type line
'layout', # Card layout (normal, split, etc)
'text', # Card text/rules
'power', # Power (for creatures)
'toughness', # Toughness (for creatures)
'keywords', # Card's keywords
'side' # Side identifier for multi-faced cards
]
# Configuration for DataFrame sorting operations
SORT_CONFIG = {
'columns': ['name', 'side'], # Columns to sort by
'case_sensitive': False # Ignore case when sorting
}
# Configuration for DataFrame filtering operations
FILTER_CONFIG: Dict[str, Dict[str, List[str]]] = {
'layout': {
'exclude': ['reversible_card']
},
'availability': {
'require': ['paper']
},
'promoTypes': {
'exclude': ['playtest']
},
'securityStamp': {
'exclude': ['Heart', 'Acorn']
}
}
# COLUMN_ORDER and TAGGED_COLUMN_ORDER now sourced from settings via CARD_DATA_COLUMNS
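
FILTER_CONFIG drives the generic filtering in setup_utils.filter_dataframe: each field maps substring rules to rows to keep (require) or drop (exclude). A hedged sketch of how two of the rules above evaluate, using made-up sample rows:

import pandas as pd

df = pd.DataFrame({
    "name": ["A", "B"],
    "availability": ["paper,mtgo", "mtgo"],
    "layout": ["normal", "reversible_card"],
})

# 'availability' carries require=['paper']: keep rows containing "paper".
mask = df["availability"].astype(str).str.contains("paper", case=False, na=False, regex=False)
df = df[mask]

# 'layout' carries exclude=['reversible_card']: drop matching rows.
mask = df["layout"].astype(str).str.contains("reversible_card", case=False, na=False, regex=False)
df = df[~mask]

print(df["name"].tolist())  # ['A']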

View file

@@ -0,0 +1,342 @@
"""MTG Python Deckbuilder setup module.
This module provides the main setup functionality for the MTG Python Deckbuilder
application. It handles initial setup tasks such as downloading card data,
creating color-filtered card lists, and generating commander-eligible card lists.
Key Features:
- Initial setup and configuration
- Card data download and processing
- Color-based card filtering
- Commander card list generation
- CSV file management and validation
The module works in conjunction with setup_utils.py for utility functions and
exceptions.py for error handling.
"""
from __future__ import annotations
# Standard library imports
from enum import Enum
import os
from typing import List, Dict, Any
# Third-party imports (optional)
try:
import inquirer # type: ignore
except Exception:
inquirer = None # Fallback to simple input-based menu when unavailable
import pandas as pd
# Local imports
import logging_util
from settings import CSV_DIRECTORY
from .setup_constants import BANNED_CARDS, SETUP_COLORS, COLOR_ABRV, MTGJSON_API_URL
from .setup_utils import (
download_cards_csv,
filter_dataframe,
process_legendary_cards,
check_csv_exists,
save_color_filtered_csvs,
enrich_commander_rows_with_tags,
)
from exceptions import (
CSVFileNotFoundError,
CommanderValidationError,
MTGJSONDownloadError
)
from scripts import generate_background_cards as background_cards_script
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _generate_background_catalog(cards_path: str, output_path: str) -> None:
"""Regenerate ``background_cards.csv`` from the latest cards dataset."""
logger.info('Generating background cards catalog')
args = [
'--source', cards_path,
'--output', output_path,
]
try:
background_cards_script.main(args)
except Exception: # pragma: no cover - surfaced to caller/test
logger.exception('Failed to generate background catalog')
raise
else:
logger.info('Background cards catalog generated successfully')
# Create logger for this module
logger = logging_util.logging.getLogger(__name__)
logger.setLevel(logging_util.LOG_LEVEL)
logger.addHandler(logging_util.file_handler)
logger.addHandler(logging_util.stream_handler)
# Create CSV directory if it doesn't exist
if not os.path.exists(CSV_DIRECTORY):
os.makedirs(CSV_DIRECTORY)
## Note: using shared check_csv_exists from setup_utils to avoid duplication
def initial_setup() -> None:
"""Perform initial setup by downloading and processing card data.
**MIGRATION NOTE**: This function now delegates to the Parquet-based setup
(initial_setup_parquet) instead of the legacy CSV workflow. The old CSV-based
setup is preserved in code/file_setup/old/setup.py for reference.
Downloads the latest card data from MTGJSON as Parquet, processes it, and creates
the unified all_cards.parquet file. No color-specific files are generated - filtering
happens at query time instead.
Raises:
Various exceptions from Parquet download/processing steps
"""
from .setup_parquet import initial_setup_parquet
initial_setup_parquet()
## Removed local filter_by_color in favor of setup_utils.save_color_filtered_csvs
def determine_commanders() -> None:
"""Generate commander_cards.csv containing all cards eligible to be commanders.
This function processes the card database to identify and validate commander-eligible cards,
applying comprehensive validation steps and filtering criteria.
Raises:
CSVFileNotFoundError: If cards.csv is missing and cannot be downloaded
MTGJSONDownloadError: If downloading cards data fails
CommanderValidationError: If commander validation fails
DataFrameProcessingError: If data processing operations fail
"""
logger.info('Starting commander card generation process')
try:
# Check for cards.csv with progress tracking
cards_file = f'{CSV_DIRECTORY}/cards.csv'
if not check_csv_exists(cards_file):
logger.info('cards.csv not found, initiating download')
download_cards_csv(MTGJSON_API_URL, cards_file)
else:
logger.info('cards.csv found, proceeding with processing')
# Load and process cards data
logger.info('Loading card data from CSV')
df = pd.read_csv(cards_file, low_memory=False)
# Process legendary cards with validation
logger.info('Processing and validating legendary cards')
try:
filtered_df = process_legendary_cards(df)
except CommanderValidationError as e:
logger.error(f'Commander validation failed: {str(e)}')
raise
# Apply standard filters
logger.info('Applying standard card filters')
filtered_df = filter_dataframe(filtered_df, BANNED_CARDS)
logger.info('Enriching commander metadata with theme and creature tags')
filtered_df = enrich_commander_rows_with_tags(filtered_df, CSV_DIRECTORY)
# Save commander cards
logger.info('Saving validated commander cards')
commander_path = f'{CSV_DIRECTORY}/commander_cards.csv'
filtered_df.to_csv(commander_path, index=False)
background_output = f'{CSV_DIRECTORY}/background_cards.csv'
_generate_background_catalog(cards_file, background_output)
logger.info('Commander card generation completed successfully')
except (CSVFileNotFoundError, MTGJSONDownloadError) as e:
logger.error(f'File operation error: {str(e)}')
raise
except CommanderValidationError as e:
logger.error(f'Commander validation error: {str(e)}')
raise
except Exception as e:
logger.error(f'Unexpected error during commander generation: {str(e)}')
raise
def regenerate_csvs_all() -> None:
"""Regenerate all color-filtered CSV files from latest card data.
Downloads fresh card data and recreates all color-filtered CSV files.
Useful for updating the card database when new sets are released.
Raises:
MTGJSONDownloadError: If card data download fails
DataFrameProcessingError: If data processing fails
ColorFilterError: If color filtering fails
"""
try:
logger.info('Downloading latest card data from MTGJSON')
download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
logger.info('Loading and processing card data')
try:
df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
except pd.errors.ParserError as e:
logger.warning(f'CSV parsing error encountered: {e}. Retrying with error handling...')
df = pd.read_csv(
f'{CSV_DIRECTORY}/cards.csv',
low_memory=False,
on_bad_lines='warn', # Warn about malformed rows but continue
encoding_errors='replace' # Replace bad encoding chars
)
logger.info(f'Successfully loaded card data with error handling (some rows may have been skipped)')
logger.info('Regenerating color identity sorted files')
save_color_filtered_csvs(df, CSV_DIRECTORY)
logger.info('Regenerating commander cards')
determine_commanders()
logger.info('Card database regeneration complete')
except Exception as e:
logger.error(f'Failed to regenerate card database: {str(e)}')
raise
# Once files are regenerated, create a new legendary list (already executed in try)
def regenerate_csv_by_color(color: str) -> None:
"""Regenerate CSV file for a specific color identity.
Args:
color: Color name to regenerate CSV for (e.g. 'white', 'blue')
Raises:
ValueError: If color is not valid
MTGJSONDownloadError: If card data download fails
DataFrameProcessingError: If data processing fails
ColorFilterError: If color filtering fails
"""
try:
if color not in SETUP_COLORS:
raise ValueError(f'Invalid color: {color}')
color_abv = COLOR_ABRV[SETUP_COLORS.index(color)]
logger.info(f'Downloading latest card data for {color} cards')
download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
logger.info('Loading and processing card data')
df = pd.read_csv(
f'{CSV_DIRECTORY}/cards.csv',
low_memory=False,
on_bad_lines='skip', # Skip malformed rows (MTGJSON CSV has escaping issues)
encoding_errors='replace' # Replace bad encoding chars
)
logger.info(f'Regenerating {color} cards CSV')
# Use shared utilities to base-filter once then slice color, honoring bans
base_df = filter_dataframe(df, BANNED_CARDS)
base_df[base_df['colorIdentity'] == color_abv].to_csv(
f'{CSV_DIRECTORY}/{color}_cards.csv', index=False
)
logger.info(f'Successfully regenerated {color} cards database')
except Exception as e:
logger.error(f'Failed to regenerate {color} cards: {str(e)}')
raise
class SetupOption(Enum):
"""Enum for setup menu options."""
INITIAL_SETUP = 'Initial Setup'
REGENERATE_CSV = 'Regenerate CSV Files'
BACK = 'Back'
def _display_setup_menu() -> SetupOption:
"""Display the setup menu and return the selected option.
Returns:
SetupOption: The selected menu option
"""
if inquirer is not None:
question: List[Dict[str, Any]] = [
inquirer.List(
'menu',
choices=[option.value for option in SetupOption],
carousel=True)]
answer = inquirer.prompt(question)
return SetupOption(answer['menu'])
# Simple fallback when inquirer isn't installed (e.g., headless/container)
options = list(SetupOption)
print("\nSetup Menu:")
for idx, opt in enumerate(options, start=1):
print(f" {idx}) {opt.value}")
while True:
try:
sel = input("Select an option [1]: ").strip() or "1"
i = int(sel)
if 1 <= i <= len(options):
return options[i - 1]
except KeyboardInterrupt:
print("")
return SetupOption.BACK
except Exception:
pass
print("Invalid selection. Please try again.")
def setup() -> bool:
"""Run the setup process for the MTG Python Deckbuilder.
This function provides a menu-driven interface to:
1. Perform initial setup by downloading and processing card data
2. Regenerate CSV files with updated card data
The function handles errors gracefully and provides feedback through logging.
Returns:
bool: True if setup completed successfully, False otherwise
"""
try:
print('Which setup operation would you like to perform?\n'
'If this is your first time setting up, do the initial setup.\n'
'If you\'ve done the basic setup before, you can regenerate the CSV files\n')
choice = _display_setup_menu()
if choice == SetupOption.INITIAL_SETUP:
logger.info('Starting initial setup')
initial_setup()
logger.info('Initial setup completed successfully')
return True
elif choice == SetupOption.REGENERATE_CSV:
logger.info('Starting CSV regeneration')
regenerate_csvs_all()
logger.info('CSV regeneration completed successfully')
return True
elif choice == SetupOption.BACK:
logger.info('Setup cancelled by user')
return False
except Exception as e:
logger.error(f'Error during setup: {e}')
raise
return False

View file

@@ -0,0 +1,776 @@
"""MTG Python Deckbuilder setup utilities.
This module provides utility functions for setting up and managing the MTG Python Deckbuilder
application. It handles tasks such as downloading card data, filtering cards by various criteria,
and processing legendary creatures for commander format.
Key Features:
- Card data download from MTGJSON
- DataFrame filtering and processing
- Color identity filtering
- Commander validation
- CSV file management
The module integrates with settings.py for configuration and exceptions.py for error handling.
"""
from __future__ import annotations
# Standard library imports
import ast
import requests
from pathlib import Path
from typing import List, Optional, Union, TypedDict, Iterable, Dict, Any
# Third-party imports
import pandas as pd
from tqdm import tqdm
import json
from datetime import datetime
# Local application imports
from .setup_constants import (
CSV_PROCESSING_COLUMNS,
CARD_TYPES_TO_EXCLUDE,
NON_LEGAL_SETS,
SORT_CONFIG,
FILTER_CONFIG,
COLUMN_ORDER,
TAGGED_COLUMN_ORDER,
SETUP_COLORS,
COLOR_ABRV,
BANNED_CARDS,
)
from exceptions import (
MTGJSONDownloadError,
DataFrameProcessingError,
ColorFilterError,
CommanderValidationError
)
from type_definitions import CardLibraryDF
from settings import FILL_NA_COLUMNS, CSV_DIRECTORY
import logging_util
# Create logger for this module
logger = logging_util.logging.getLogger(__name__)
logger.setLevel(logging_util.LOG_LEVEL)
logger.addHandler(logging_util.file_handler)
logger.addHandler(logging_util.stream_handler)
def _is_primary_side(value: object) -> bool:
"""Return True when the provided side marker corresponds to a primary face."""
try:
if pd.isna(value):
return True
except Exception:
pass
text = str(value).strip().lower()
return text in {"", "a"}
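# Illustrative examples (not in the original source):
#   _is_primary_side('a')          -> True
#   _is_primary_side('b')          -> False
#   _is_primary_side(float('nan')) -> True   (single-faced cards have no side marker)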
def _summarize_secondary_face_exclusions(
names: Iterable[str],
source_df: pd.DataFrame,
) -> List[Dict[str, Any]]:
summaries: List[Dict[str, Any]] = []
if not names:
return summaries
for raw_name in names:
name = str(raw_name)
group = source_df[source_df['name'] == name]
if group.empty:
continue
primary_rows = group[group['side'].apply(_is_primary_side)] if 'side' in group.columns else pd.DataFrame()
primary_face = (
str(primary_rows['faceName'].iloc[0])
if not primary_rows.empty and 'faceName' in primary_rows.columns
else ""
)
layout = str(group['layout'].iloc[0]) if 'layout' in group.columns and not group.empty else ""
faces = sorted(set(str(v) for v in group.get('faceName', pd.Series(dtype=str)).dropna().tolist()))
eligible_faces = sorted(
set(
str(v)
for v in group
.loc[~group['side'].apply(_is_primary_side) if 'side' in group.columns else [False] * len(group)]
.get('faceName', pd.Series(dtype=str))
.dropna()
.tolist()
)
)
summaries.append(
{
"name": name,
"primary_face": primary_face or name.split('//')[0].strip(),
"layout": layout,
"faces": faces,
"eligible_faces": eligible_faces,
"reason": "secondary_face_only",
}
)
return summaries
def _write_commander_exclusions_log(entries: List[Dict[str, Any]]) -> None:
"""Persist commander exclusion diagnostics for downstream tooling."""
path = Path(CSV_DIRECTORY) / ".commander_exclusions.json"
if not entries:
try:
path.unlink()
except FileNotFoundError:
return
except Exception as exc:
logger.debug("Unable to remove commander exclusion log: %s", exc)
return
payload = {
"generated_at": datetime.now().isoformat(timespec='seconds'),
"secondary_face_only": entries,
}
try:
path.parent.mkdir(parents=True, exist_ok=True)
with path.open('w', encoding='utf-8') as handle:
json.dump(payload, handle, indent=2, ensure_ascii=False)
except Exception as exc:
logger.warning("Failed to write commander exclusion diagnostics: %s", exc)
def _enforce_primary_face_commander_rules(
candidate_df: pd.DataFrame,
source_df: pd.DataFrame,
) -> pd.DataFrame:
"""Retain only primary faces and record any secondary-face-only exclusions."""
if candidate_df.empty or 'side' not in candidate_df.columns:
_write_commander_exclusions_log([])
return candidate_df
mask_primary = candidate_df['side'].apply(_is_primary_side)
primary_df = candidate_df[mask_primary].copy()
secondary_df = candidate_df[~mask_primary]
primary_names = set(str(n) for n in primary_df.get('name', pd.Series(dtype=str)))
secondary_only_names = sorted(
set(str(n) for n in secondary_df.get('name', pd.Series(dtype=str))) - primary_names
)
if secondary_only_names:
logger.info(
"Excluding %d commander entries where only a secondary face is eligible: %s",
len(secondary_only_names),
", ".join(secondary_only_names),
)
entries = _summarize_secondary_face_exclusions(secondary_only_names, source_df)
_write_commander_exclusions_log(entries)
return primary_df
def _coerce_tag_list(value: object) -> List[str]:
"""Normalize various list-like representations into a list of strings."""
if value is None:
return []
if isinstance(value, float) and pd.isna(value):
return []
if isinstance(value, (list, tuple, set)):
return [str(v).strip() for v in value if str(v).strip()]
text = str(value).strip()
if not text:
return []
try:
parsed = ast.literal_eval(text)
if isinstance(parsed, (list, tuple, set)):
return [str(v).strip() for v in parsed if str(v).strip()]
except Exception:
pass
parts = [part.strip() for part in text.replace(";", ",").split(",")]
return [part for part in parts if part]
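# Illustrative examples (not in the original source):
#   _coerce_tag_list("['Aggro', 'Tokens']") -> ['Aggro', 'Tokens']
#   _coerce_tag_list("Aggro; Tokens")       -> ['Aggro', 'Tokens']
#   _coerce_tag_list(None)                  -> []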
def _collect_commander_tag_metadata(csv_dir: Union[str, Path]) -> Dict[str, Dict[str, List[str]]]:
"""Aggregate theme and creature tags from color-tagged CSV files."""
path = Path(csv_dir)
if not path.exists():
return {}
combined: Dict[str, Dict[str, set[str]]] = {}
columns = ("themeTags", "creatureTypes", "roleTags")
for color in SETUP_COLORS:
color_path = path / f"{color}_cards.csv"
if not color_path.exists():
continue
try:
df = pd.read_csv(color_path, low_memory=False)
except Exception as exc:
logger.debug("Unable to read %s for commander tag enrichment: %s", color_path, exc)
continue
if df.empty or ("name" not in df.columns and "faceName" not in df.columns):
continue
for _, row in df.iterrows():
face_key = str(row.get("faceName", "")).strip()
name_key = str(row.get("name", "")).strip()
keys = {k for k in (face_key, name_key) if k}
if not keys:
continue
for key in keys:
bucket = combined.setdefault(key, {col: set() for col in columns})
for col in columns:
if col not in row:
continue
values = _coerce_tag_list(row.get(col))
if values:
bucket[col].update(values)
enriched: Dict[str, Dict[str, List[str]]] = {}
for key, data in combined.items():
enriched[key] = {col: sorted(values) for col, values in data.items() if values}
return enriched
def enrich_commander_rows_with_tags(
df: pd.DataFrame,
csv_dir: Union[str, Path],
) -> pd.DataFrame:
"""Attach theme and creature tag metadata to commander rows when available."""
if df.empty:
df = df.copy()
for column in ("themeTags", "creatureTypes", "roleTags"):
if column not in df.columns:
df[column] = []
return df
metadata = _collect_commander_tag_metadata(csv_dir)
if not metadata:
df = df.copy()
for column in ("themeTags", "creatureTypes", "roleTags"):
if column not in df.columns:
df[column] = [[] for _ in range(len(df))]
return df
df = df.copy()
for column in ("themeTags", "creatureTypes", "roleTags"):
if column not in df.columns:
df[column] = [[] for _ in range(len(df))]
theme_values: List[List[str]] = []
creature_values: List[List[str]] = []
role_values: List[List[str]] = []
for _, row in df.iterrows():
face_key = str(row.get("faceName", "")).strip()
name_key = str(row.get("name", "")).strip()
entry_face = metadata.get(face_key, {})
entry_name = metadata.get(name_key, {})
combined: Dict[str, set[str]] = {
"themeTags": set(_coerce_tag_list(row.get("themeTags"))),
"creatureTypes": set(_coerce_tag_list(row.get("creatureTypes"))),
"roleTags": set(_coerce_tag_list(row.get("roleTags"))),
}
for source in (entry_face, entry_name):
for column in combined:
combined[column].update(source.get(column, []))
theme_values.append(sorted(combined["themeTags"]))
creature_values.append(sorted(combined["creatureTypes"]))
role_values.append(sorted(combined["roleTags"]))
df["themeTags"] = theme_values
df["creatureTypes"] = creature_values
df["roleTags"] = role_values
enriched_rows = sum(1 for t, c, r in zip(theme_values, creature_values, role_values) if t or c or r)
logger.debug("Enriched %d commander rows with tag metadata", enriched_rows)
return df
# Type definitions
class FilterRule(TypedDict):
"""Type definition for filter rules configuration."""
exclude: Optional[List[str]]
require: Optional[List[str]]
class FilterConfig(TypedDict):
"""Type definition for complete filter configuration."""
layout: FilterRule
availability: FilterRule
promoTypes: FilterRule
securityStamp: FilterRule
def download_cards_csv(url: str, output_path: Union[str, Path]) -> None:
"""Download cards data from MTGJSON and save to CSV.
Downloads card data from the specified MTGJSON URL and saves it to a local CSV file.
Shows a progress bar during download using tqdm.
Args:
url: URL to download cards data from (typically MTGJSON API endpoint)
output_path: Path where the downloaded CSV file will be saved
Raises:
MTGJSONDownloadError: If download fails due to network issues or invalid response
Example:
>>> download_cards_csv('https://mtgjson.com/api/v5/cards.csv', 'cards.csv')
"""
try:
response = requests.get(url, stream=True)
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
with open(output_path, 'wb') as f:
with tqdm(total=total_size, unit='iB', unit_scale=True, desc='Downloading cards data') as pbar:
for chunk in response.iter_content(chunk_size=8192):
size = f.write(chunk)
pbar.update(size)
except requests.RequestException as e:
logger.error(f'Failed to download cards data from {url}')
raise MTGJSONDownloadError(
"Failed to download cards data",
url,
getattr(e.response, 'status_code', None) if hasattr(e, 'response') else None
) from e
def check_csv_exists(filepath: Union[str, Path]) -> bool:
"""Check if a CSV file exists at the specified path.
Verifies the existence of a CSV file at the given path. This function is used
to determine if card data needs to be downloaded or if it already exists locally.
Args:
filepath: Path to the CSV file to check
Returns:
bool: True if the file exists, False otherwise
Example:
>>> if not check_csv_exists('cards.csv'):
... download_cards_csv(MTGJSON_API_URL, 'cards.csv')
"""
return Path(filepath).is_file()
def save_color_filtered_csvs(df: pd.DataFrame, out_dir: Union[str, Path]) -> None:
"""Generate and save color-identity filtered CSVs for all configured colors.
Iterates across configured color names and their corresponding color identity
abbreviations, filters the provided DataFrame using standard filters plus
color identity, and writes each filtered set to CSV in the provided directory.
Args:
df: Source DataFrame containing card data.
out_dir: Output directory for the generated CSV files.
Raises:
DataFrameProcessingError: If filtering fails.
ColorFilterError: If color filtering fails for a specific color.
"""
out_path = Path(out_dir)
out_path.mkdir(parents=True, exist_ok=True)
# Base-filter once for efficiency, then per-color filter without redoing base filters
try:
# Apply full standard filtering including banned list once, then slice per color
base_df = filter_dataframe(df, BANNED_CARDS)
except Exception as e:
# Wrap any unexpected issues as DataFrameProcessingError
raise DataFrameProcessingError(
"Failed to prepare base DataFrame for color filtering",
"base_color_filtering",
str(e)
) from e
for color_name, color_id in zip(SETUP_COLORS, COLOR_ABRV):
try:
logger.info(f"Generating {color_name}_cards.csv")
color_df = base_df[base_df['colorIdentity'] == color_id]
color_df.to_csv(out_path / f"{color_name}_cards.csv", index=False)
except Exception as e:
raise ColorFilterError(
"Failed to generate color CSV",
color_id,
str(e)
) from e
def filter_dataframe(df: pd.DataFrame, banned_cards: List[str]) -> pd.DataFrame:
"""Apply standard filters to the cards DataFrame using configuration from settings.
Applies a series of filters to the cards DataFrame based on configuration from settings.py.
This includes handling null values, applying basic filters, removing illegal sets and banned cards,
and processing special card types.
Args:
df: pandas DataFrame containing card data to filter
banned_cards: List of card names that are banned and should be excluded
Returns:
pd.DataFrame: A new DataFrame containing only the cards that pass all filters
Raises:
DataFrameProcessingError: If any filtering operation fails
Example:
>>> filtered_df = filter_dataframe(cards_df, ['Channel', 'Black Lotus'])
"""
try:
logger.info('Starting standard DataFrame filtering')
# Fill null values according to configuration
for col, fill_value in FILL_NA_COLUMNS.items():
if col == 'faceName':
fill_value = df['name']
df[col] = df[col].fillna(fill_value)
logger.debug(f'Filled NA values in {col} with {fill_value}')
# Apply basic filters from configuration
filtered_df = df.copy()
filter_config: FilterConfig = FILTER_CONFIG # Type hint for configuration
for field, rules in filter_config.items():
if field not in filtered_df.columns:
logger.warning('Skipping filter for missing field %s', field)
continue
for rule_type, values in rules.items():
if not values:
continue
if rule_type == 'exclude':
for value in values:
mask = filtered_df[field].astype(str).str.contains(
value,
case=False,
na=False,
regex=False
)
filtered_df = filtered_df[~mask]
elif rule_type == 'require':
for value in values:
mask = filtered_df[field].astype(str).str.contains(
value,
case=False,
na=False,
regex=False
)
filtered_df = filtered_df[mask]
else:
logger.warning('Unknown filter rule type %s for field %s', rule_type, field)
continue
logger.debug(f'Applied {rule_type} filter for {field}: {values}')
# Remove illegal sets
for set_code in NON_LEGAL_SETS:
filtered_df = filtered_df[~filtered_df['printings'].str.contains(set_code, na=False)]
logger.debug('Removed illegal sets')
# Remove banned cards (exact, case-insensitive match on name or faceName)
if banned_cards:
banned_set = {b.casefold() for b in banned_cards}
name_lc = filtered_df['name'].astype(str).str.casefold()
face_lc = filtered_df['faceName'].astype(str).str.casefold()
mask = ~(name_lc.isin(banned_set) | face_lc.isin(banned_set))
before = len(filtered_df)
filtered_df = filtered_df[mask]
after = len(filtered_df)
logger.debug(f'Removed banned cards: {before - after} filtered out')
# Remove special card types
for card_type in CARD_TYPES_TO_EXCLUDE:
filtered_df = filtered_df[~filtered_df['type'].str.contains(card_type, na=False)]
logger.debug('Removed special card types')
# Select columns, sort, and drop duplicates
filtered_df = filtered_df[CSV_PROCESSING_COLUMNS]
filtered_df = filtered_df.sort_values(
by=SORT_CONFIG['columns'],
key=lambda col: col.str.lower() if not SORT_CONFIG['case_sensitive'] else col
)
filtered_df = filtered_df.drop_duplicates(subset='faceName', keep='first')
logger.info('Completed standard DataFrame filtering')
return filtered_df
except Exception as e:
logger.error(f'Failed to filter DataFrame: {str(e)}')
raise DataFrameProcessingError(
"Failed to filter DataFrame",
"standard_filtering",
str(e)
) from e
def filter_by_color_identity(df: pd.DataFrame, color_identity: str) -> pd.DataFrame:
"""Filter DataFrame by color identity with additional color-specific processing.
This function extends the base filter_dataframe functionality with color-specific
filtering logic. It is used by setup.py's filter_by_color function but provides
a more robust and configurable implementation.
Args:
df: DataFrame to filter
color_identity: Color identity to filter by (e.g., 'W', 'U,B', 'Colorless')
Returns:
DataFrame filtered by color identity
Raises:
ColorFilterError: If color identity is invalid or filtering fails
DataFrameProcessingError: If general filtering operations fail
"""
try:
logger.info(f'Filtering cards for color identity: {color_identity}')
# Validate color identity
with tqdm(total=1, desc='Validating color identity') as pbar:
if not isinstance(color_identity, str):
raise ColorFilterError(
"Invalid color identity type",
str(color_identity),
"Color identity must be a string"
)
pbar.update(1)
# Apply base filtering
with tqdm(total=1, desc='Applying base filtering') as pbar:
filtered_df = filter_dataframe(df, BANNED_CARDS)
pbar.update(1)
# Filter by color identity
with tqdm(total=1, desc='Filtering by color identity') as pbar:
filtered_df = filtered_df[filtered_df['colorIdentity'] == color_identity]
logger.debug(f'Applied color identity filter: {color_identity}')
pbar.update(1)
# Additional color-specific processing
with tqdm(total=1, desc='Performing color-specific processing') as pbar:
# Placeholder for future color-specific processing
pbar.update(1)
logger.info(f'Completed color identity filtering for {color_identity}')
return filtered_df
except DataFrameProcessingError as e:
raise ColorFilterError(
"Color filtering failed",
color_identity,
str(e)
) from e
except Exception as e:
raise ColorFilterError(
"Unexpected error during color filtering",
color_identity,
str(e)
) from e
def process_legendary_cards(df: pd.DataFrame) -> pd.DataFrame:
"""Process and filter legendary cards for commander eligibility with comprehensive validation.
Args:
df: DataFrame containing all cards
Returns:
DataFrame containing only commander-eligible cards
Raises:
CommanderValidationError: If validation fails for legendary status, special cases, or set legality
DataFrameProcessingError: If general processing fails
"""
try:
logger.info('Starting commander validation process')
filtered_df = df.copy()
# Step 1: Check legendary status
try:
with tqdm(total=1, desc='Checking legendary status') as pbar:
# Normalize type line for matching
type_line = filtered_df['type'].astype(str).str.lower()
# Base predicates
is_legendary = type_line.str.contains('legendary')
is_creature = type_line.str.contains('creature')
# Planeswalkers are only eligible if they explicitly state they can be your commander (handled in special cases step)
is_enchantment = type_line.str.contains('enchantment')
is_artifact = type_line.str.contains('artifact')
is_vehicle_or_spacecraft = type_line.str.contains('vehicle') | type_line.str.contains('spacecraft')
# 1. Always allow Legendary Creatures (includes artifact/enchantment creatures already)
allow_legendary_creature = is_legendary & is_creature
# 2. Allow Legendary Enchantment Creature (already covered by legendary creature) ensure no plain legendary enchantments without creature type slip through
allow_enchantment_creature = is_legendary & is_enchantment & is_creature
# 3. Allow certain Legendary Artifacts:
# a) Vehicles/Spacecraft that have printed power & toughness
has_power_toughness = filtered_df['power'].notna() & filtered_df['toughness'].notna()
allow_artifact_vehicle = is_legendary & is_artifact & is_vehicle_or_spacecraft & has_power_toughness
# (Artifacts or planeswalkers with explicit permission text will be added in special cases step.)
baseline_mask = allow_legendary_creature | allow_enchantment_creature | allow_artifact_vehicle
filtered_df = filtered_df[baseline_mask].copy()
if filtered_df.empty:
raise CommanderValidationError(
"No baseline eligible commanders found",
"legendary_check",
"After applying commander rules no cards qualified"
)
logger.debug(
"Baseline commander counts: total=%d legendary_creatures=%d enchantment_creatures=%d artifact_vehicles=%d",
len(filtered_df),
int((allow_legendary_creature).sum()),
int((allow_enchantment_creature).sum()),
int((allow_artifact_vehicle).sum())
)
pbar.update(1)
except Exception as e:
raise CommanderValidationError(
"Legendary status check failed",
"legendary_check",
str(e)
) from e
# Step 2: Validate special cases
try:
with tqdm(total=1, desc='Validating special cases') as pbar:
# Add any card (including planeswalkers, artifacts, non-legendary cards) that explicitly allow being a commander
special_cases = df['text'].str.contains('can be your commander', na=False, case=False)
special_commanders = df[special_cases].copy()
filtered_df = pd.concat([filtered_df, special_commanders]).drop_duplicates()
logger.debug(f'Added {len(special_commanders)} special commander cards')
pbar.update(1)
except Exception as e:
raise CommanderValidationError(
"Special case validation failed",
"special_cases",
str(e)
) from e
# Step 3: Verify set legality
try:
with tqdm(total=1, desc='Verifying set legality') as pbar:
initial_count = len(filtered_df)
for set_code in NON_LEGAL_SETS:
filtered_df = filtered_df[
~filtered_df['printings'].str.contains(set_code, na=False)
]
removed_count = initial_count - len(filtered_df)
logger.debug(f'Removed {removed_count} cards from illegal sets')
pbar.update(1)
except Exception as e:
raise CommanderValidationError(
"Set legality verification failed",
"set_legality",
str(e)
) from e
filtered_df = _enforce_primary_face_commander_rules(filtered_df, df)
logger.info('Commander validation complete. %d valid commanders found', len(filtered_df))
return filtered_df
except CommanderValidationError:
raise
except Exception as e:
raise DataFrameProcessingError(
"Failed to process legendary cards",
"commander_processing",
str(e)
) from e
def process_card_dataframe(df: CardLibraryDF, batch_size: int = 1000, columns_to_keep: Optional[List[str]] = None,
include_commander_cols: bool = False, skip_availability_checks: bool = False) -> CardLibraryDF:
"""Process DataFrame with common operations in batches.
Args:
df: DataFrame to process
batch_size: Size of batches for processing
columns_to_keep: List of columns to keep (default: COLUMN_ORDER)
include_commander_cols: Whether to include commander-specific columns
skip_availability_checks: Whether to skip availability and security checks (default: False)
Args:
df: DataFrame to process
batch_size: Size of batches for processing
columns_to_keep: List of columns to keep (default: COLUMN_ORDER)
include_commander_cols: Whether to include commander-specific columns
Returns:
CardLibraryDF: Processed DataFrame with standardized structure
"""
logger.info("Processing card DataFrame...")
if columns_to_keep is None:
columns_to_keep = TAGGED_COLUMN_ORDER.copy()
if include_commander_cols:
commander_cols = ['printings', 'text', 'power', 'toughness', 'keywords']
columns_to_keep.extend(col for col in commander_cols if col not in columns_to_keep)
# Fill NA values
df.loc[:, 'colorIdentity'] = df['colorIdentity'].fillna('Colorless')
df.loc[:, 'faceName'] = df['faceName'].fillna(df['name'])
# Process in batches
total_batches = len(df) // batch_size + 1
processed_dfs = []
for i in tqdm(range(total_batches), desc="Processing batches"):
start_idx = i * batch_size
end_idx = min((i + 1) * batch_size, len(df))
batch = df.iloc[start_idx:end_idx].copy()
        if not skip_availability_checks:
            # Note: this resets columns_to_keep to COLUMN_ORDER, overriding any
            # commander-specific columns added above
            columns_to_keep = COLUMN_ORDER.copy()
            logger.debug("Performing column checks...")
# Common processing steps
batch = batch[batch['availability'].str.contains('paper', na=False)]
batch = batch.loc[batch['layout'] != 'reversible_card']
batch = batch.loc[batch['promoTypes'] != 'playtest']
batch = batch.loc[batch['securityStamp'] != 'heart']
batch = batch.loc[batch['securityStamp'] != 'acorn']
# Keep only specified columns
batch = batch[columns_to_keep]
processed_dfs.append(batch)
else:
logger.debug("Skipping column checks...")
# Even when skipping availability checks, still ensure columns_to_keep if provided
if columns_to_keep is not None:
try:
batch = batch[columns_to_keep]
except Exception:
# If requested columns are not present, keep as-is
pass
processed_dfs.append(batch)
# Combine processed batches
result = pd.concat(processed_dfs, ignore_index=True)
# Final processing
result.drop_duplicates(subset='faceName', keep='first', inplace=True)
result.sort_values(by=['name', 'side'], key=lambda col: col.str.lower(), inplace=True)
logger.info("DataFrame processing completed")
return result
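# Illustrative usage (a sketch, assuming a local cards.csv with the MTGJSON
# columns the availability checks expect, e.g. 'availability', 'layout',
# 'promoTypes', 'securityStamp'):
# >>> raw = pd.read_csv('card_files/cards.csv', low_memory=False)
# >>> processed = process_card_dataframe(raw, batch_size=1000, include_commander_cols=True)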
# Backward-compatibility wrapper used by deck_builder.builder
def regenerate_csvs_all() -> None: # pragma: no cover - simple delegator
"""Delegate to setup.regenerate_csvs_all to preserve existing imports.
Some modules import regenerate_csvs_all from setup_utils. Keep this
function as a stable indirection to avoid breaking callers.
"""
from . import setup as setup_module # local import to avoid circular import
setup_module.regenerate_csvs_all()

View file

@ -1,362 +1,374 @@
"""MTG Python Deckbuilder setup module.
"""Parquet-based setup for MTG Python Deckbuilder.
This module provides the main setup functionality for the MTG Python Deckbuilder
application. It handles initial setup tasks such as downloading card data,
creating color-filtered card lists, and generating commander-eligible card lists.
This module handles downloading and processing MTGJSON Parquet data for the
MTG Python Deckbuilder. It replaces the old CSV-based multi-file approach
with a single-file Parquet workflow.
Key Changes from CSV approach:
- Single all_cards.parquet file instead of 18+ color-specific CSVs
- Downloads from MTGJSON Parquet API (faster, smaller)
- Adds isCommander and isBackground boolean flags
- Filters to essential columns only (14 base + 4 custom = 18 total)
- Uses DataLoader abstraction for format flexibility
Key Features:
- Initial setup and configuration
- Card data download and processing
- Color-based card filtering
- Commander card list generation
- CSV file management and validation
The module works in conjunction with setup_utils.py for utility functions and
exceptions.py for error handling.
Introduced in v3.0.0 as part of CSV → Parquet migration.
"""
from __future__ import annotations
# Standard library imports
from enum import Enum
import os
from typing import List, Dict, Any
# Third-party imports (optional)
try:
import inquirer # type: ignore
except Exception:
inquirer = None # Fallback to simple input-based menu when unavailable
import pandas as pd
import requests
from tqdm import tqdm
# Local imports
from .data_loader import DataLoader, validate_schema
from .setup_constants import (
CSV_PROCESSING_COLUMNS,
CARD_TYPES_TO_EXCLUDE,
NON_LEGAL_SETS,
BANNED_CARDS,
FILTER_CONFIG,
SORT_CONFIG,
)
import logging_util
from settings import CSV_DIRECTORY
from .setup_constants import BANNED_CARDS, SETUP_COLORS, COLOR_ABRV, MTGJSON_API_URL
from .setup_utils import (
download_cards_csv,
filter_dataframe,
process_legendary_cards,
check_csv_exists,
save_color_filtered_csvs,
enrich_commander_rows_with_tags,
)
from exceptions import (
CSVFileNotFoundError,
CommanderValidationError,
MTGJSONDownloadError
)
from scripts import generate_background_cards as background_cards_script
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
from path_util import card_files_raw_dir, get_processed_cards_path
import settings
logger = logging_util.get_logger(__name__)
# MTGJSON Parquet API URL
MTGJSON_PARQUET_URL = "https://mtgjson.com/api/v5/parquet/cards.parquet"
def _generate_background_catalog(cards_path: str, output_path: str) -> None:
"""Regenerate ``background_cards.csv`` from the latest cards dataset."""
logger.info('Generating background cards catalog')
args = [
'--source', cards_path,
'--output', output_path,
]
try:
background_cards_script.main(args)
except Exception: # pragma: no cover - surfaced to caller/test
logger.exception('Failed to generate background catalog')
raise
else:
logger.info('Background cards catalog generated successfully')
# Create logger for this module
logger = logging_util.logging.getLogger(__name__)
logger.setLevel(logging_util.LOG_LEVEL)
logger.addHandler(logging_util.file_handler)
logger.addHandler(logging_util.stream_handler)
# Create CSV directory if it doesn't exist
if not os.path.exists(CSV_DIRECTORY):
os.makedirs(CSV_DIRECTORY)
## Note: using shared check_csv_exists from setup_utils to avoid duplication
def initial_setup() -> None:
"""Perform initial setup by downloading card data and creating filtered CSV files.
Downloads the latest card data from MTGJSON if needed, creates color-filtered CSV files,
and generates commander-eligible cards list. Uses utility functions from setup_utils.py
for file operations and data processing.
Raises:
CSVFileNotFoundError: If required CSV files cannot be found
MTGJSONDownloadError: If card data download fails
DataFrameProcessingError: If data processing fails
ColorFilterError: If color filtering fails
"""
logger.info('Checking for cards.csv file')
try:
cards_file = f'{CSV_DIRECTORY}/cards.csv'
try:
with open(cards_file, 'r', encoding='utf-8'):
logger.info('cards.csv exists')
except FileNotFoundError:
logger.info('cards.csv not found, downloading from mtgjson')
download_cards_csv(MTGJSON_API_URL, cards_file)
df = pd.read_csv(cards_file, low_memory=False)
logger.info('Checking for color identity sorted files')
# Generate color-identity filtered CSVs in one pass
save_color_filtered_csvs(df, CSV_DIRECTORY)
# Generate commander list
determine_commanders()
except Exception as e:
logger.error(f'Error during initial setup: {str(e)}')
raise
## Removed local filter_by_color in favor of setup_utils.save_color_filtered_csvs
def determine_commanders() -> None:
"""Generate commander_cards.csv containing all cards eligible to be commanders.
This function processes the card database to identify and validate commander-eligible cards,
applying comprehensive validation steps and filtering criteria.
Raises:
CSVFileNotFoundError: If cards.csv is missing and cannot be downloaded
MTGJSONDownloadError: If downloading cards data fails
CommanderValidationError: If commander validation fails
DataFrameProcessingError: If data processing operations fail
"""
logger.info('Starting commander card generation process')
try:
# Check for cards.csv with progress tracking
cards_file = f'{CSV_DIRECTORY}/cards.csv'
if not check_csv_exists(cards_file):
logger.info('cards.csv not found, initiating download')
download_cards_csv(MTGJSON_API_URL, cards_file)
else:
logger.info('cards.csv found, proceeding with processing')
# Load and process cards data
logger.info('Loading card data from CSV')
df = pd.read_csv(cards_file, low_memory=False)
# Process legendary cards with validation
logger.info('Processing and validating legendary cards')
try:
filtered_df = process_legendary_cards(df)
except CommanderValidationError as e:
logger.error(f'Commander validation failed: {str(e)}')
raise
# Apply standard filters
logger.info('Applying standard card filters')
filtered_df = filter_dataframe(filtered_df, BANNED_CARDS)
logger.info('Enriching commander metadata with theme and creature tags')
filtered_df = enrich_commander_rows_with_tags(filtered_df, CSV_DIRECTORY)
# Save commander cards
logger.info('Saving validated commander cards')
commander_path = f'{CSV_DIRECTORY}/commander_cards.csv'
filtered_df.to_csv(commander_path, index=False)
background_output = f'{CSV_DIRECTORY}/background_cards.csv'
_generate_background_catalog(cards_file, background_output)
logger.info('Commander card generation completed successfully')
except (CSVFileNotFoundError, MTGJSONDownloadError) as e:
logger.error(f'File operation error: {str(e)}')
raise
except CommanderValidationError as e:
logger.error(f'Commander validation error: {str(e)}')
raise
except Exception as e:
logger.error(f'Unexpected error during commander generation: {str(e)}')
raise
def regenerate_csvs_all() -> None:
"""Regenerate all color-filtered CSV files from latest card data.
Downloads fresh card data and recreates all color-filtered CSV files.
Useful for updating the card database when new sets are released.
Raises:
MTGJSONDownloadError: If card data download fails
DataFrameProcessingError: If data processing fails
ColorFilterError: If color filtering fails
"""
try:
logger.info('Downloading latest card data from MTGJSON')
download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
logger.info('Loading and processing card data')
try:
df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
except pd.errors.ParserError as e:
logger.warning(f'CSV parsing error encountered: {e}. Retrying with error handling...')
df = pd.read_csv(
f'{CSV_DIRECTORY}/cards.csv',
low_memory=False,
on_bad_lines='warn', # Warn about malformed rows but continue
encoding_errors='replace' # Replace bad encoding chars
)
logger.info(f'Successfully loaded card data with error handling (some rows may have been skipped)')
logger.info('Regenerating color identity sorted files')
save_color_filtered_csvs(df, CSV_DIRECTORY)
logger.info('Regenerating commander cards')
determine_commanders()
logger.info('Card database regeneration complete')
except Exception as e:
logger.error(f'Failed to regenerate card database: {str(e)}')
raise
# Once files are regenerated, create a new legendary list (already executed in try)
def regenerate_csv_by_color(color: str) -> None:
    """Regenerate CSV file for a specific color identity.
    Args:
        color: Color name to regenerate CSV for (e.g. 'white', 'blue')
    Raises:
        ValueError: If color is not valid
        MTGJSONDownloadError: If card data download fails
        DataFrameProcessingError: If data processing fails
        ColorFilterError: If color filtering fails
    """
    try:
        if color not in SETUP_COLORS:
            raise ValueError(f'Invalid color: {color}')
        color_abv = COLOR_ABRV[SETUP_COLORS.index(color)]
        logger.info(f'Downloading latest card data for {color} cards')
        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
        logger.info('Loading and processing card data')
        df = pd.read_csv(
            f'{CSV_DIRECTORY}/cards.csv',
            low_memory=False,
            on_bad_lines='skip',  # Skip malformed rows (MTGJSON CSV has escaping issues)
            encoding_errors='replace'  # Replace bad encoding chars
        )
        logger.info(f'Regenerating {color} cards CSV')
        # Use shared utilities to base-filter once then slice color, honoring bans
        base_df = filter_dataframe(df, BANNED_CARDS)
        base_df[base_df['colorIdentity'] == color_abv].to_csv(
            f'{CSV_DIRECTORY}/{color}_cards.csv', index=False
        )
        logger.info(f'Successfully regenerated {color} cards database')
    except Exception as e:
        logger.error(f'Failed to regenerate {color} cards: {str(e)}')
        raise
def download_parquet_from_mtgjson(output_path: str) -> None:
    """Download MTGJSON cards.parquet file.
    Args:
        output_path: Where to save the downloaded Parquet file
    Raises:
        requests.RequestException: If download fails
        IOError: If file cannot be written
    """
    logger.info(f"Downloading MTGJSON Parquet from {MTGJSON_PARQUET_URL}")
    try:
        response = requests.get(MTGJSON_PARQUET_URL, stream=True, timeout=60)
        response.raise_for_status()
        # Get file size for progress bar
        total_size = int(response.headers.get('content-length', 0))
        # Ensure output directory exists
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        # Download with progress bar
        with open(output_path, 'wb') as f, tqdm(
            total=total_size,
            unit='B',
            unit_scale=True,
            desc='Downloading cards.parquet'
        ) as pbar:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                pbar.update(len(chunk))
        logger.info(f"✓ Downloaded {total_size / (1024**2):.2f} MB to {output_path}")
    except requests.RequestException as e:
        logger.error(f"Failed to download MTGJSON Parquet: {e}")
        raise
    except IOError as e:
        logger.error(f"Failed to write Parquet file: {e}")
        raise
class SetupOption(Enum):
    """Enum for setup menu options."""
    INITIAL_SETUP = 'Initial Setup'
    REGENERATE_CSV = 'Regenerate CSV Files'
    BACK = 'Back'
def _display_setup_menu() -> SetupOption:
    """Display the setup menu and return the selected option.
    Returns:
        SetupOption: The selected menu option
    """
    if inquirer is not None:
        question: List[Dict[str, Any]] = [
            inquirer.List(
                'menu',
                choices=[option.value for option in SetupOption],
                carousel=True)]
        answer = inquirer.prompt(question)
        return SetupOption(answer['menu'])
    # Simple fallback when inquirer isn't installed (e.g., headless/container)
    options = list(SetupOption)
    print("\nSetup Menu:")
    for idx, opt in enumerate(options, start=1):
        print(f" {idx}) {opt.value}")
    while True:
        try:
            sel = input("Select an option [1]: ").strip() or "1"
            i = int(sel)
            if 1 <= i <= len(options):
                return options[i - 1]
        except KeyboardInterrupt:
            print("")
            return SetupOption.BACK
        except Exception:
            pass
        print("Invalid selection. Please try again.")
def setup() -> bool:
    """Run the setup process for the MTG Python Deckbuilder.
    This function provides a menu-driven interface to:
    1. Perform initial setup by downloading and processing card data
    2. Regenerate CSV files with updated card data
    3. Perform all tagging processes on the color-sorted csv files
    The function handles errors gracefully and provides feedback through logging.
    Returns:
        bool: True if setup completed successfully, False otherwise
    """
    try:
        print('Which setup operation would you like to perform?\n'
              'If this is your first time setting up, do the initial setup.\n'
              'If you\'ve done the basic setup before, you can regenerate the CSV files\n')
        choice = _display_setup_menu()
        if choice == SetupOption.INITIAL_SETUP:
            logger.info('Starting initial setup')
            initial_setup()
            logger.info('Initial setup completed successfully')
            return True
        elif choice == SetupOption.REGENERATE_CSV:
            logger.info('Starting CSV regeneration')
            regenerate_csvs_all()
            logger.info('CSV regeneration completed successfully')
            return True
        elif choice == SetupOption.BACK:
            logger.info('Setup cancelled by user')
            return False
    except Exception as e:
        logger.error(f'Error during setup: {e}')
        raise
def is_valid_commander(row: pd.Series) -> bool:
    """Determine if a card can be a commander.
    Criteria:
    - Legendary Creature
    - OR: Has "can be your commander" in text
    - OR: Background (Partner with Background)
    Args:
        row: DataFrame row with card data
    Returns:
        True if card can be a commander
    """
    type_line = str(row.get('type', ''))
    text = str(row.get('text', '')).lower()
    # Legendary Creature
    if 'Legendary' in type_line and 'Creature' in type_line:
        return True
    # Special text (e.g., "can be your commander")
    if 'can be your commander' in text:
        return True
    # Backgrounds can be commanders (with Choose a Background)
    if 'Background' in type_line:
        return True
    return False
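# Quick checks against the criteria above (illustrative):
# >>> is_valid_commander(pd.Series({'type': 'Legendary Creature — Elf Druid', 'text': ''}))
# True
# >>> is_valid_commander(pd.Series({'type': 'Legendary Enchantment — Background', 'text': ''}))
# True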
def is_background(row: pd.Series) -> bool:
"""Determine if a card is a Background.
Args:
row: DataFrame row with card data
Returns:
True if card has Background type
"""
type_line = str(row.get('type', ''))
return 'Background' in type_line
def extract_creature_types(row: pd.Series) -> str:
"""Extract creature types from type line.
Args:
row: DataFrame row with card data
Returns:
Comma-separated creature types or empty string
"""
type_line = str(row.get('type', ''))
# Check if it's a creature
if 'Creature' not in type_line:
return ''
    # Split on — to get subtypes
    if '—' in type_line:
        parts = type_line.split('—')
if len(parts) >= 2:
# Get everything after the dash, strip whitespace
subtypes = parts[1].strip()
return subtypes
return ''
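# Illustrative behavior:
# >>> extract_creature_types(pd.Series({'type': 'Legendary Creature — Elf Druid'}))
# 'Elf Druid'
# >>> extract_creature_types(pd.Series({'type': 'Legendary Artifact — Vehicle'}))
# ''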
def process_raw_parquet(raw_path: str, output_path: str) -> pd.DataFrame:
"""Process raw MTGJSON Parquet into processed all_cards.parquet.
This function:
1. Loads raw Parquet (all ~82 columns)
2. Filters to essential columns (CSV_PROCESSING_COLUMNS)
3. Applies standard filtering (banned cards, illegal sets, special types)
4. Deduplicates by faceName (keep first printing only)
5. Adds custom columns: creatureTypes, themeTags, isCommander, isBackground
6. Validates schema
7. Writes to processed directory
Args:
raw_path: Path to raw cards.parquet from MTGJSON
output_path: Path to save processed all_cards.parquet
Returns:
Processed DataFrame
Raises:
ValueError: If schema validation fails
"""
logger.info(f"Processing {raw_path}")
# Load raw Parquet with DataLoader
loader = DataLoader()
df = loader.read_cards(raw_path)
logger.info(f"Loaded {len(df)} cards with {len(df.columns)} columns")
# Step 1: Fill NA values
logger.info("Filling NA values")
for col, fill_value in settings.FILL_NA_COLUMNS.items():
if col in df.columns:
if col == 'faceName':
df[col] = df[col].fillna(df['name'])
else:
df[col] = df[col].fillna(fill_value)
# Step 2: Apply configuration-based filters (FILTER_CONFIG)
logger.info("Applying configuration filters")
for field, rules in FILTER_CONFIG.items():
if field not in df.columns:
logger.warning(f"Skipping filter for missing field: {field}")
continue
for rule_type, values in rules.items():
if not values:
continue
if rule_type == 'exclude':
for value in values:
mask = df[field].astype(str).str.contains(value, case=False, na=False, regex=False)
before = len(df)
df = df[~mask]
logger.debug(f"Excluded {field} containing '{value}': {before - len(df)} removed")
elif rule_type == 'require':
for value in values:
mask = df[field].astype(str).str.contains(value, case=False, na=False, regex=False)
before = len(df)
df = df[mask]
logger.debug(f"Required {field} containing '{value}': {before - len(df)} removed")
# Step 3: Remove illegal sets
if 'printings' in df.columns:
logger.info("Removing illegal sets")
for set_code in NON_LEGAL_SETS:
before = len(df)
df = df[~df['printings'].str.contains(set_code, na=False)]
if len(df) < before:
logger.debug(f"Removed set {set_code}: {before - len(df)} cards")
# Step 4: Remove banned cards
logger.info("Removing banned cards")
banned_set = {b.casefold() for b in BANNED_CARDS}
name_lc = df['name'].astype(str).str.casefold()
face_lc = df['faceName'].astype(str).str.casefold() if 'faceName' in df.columns else name_lc
mask = ~(name_lc.isin(banned_set) | face_lc.isin(banned_set))
before = len(df)
df = df[mask]
logger.debug(f"Removed banned cards: {before - len(df)} filtered out")
# Step 5: Remove special card types
logger.info("Removing special card types")
for card_type in CARD_TYPES_TO_EXCLUDE:
before = len(df)
df = df[~df['type'].str.contains(card_type, na=False)]
if len(df) < before:
logger.debug(f"Removed type {card_type}: {before - len(df)} cards")
# Step 6: Filter to essential columns only (reduce from ~82 to 14)
logger.info(f"Filtering to {len(CSV_PROCESSING_COLUMNS)} essential columns")
df = df[CSV_PROCESSING_COLUMNS]
# Step 7: Sort and deduplicate (CRITICAL: keeps only one printing per unique card)
logger.info("Sorting and deduplicating cards")
df = df.sort_values(
by=SORT_CONFIG['columns'],
key=lambda col: col.str.lower() if not SORT_CONFIG['case_sensitive'] else col
)
before = len(df)
df = df.drop_duplicates(subset='faceName', keep='first')
logger.info(f"Deduplicated: {before}{len(df)} cards ({before - len(df)} duplicate printings removed)")
# Step 8: Add custom columns
logger.info("Adding custom columns: creatureTypes, themeTags, isCommander, isBackground")
# creatureTypes: extracted from type line
df['creatureTypes'] = df.apply(extract_creature_types, axis=1)
# themeTags: empty placeholder (filled during tagging)
df['themeTags'] = ''
# isCommander: boolean flag
df['isCommander'] = df.apply(is_valid_commander, axis=1)
# isBackground: boolean flag
df['isBackground'] = df.apply(is_background, axis=1)
# Reorder columns to match CARD_DATA_COLUMNS
# CARD_DATA_COLUMNS has: name, faceName, edhrecRank, colorIdentity, colors,
# manaCost, manaValue, type, creatureTypes, text,
# power, toughness, keywords, themeTags, layout, side
# We need to add isCommander and isBackground at the end
final_columns = settings.CARD_DATA_COLUMNS + ['isCommander', 'isBackground']
# Ensure all columns exist
for col in final_columns:
if col not in df.columns:
logger.warning(f"Column {col} missing, adding empty column")
df[col] = ''
df = df[final_columns]
logger.info(f"Final dataset: {len(df)} cards, {len(df.columns)} columns")
logger.info(f"Commanders: {df['isCommander'].sum()}")
logger.info(f"Backgrounds: {df['isBackground'].sum()}")
# Validate schema (check required columns present)
try:
validate_schema(df)
logger.info("✓ Schema validation passed")
except ValueError as e:
logger.error(f"Schema validation failed: {e}")
raise
# Write to processed directory
logger.info(f"Writing processed Parquet to {output_path}")
os.makedirs(os.path.dirname(output_path), exist_ok=True)
loader.write_cards(df, output_path)
logger.info(f"✓ Created {output_path}")
return df
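# End-to-end sketch of the pipeline above, using the default paths from
# initial_setup (illustrative):
# >>> df = process_raw_parquet('card_files/raw/cards.parquet',
# ...                          'card_files/processed/all_cards.parquet')
# >>> int(df['isCommander'].sum())  # commander-eligible card count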
def initial_setup() -> None:
"""Download and process MTGJSON Parquet data.
Modern Parquet-based setup workflow (replaces legacy CSV approach).
Workflow:
    1. Download cards.parquet from MTGJSON → card_files/raw/cards.parquet
    2. Process and filter → card_files/processed/all_cards.parquet
3. No color-specific files (filter at query time instead)
Raises:
Various exceptions from download/processing steps
"""
logger.info("=" * 80)
logger.info("Starting Parquet-based initial setup")
logger.info("=" * 80)
# Step 1: Download raw Parquet
raw_dir = card_files_raw_dir()
raw_path = os.path.join(raw_dir, "cards.parquet")
if os.path.exists(raw_path):
logger.info(f"Raw Parquet already exists: {raw_path}")
logger.info("Skipping download (delete file to re-download)")
else:
download_parquet_from_mtgjson(raw_path)
    # Step 2: Process raw → processed
processed_path = get_processed_cards_path()
logger.info(f"Processing raw Parquet → {processed_path}")
process_raw_parquet(raw_path, processed_path)
logger.info("=" * 80)
logger.info("✓ Parquet setup complete")
logger.info(f" Raw: {raw_path}")
logger.info(f" Processed: {processed_path}")
logger.info("=" * 80)
def regenerate_processed_parquet() -> None:
"""Regenerate processed Parquet from existing raw file.
Useful when:
- Column processing logic changes
- Adding new custom columns
- Testing without re-downloading
"""
logger.info("Regenerating processed Parquet from raw file")
raw_path = os.path.join(card_files_raw_dir(), "cards.parquet")
if not os.path.exists(raw_path):
logger.error(f"Raw Parquet not found: {raw_path}")
logger.error("Run initial_setup_parquet() first to download")
raise FileNotFoundError(f"Raw Parquet not found: {raw_path}")
processed_path = get_processed_cards_path()
process_raw_parquet(raw_path, processed_path)
logger.info(f"✓ Regenerated {processed_path}")

View file

@ -16,8 +16,8 @@ __all__ = [
# Banned cards consolidated here (remains specific to setup concerns)
BANNED_CARDS: List[str] = [
# Commander banned list
'Ancestral Recall', 'Balance', 'Biorhythm', 'Black Lotus',
'Chaos Orb', 'Channel', 'Dockside Extortionist',
'1996 World Champion', 'Ancestral Recall', 'Balance', 'Biorhythm',
'Black Lotus', 'Chaos Orb', 'Channel', 'Dockside Extortionist',
'Emrakul, the Aeons Torn',
'Erayo, Soratami Ascendant', 'Falling Star', 'Fastbond',
'Flash', 'Golos, Tireless Pilgrim',