mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-17 16:10:12 +01:00
Noticed that changes made in setup have drastically reduced cards after filtering. Redoing setup and its associated files to fix this
This commit is contained in:
parent
c1d6b5ce18
commit
c4d773d663
4 changed files with 369 additions and 117 deletions
85
exceptions.py
Normal file
85
exceptions.py
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
"""Custom exceptions for MTG Python Deckbuilder setup operations."""
|
||||||
|
|
||||||
|
class MTGSetupError(Exception):
    """Base exception class for MTG setup-related errors.

    Every custom exception raised by the setup pipeline derives from this
    class, so callers can catch all setup failures with a single handler.
    """
|
||||||
|
|
||||||
|
class CSVFileNotFoundError(MTGSetupError):
    """Exception raised when a required CSV file is not found.

    This exception is raised when attempting to access or process a CSV file
    that does not exist in the expected location.

    Args:
        message: Explanation of the error
        filename: Name of the missing CSV file
    """

    def __init__(self, message: str, filename: str) -> None:
        # Keep the filename on the instance so callers can recover it.
        self.filename = filename
        # Bug fix: include the missing file's name in the rendered message —
        # it was stored on the instance but never shown to the user.
        super().__init__(f"{message}: {filename}")
|
||||||
|
|
||||||
|
class MTGJSONDownloadError(MTGSetupError):
    """Exception raised when downloading data from MTGJSON fails.

    This exception is raised when there are issues downloading card data
    from the MTGJSON API, such as network errors or API failures.

    Args:
        message: Explanation of the error
        url: The URL that failed to download
        status_code: HTTP status code if available
    """

    # Annotation fix: the parameter defaults to None, so it is optional —
    # the bare `int` annotation was wrong. Quoted form works on all Python 3.x.
    def __init__(self, message: str, url: str, status_code: 'int | None' = None) -> None:
        self.url = url
        self.status_code = status_code
        # `is not None` (not truthiness) so an explicit 0 would still render.
        status_info = f" (Status: {status_code})" if status_code is not None else ""
        super().__init__(f"{message}: {url}{status_info}")
|
||||||
|
|
||||||
|
class DataFrameProcessingError(MTGSetupError):
    """Exception raised when DataFrame operations fail during setup.

    This exception is raised when there are issues processing card data
    in pandas DataFrames, such as filtering, sorting, or transformation errors.

    Args:
        message: Explanation of the error
        operation: The DataFrame operation that failed (e.g., 'color_filtering', 'commander_processing')
        details: Additional error details

    Examples:
        >>> raise DataFrameProcessingError(
        ...     "Invalid color identity",
        ...     "color_filtering",
        ...     "Color 'P' is not a valid MTG color"
        ... )
    """

    # Annotation fix: `details` defaults to None, so it is optional — the
    # bare `str` annotation was wrong. Quoted form works on all Python 3.x.
    def __init__(self, message: str, operation: str, details: 'str | None' = None) -> None:
        self.operation = operation
        self.details = details
        error_info = f" - {details}" if details else ""
        super().__init__(f"{message} during {operation}{error_info}")
|
||||||
|
|
||||||
|
|
||||||
|
class ColorFilterError(MTGSetupError):
    """Exception raised when color-specific filtering operations fail.

    This exception is raised when there are issues filtering cards by color,
    such as invalid color specifications or color identity processing errors.

    Args:
        message: Explanation of the error
        color: The color value that caused the error
        details: Additional error details

    Examples:
        >>> raise ColorFilterError(
        ...     "Invalid color specification",
        ...     "Purple",
        ...     "Color must be one of: W, U, B, R, G, or C"
        ... )
    """

    # Annotation fix: `details` defaults to None, so it is optional — the
    # bare `str` annotation was wrong. Quoted form works on all Python 3.x.
    def __init__(self, message: str, color: str, details: 'str | None' = None) -> None:
        self.color = color
        self.details = details
        error_info = f" - {details}" if details else ""
        super().__init__(f"{message} for color '{color}'{error_info}")
|
||||||
60
settings.py
60
settings.py
|
|
@ -764,3 +764,63 @@ VOLTRON_PATTERNS = [
|
||||||
'living weapon',
|
'living weapon',
|
||||||
'reconfigure'
|
'reconfigure'
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# --- Constants for setup and CSV processing -------------------------------

# Endpoint for the full MTGJSON card dump in CSV form.
MTGJSON_API_URL = 'https://mtgjson.com/api/v5/csv/cards.csv'

# Type-line fragments that mark a card as potentially commander-eligible.
LEGENDARY_OPTIONS = [
    'Legendary Creature',
    'Legendary Artifact',
    'Legendary Artifact Creature',
    'Legendary Enchantment Creature',
    'Legendary Planeswalker',
]

# Set codes whose printings are not tournament-legal (Heroes of the Realm
# promos and the Un-sets); cards printed only in these are excluded.
NON_LEGAL_SETS = [
    'PHTR', 'PH17', 'PH18', 'PH19', 'PH20', 'PH21',
    'UGL', 'UND', 'UNH', 'UST',
]

# Type-line fragments for supplemental-product card types that are never
# playable in a constructed deck.
CARD_TYPES_TO_EXCLUDE = [
    'Plane —',
    'Conspiracy',
    'Vanguard',
    'Scheme',
    'Phenomenon',
    'Stickers',
    'Attraction',
    'Hero',
    'Contraption',
]

# Columns kept (in this order) when writing the processed card CSVs.
CSV_PROCESSING_COLUMNS = [
    'name',
    'faceName',
    'edhrecRank',
    'colorIdentity',
    'colors',
    'manaCost',
    'manaValue',
    'type',
    'layout',
    'text',
    'power',
    'toughness',
    'keywords',
    'side',
]

# SETUP_COLORS and COLOR_ABRV are parallel lists: SETUP_COLORS[i] is the
# human-readable color-identity name used in output filenames, and
# COLOR_ABRV[i] is the matching MTGJSON colorIdentity value. Keep them in
# lockstep — code pairs them by index/zip.
SETUP_COLORS = [
    # mono
    'colorless', 'white', 'blue', 'black', 'green', 'red',
    # two-color guilds
    'azorius', 'orzhov', 'selesnya', 'boros', 'dimir',
    'simic', 'izzet', 'golgari', 'rakdos', 'gruul',
    # three-color shards
    'bant', 'esper', 'grixis', 'jund', 'naya',
    # three-color wedges
    'abzan', 'jeskai', 'mardu', 'sultai', 'temur',
    # four-color + five-color
    'dune', 'glint', 'ink', 'witch', 'yore', 'wubrg',
]

COLOR_ABRV = [
    # mono
    'Colorless', 'W', 'U', 'B', 'G', 'R',
    # two-color guilds
    'U, W', 'B, W', 'G, W', 'R, W', 'B, U',
    'G, U', 'R, U', 'B, G', 'B, R', 'G, R',
    # three-color shards
    'G, U, W', 'B, U, W', 'B, R, U', 'B, G, R', 'G, R, W',
    # three-color wedges
    'B, G, W', 'R, U, W', 'B, R, W', 'B, G, U', 'G, R, U',
    # four-color + five-color
    'B, G, R, W', 'B, G, R, U', 'G, R, U, W', 'B, G, U, W',
    'B, R, U, W', 'B, G, R, U, W',
]
|
||||||
|
|
|
||||||
180
setup.py
180
setup.py
|
|
@ -3,23 +3,18 @@ from __future__ import annotations
|
||||||
import pandas as pd # type: ignore
|
import pandas as pd # type: ignore
|
||||||
import requests # type: ignore
|
import requests # type: ignore
|
||||||
import inquirer.prompt # type: ignore
|
import inquirer.prompt # type: ignore
|
||||||
|
import logging
|
||||||
|
|
||||||
from settings import banned_cards, csv_directory
|
from settings import banned_cards, csv_directory, SETUP_COLORS, COLOR_ABRV, MTGJSON_API_URL
|
||||||
|
from setup_utils import download_cards_csv, filter_dataframe, process_legendary_cards
|
||||||
|
|
||||||
colors = ['colorless', 'white', 'blue', 'black', 'green', 'red',
|
# Configure logging
|
||||||
'azorius', 'orzhov', 'selesnya', 'boros', 'dimir',
|
logging.basicConfig(
|
||||||
'simic', 'izzet', 'golgari', 'rakdos', 'gruul',
|
level=logging.INFO,
|
||||||
'bant', 'esper', 'grixis', 'jund', 'naya',
|
format='%(asctime)s - %(levelname)s - %(message)s',
|
||||||
'abzan', 'jeskai', 'mardu', 'sultai', 'temur',
|
datefmt='%Y-%m-%d %H:%M:%S'
|
||||||
'dune', 'glint', 'ink', 'witch', 'yore', 'wubrg']
|
)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
color_abrv = ['Colorless', 'W', 'U', 'B', 'G', 'R',
|
|
||||||
'U, W', 'B, W', 'G, W', 'R, W', 'B, U',
|
|
||||||
'G, U', 'R, U', 'B, G', 'B, R', 'G, R',
|
|
||||||
'G, U, W', 'B, U, W', 'B, R, U', 'B, G, R', 'G, R, W',
|
|
||||||
'B, G, W', 'R, U, W', 'B, R, W', 'B, G, U', 'G, R, U',
|
|
||||||
'B, G, R, W', 'B, G, R, U', 'G, R, U, W', 'B, G, U, W',
|
|
||||||
'B, R, U, W', 'B, G, R, U, W']
|
|
||||||
|
|
||||||
def filter_by_color(df, column_name, value, new_csv_name):
|
def filter_by_color(df, column_name, value, new_csv_name):
|
||||||
# Filter dataframe
|
# Filter dataframe
|
||||||
|
|
@ -54,110 +49,76 @@ def filter_by_color(df, column_name, value, new_csv_name):
|
||||||
|
|
||||||
def determine_commanders():
    """Generate commander_cards.csv containing all commander-eligible cards.

    Downloads cards.csv from MTGJSON if it is not already present, fills
    missing color identities with 'Colorless', narrows the frame to
    commander-eligible cards via process_legendary_cards(), applies the
    standard filters via filter_dataframe(), and writes the result to
    {csv_directory}/commander_cards.csv.

    Raises:
        Exception: any failure is logged and re-raised to the caller.
    """
    # Consistency fix: use the module logger (as initial_setup does) instead
    # of bare print, and fix the 'elligible' typo in the progress message.
    logger.info('Generating commander_cards.csv, containing all cards eligible to be commanders.')
    try:
        # Check for cards.csv; download it from MTGJSON when missing (EAFP).
        cards_file = f'{csv_directory}/cards.csv'
        try:
            with open(cards_file, 'r', encoding='utf-8'):
                logger.info('cards.csv exists.')
        except FileNotFoundError:
            logger.info('cards.csv not found, downloading from mtgjson')
            download_cards_csv(MTGJSON_API_URL, cards_file)

        # Load and process cards data.
        df = pd.read_csv(cards_file, low_memory=False)
        df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')

        # Narrow to commander-eligible cards, then apply the standard filters.
        filtered_df = process_legendary_cards(df)
        filtered_df = filter_dataframe(filtered_df, banned_cards)

        # Save commander cards.
        filtered_df.to_csv(f'{csv_directory}/commander_cards.csv', index=False)
        logger.info('commander_cards.csv file generated.')

    except Exception as e:
        # Lazy %-formatting: the message is only rendered if actually logged.
        logger.error('Error generating commander cards: %s', e)
        raise
|
|
||||||
|
|
||||||
def initial_setup():
    """Perform initial setup by downloading card data and creating filtered CSV files.

    This function:
    1. Downloads the latest card data from MTGJSON if needed
    2. Creates color-filtered CSV files
    3. Generates commander-eligible cards list

    Uses utility functions from setup_utils.py for file operations and data
    processing, and implements proper error handling for both.

    Raises:
        Exception: any failure is logged and re-raised to the caller.
    """
    logger.info('Checking for cards.csv file')

    try:
        # Check for cards.csv; download it from MTGJSON when missing (EAFP).
        cards_file = f'{csv_directory}/cards.csv'
        try:
            with open(cards_file, 'r', encoding='utf-8'):
                logger.info('cards.csv exists')
        except FileNotFoundError:
            logger.info('cards.csv not found, downloading from mtgjson')
            download_cards_csv(MTGJSON_API_URL, cards_file)

        df = pd.read_csv(cards_file, low_memory=False)
        df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')

        logger.info('Checking for color identity sorted files')

        # Idiom fix: zip() pairs each color name with its abbreviation and
        # stops at the shorter list — same effect as range(min(len(...)))
        # without index bookkeeping.
        for color, abbrev in zip(SETUP_COLORS, COLOR_ABRV):
            logger.info('Checking for %s_cards.csv', color)
            color_file = f'{csv_directory}/{color}_cards.csv'
            try:
                with open(color_file, 'r', encoding='utf-8'):
                    logger.info('%s_cards.csv exists', color)
            except FileNotFoundError:
                logger.info('%s_cards.csv not found, creating one', color)
                filter_by_color(df, 'colorIdentity', abbrev, color_file)

        # Generate commander list once the per-color files exist.
        determine_commanders()

    except Exception as e:
        # Lazy %-formatting: the message is only rendered if actually logged.
        logger.error('Error during initial setup: %s', e)
        raise
|
|
||||||
|
|
||||||
def regenerate_csvs_all():
|
def regenerate_csvs_all():
|
||||||
"""
|
"""
|
||||||
|
|
@ -188,10 +149,10 @@ def regenerate_csvs_all():
|
||||||
print('Regenerating color identity sorted files.\n')
|
print('Regenerating color identity sorted files.\n')
|
||||||
|
|
||||||
# For loop to iterate through the colors
|
# For loop to iterate through the colors
|
||||||
for i in range(min(len(colors), len(color_abrv))):
|
for i in range(min(len(SETUP_COLORS), len(COLOR_ABRV))):
|
||||||
print(f'Regenerating {colors[i]}_cards.csv.')
|
print(f'Regenerating {SETUP_COLORS[i]}_cards.csv.')
|
||||||
filter_by_color(df, 'colorIdentity', color_abrv[i], f'csv_files/{colors[i]}_cards.csv')
|
filter_by_color(df, 'colorIdentity', COLOR_ABRV[i], f'csv_files/{SETUP_COLORS[i]}_cards.csv')
|
||||||
print(f'A new {colors[i]}_cards.csv file has been made.\n')
|
print(f'A new {SETUP_COLORS[i]}_cards.csv file has been made.\n')
|
||||||
|
|
||||||
# Once files are regenerated, create a new legendary list
|
# Once files are regenerated, create a new legendary list
|
||||||
determine_commanders()
|
determine_commanders()
|
||||||
|
|
@ -201,8 +162,8 @@ def regenerate_csv_by_color(color):
|
||||||
Pull the original cards.csv file and remake the {color}_cards.csv files
|
Pull the original cards.csv file and remake the {color}_cards.csv files
|
||||||
"""
|
"""
|
||||||
# Determine the color_abv to use
|
# Determine the color_abv to use
|
||||||
color_abrv_index = colors.index(color)
|
COLOR_ABRV_index = SETUP_COLORS.index(color)
|
||||||
color_abv = color_abrv[color_abrv_index]
|
color_abv = COLOR_ABRV[COLOR_ABRV_index]
|
||||||
print('Downloading cards.csv from mtgjson')
|
print('Downloading cards.csv from mtgjson')
|
||||||
url = 'https://mtgjson.com/api/v5/csv/cards.csv'
|
url = 'https://mtgjson.com/api/v5/csv/cards.csv'
|
||||||
r = requests.get(url)
|
r = requests.get(url)
|
||||||
|
|
@ -258,7 +219,4 @@ def setup():
|
||||||
break
|
break
|
||||||
break
|
break
|
||||||
|
|
||||||
#regenerate_csvs_all()
|
initial_setup()
|
||||||
#regenerate_csv_by_color('white')
|
|
||||||
#determine_commanders()
|
|
||||||
#set_lands()
|
|
||||||
149
setup_utils.py
Normal file
149
setup_utils.py
Normal file
|
|
@ -0,0 +1,149 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import requests
|
||||||
|
import logging
|
||||||
|
from tqdm import tqdm
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Optional, Union
|
||||||
|
|
||||||
|
from settings import (
|
||||||
|
CSV_PROCESSING_COLUMNS,
|
||||||
|
CARD_TYPES_TO_EXCLUDE,
|
||||||
|
NON_LEGAL_SETS,
|
||||||
|
LEGENDARY_OPTIONS
|
||||||
|
)
|
||||||
|
from exceptions import CSVFileNotFoundError, MTGJSONDownloadError, DataFrameProcessingError
|
||||||
|
|
||||||
|
def download_cards_csv(url: str, output_path: Union[str, Path]) -> None:
    """Download cards data from MTGJSON and save to CSV.

    Streams the response to disk in 8 KiB chunks with a tqdm progress bar
    sized from the Content-Length header (0 when the header is absent).

    Args:
        url: URL to download cards data from
        output_path: Path to save the downloaded CSV file

    Raises:
        MTGJSONDownloadError: If download fails or response is invalid
    """
    try:
        # Bug fix: request timeout so a stalled connection cannot hang setup
        # forever. requests.Timeout is a RequestException, so the existing
        # handler below covers it.
        response = requests.get(url, stream=True, timeout=60)
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))

        with open(output_path, 'wb') as f:
            with tqdm(total=total_size, unit='iB', unit_scale=True, desc='Downloading cards data') as pbar:
                for chunk in response.iter_content(chunk_size=8192):
                    pbar.update(f.write(chunk))

    except requests.RequestException as e:
        # e.response is None for connection-level errors (no HTTP reply).
        status_code = e.response.status_code if getattr(e, 'response', None) is not None else None
        raise MTGJSONDownloadError(
            "Failed to download cards data",
            url,
            status_code
        ) from e
||||||
|
def check_csv_exists(filepath: Union[str, Path]) -> bool:
    """Check if a CSV file exists at the specified path.

    Args:
        filepath: Path to check for CSV file

    Returns:
        True if file exists, False otherwise
    """
    csv_path = Path(filepath)
    return csv_path.is_file()
|
||||||
|
|
||||||
|
def filter_dataframe(df: pd.DataFrame, banned_cards: List[str]) -> pd.DataFrame:
    """Apply standard filters to the cards DataFrame.

    Filters out reversible layouts, non-paper printings, playtest promos,
    silver-border/acorn cards, non-legal sets, banned cards, and excluded
    card types; deduplicates by face name; and returns the frame narrowed
    to CSV_PROCESSING_COLUMNS, sorted case-insensitively by name and side.

    Args:
        df: DataFrame to filter
        banned_cards: List of banned card names to exclude

    Returns:
        Filtered DataFrame

    Raises:
        DataFrameProcessingError: If filtering operations fail
    """
    try:
        # Work on a copy so the caller's DataFrame is never mutated.
        df = df.copy()

        # Fill null color identities.
        df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')

        # Basic filters. Bug fix: MTGJSON securityStamp values are lowercase
        # ('heart'/'acorn'), so match case-insensitively — the capitalized
        # 'Heart|Acorn' pattern never matched and those cards slipped through.
        # .copy() so the column assignments below operate on a real frame,
        # not a view (avoids SettingWithCopyWarning).
        filtered_df = df[
            (df['layout'] != 'reversible_card') &
            (df['availability'].str.contains('paper', na=False)) &
            (df['promoTypes'] != 'playtest') &
            (~df['securityStamp'].str.contains('heart|acorn', case=False, na=False))
        ].copy()

        # Remove cards printed only in illegal sets.
        for set_code in NON_LEGAL_SETS:
            filtered_df = filtered_df[
                ~filtered_df['printings'].str.contains(set_code, na=False)
            ]

        # Remove banned cards. regex=False: names are literal strings, and
        # some card names contain regex metacharacters (e.g. '+').
        for card in banned_cards:
            filtered_df = filtered_df[
                ~filtered_df['name'].str.contains(card, regex=False, na=False)
            ]

        # Remove special card types (literal substring match).
        for card_type in CARD_TYPES_TO_EXCLUDE:
            filtered_df = filtered_df[
                ~filtered_df['type'].str.contains(card_type, regex=False, na=False)
            ]

        # Handle face names and duplicates: single-faced cards have no
        # faceName, so fall back to the card name before deduplicating.
        filtered_df['faceName'] = filtered_df['faceName'].fillna(filtered_df['name'])
        filtered_df = filtered_df.drop_duplicates(subset='faceName', keep='first')

        # Select output columns and sort case-insensitively.
        filtered_df = filtered_df[CSV_PROCESSING_COLUMNS]
        return filtered_df.sort_values(by=['name', 'side'],
                                       key=lambda col: col.str.lower())

    except Exception as e:
        raise DataFrameProcessingError(
            "Failed to filter DataFrame",
            "standard_filtering",
            str(e)
        ) from e
|
||||||
|
|
||||||
|
def process_legendary_cards(df: pd.DataFrame) -> pd.DataFrame:
    """Process and filter legendary cards for commander eligibility.

    Args:
        df: DataFrame containing all cards

    Returns:
        DataFrame containing only commander-eligible cards

    Raises:
        DataFrameProcessingError: If processing fails
    """
    try:
        # Cards whose type line matches one of the legendary options.
        legendary_mask = df['type'].str.contains('|'.join(LEGENDARY_OPTIONS), na=False)

        # Cards whose rules text explicitly allows them to be commanders.
        can_be_commander = df['text'].str.contains(
            'can be your commander',
            na=False
        )

        # Bug fix: non-creature Legendary Artifacts and Legendary
        # Planeswalkers are NOT commanders by default — they qualify only
        # when their text says so. The previous `mask | can_be_commander`
        # kept every one of them unconditionally.
        needs_commander_text = (
            df['type'].str.contains('Legendary Artifact|Legendary Planeswalker', na=False)
            & ~df['type'].str.contains('Legendary Artifact Creature', na=False)
        )
        eligible = (legendary_mask & (~needs_commander_text | can_be_commander)) | can_be_commander

        filtered_df = df[eligible].copy()

        # Remove cards printed only in illegal sets.
        for set_code in NON_LEGAL_SETS:
            filtered_df = filtered_df[
                ~filtered_df['printings'].str.contains(set_code, na=False)
            ]

        return filtered_df

    except Exception as e:
        raise DataFrameProcessingError(
            "Failed to process legendary cards",
            "commander_processing",
            str(e)
        ) from e
|
||||||
Loading…
Add table
Add a link
Reference in a new issue