Noticed that changes made in setup drastically reduced the number of cards remaining after filtering. Redoing setup and its associated files to fix this

mwisnowski 2025-01-13 10:06:19 -08:00
parent c1d6b5ce18
commit c4d773d663
4 changed files with 369 additions and 117 deletions

85
exceptions.py Normal file

@@ -0,0 +1,85 @@
"""Custom exceptions for MTG Python Deckbuilder setup operations."""
class MTGSetupError(Exception):
"""Base exception class for MTG setup-related errors."""
pass
class CSVFileNotFoundError(MTGSetupError):
"""Exception raised when a required CSV file is not found.
This exception is raised when attempting to access or process a CSV file
that does not exist in the expected location.
Args:
message: Explanation of the error
filename: Name of the missing CSV file
"""
def __init__(self, message: str, filename: str) -> None:
self.filename = filename
super().__init__(f"{message}: {filename}")
class MTGJSONDownloadError(MTGSetupError):
"""Exception raised when downloading data from MTGJSON fails.
This exception is raised when there are issues downloading card data
from the MTGJSON API, such as network errors or API failures.
Args:
message: Explanation of the error
url: The URL that failed to download
status_code: HTTP status code if available
"""
def __init__(self, message: str, url: str, status_code: Optional[int] = None) -> None:
self.url = url
self.status_code = status_code
status_info = f" (Status: {status_code})" if status_code else ""
super().__init__(f"{message}: {url}{status_info}")
class DataFrameProcessingError(MTGSetupError):
"""Exception raised when DataFrame operations fail during setup.
This exception is raised when there are issues processing card data
in pandas DataFrames, such as filtering, sorting, or transformation errors.
Args:
message: Explanation of the error
operation: The DataFrame operation that failed (e.g., 'color_filtering', 'commander_processing')
details: Additional error details
Examples:
>>> raise DataFrameProcessingError(
... "Invalid color identity",
... "color_filtering",
... "Color 'P' is not a valid MTG color"
... )
"""
def __init__(self, message: str, operation: str, details: Optional[str] = None) -> None:
self.operation = operation
self.details = details
error_info = f" - {details}" if details else ""
super().__init__(f"{message} during {operation}{error_info}")
class ColorFilterError(MTGSetupError):
"""Exception raised when color-specific filtering operations fail.
This exception is raised when there are issues filtering cards by color,
such as invalid color specifications or color identity processing errors.
Args:
message: Explanation of the error
color: The color value that caused the error
details: Additional error details
Examples:
>>> raise ColorFilterError(
... "Invalid color specification",
... "Purple",
... "Color must be one of: W, U, B, R, G, or C"
... )
"""
def __init__(self, message: str, color: str, details: Optional[str] = None) -> None:
self.color = color
self.details = details
error_info = f" - {details}" if details else ""
super().__init__(f"{message} for color '{color}'{error_info}")

settings.py

@@ -764,3 +764,63 @@ VOLTRON_PATTERNS = [
'living weapon',
'reconfigure'
]
# Constants for setup and CSV processing
MTGJSON_API_URL = 'https://mtgjson.com/api/v5/csv/cards.csv'
LEGENDARY_OPTIONS = [
'Legendary Creature',
'Legendary Artifact',
'Legendary Artifact Creature',
'Legendary Enchantment Creature',
'Legendary Planeswalker'
]
NON_LEGAL_SETS = [
'PHTR', 'PH17', 'PH18', 'PH19', 'PH20', 'PH21',
'UGL', 'UND', 'UNH', 'UST'
]
CARD_TYPES_TO_EXCLUDE = [
'Plane —',
'Conspiracy',
'Vanguard',
'Scheme',
'Phenomenon',
'Stickers',
'Attraction',
'Hero',
'Contraption'
]
CSV_PROCESSING_COLUMNS = [
'name',
'faceName',
'edhrecRank',
'colorIdentity',
'colors',
'manaCost',
'manaValue',
'type',
'layout',
'text',
'power',
'toughness',
'keywords',
'side'
]
SETUP_COLORS = ['colorless', 'white', 'blue', 'black', 'green', 'red',
'azorius', 'orzhov', 'selesnya', 'boros', 'dimir',
'simic', 'izzet', 'golgari', 'rakdos', 'gruul',
'bant', 'esper', 'grixis', 'jund', 'naya',
'abzan', 'jeskai', 'mardu', 'sultai', 'temur',
'dune', 'glint', 'ink', 'witch', 'yore', 'wubrg']
COLOR_ABRV = ['Colorless', 'W', 'U', 'B', 'G', 'R',
'U, W', 'B, W', 'G, W', 'R, W', 'B, U',
'G, U', 'R, U', 'B, G', 'B, R', 'G, R',
'G, U, W', 'B, U, W', 'B, R, U', 'B, G, R', 'G, R, W',
'B, G, W', 'R, U, W', 'B, R, W', 'B, G, U', 'G, R, U',
'B, G, R, W', 'B, G, R, U', 'G, R, U, W', 'B, G, U, W',
'B, R, U, W', 'B, G, R, U, W']
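SETUP_COLORS and COLOR_ABRV are parallel lists that the setup loops index together; a quick sketch of the mapping they imply:

from settings import SETUP_COLORS, COLOR_ABRV

# Pair each color-identity name with its abbreviation string.
color_to_identity = dict(zip(SETUP_COLORS, COLOR_ABRV))
assert color_to_identity['azorius'] == 'U, W'
assert color_to_identity['wubrg'] == 'B, G, R, U, W'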

168
setup.py

@@ -3,23 +3,18 @@ from __future__ import annotations
import pandas as pd # type: ignore
import requests # type: ignore
import inquirer.prompt # type: ignore
import logging
from settings import banned_cards, csv_directory
from settings import banned_cards, csv_directory, SETUP_COLORS, COLOR_ABRV, MTGJSON_API_URL
from setup_utils import download_cards_csv, filter_dataframe, process_legendary_cards
colors = ['colorless', 'white', 'blue', 'black', 'green', 'red',
'azorius', 'orzhov', 'selesnya', 'boros', 'dimir',
'simic', 'izzet', 'golgari', 'rakdos', 'gruul',
'bant', 'esper', 'grixis', 'jund', 'naya',
'abzan', 'jeskai', 'mardu', 'sultai', 'temur',
'dune', 'glint', 'ink', 'witch', 'yore', 'wubrg']
color_abrv = ['Colorless', 'W', 'U', 'B', 'G', 'R',
'U, W', 'B, W', 'G, W', 'R, W', 'B, U',
'G, U', 'R, U', 'B, G', 'B, R', 'G, R',
'G, U, W', 'B, U, W', 'B, R, U', 'B, G, R', 'G, R, W',
'B, G, W', 'R, U, W', 'B, R, W', 'B, G, U', 'G, R, U',
'B, G, R, W', 'B, G, R, U', 'G, R, U, W', 'B, G, U, W',
'B, R, U, W', 'B, G, R, U, W']
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)
def filter_by_color(df, column_name, value, new_csv_name):
# Filter dataframe
@@ -54,110 +49,76 @@ def filter_by_color(df, column_name, value, new_csv_name):
def determine_commanders():
print('Generating commander_cards.csv, containing all cards eligible to be commanders.')
# Filter dataframe
while True:
try:
with open(f'{csv_directory}/cards.csv', 'r', encoding='utf-8'):
# Check for cards.csv
cards_file = f'{csv_directory}/cards.csv'
try:
with open(cards_file, 'r', encoding='utf-8'):
print('cards.csv exists.')
break
except FileNotFoundError:
# If the cards.csv file does not exist or can't be found, pull it from mtgjson.com
print('cards.csv not found, downloading from mtgjson')
url = 'https://mtgjson.com/api/v5/csv/cards.csv'
r = requests.get(url)
with open(f'{csv_directory}/cards.csv', 'wb') as outputfile:
outputfile.write(r.content)
download_cards_csv(MTGJSON_API_URL, cards_file)
# Load cards.csv file into pandas dataframe so it can be further broken down
df = pd.read_csv(f'{csv_directory}/cards.csv', low_memory=False)
# Set frames that have nothing for color identity to be 'Colorless' instead
# Load and process cards data
df = pd.read_csv(cards_file, low_memory=False)
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
legendary_options = ['Legendary Creature','Legendary Artifact', 'Legendary Artifact Creature', 'Legendary Enchantment Creature', 'Legendary Planeswalker']
filtered_df = df[df['type'].str.contains('|'.join(legendary_options))]
"""
Saves the filtered dataframe to a new csv file, and narrows down/rearranges the columns it
keeps to increase readability/trim some extra data.
Additionally attempts to remove as many duplicates as possible (including cards with
reversible prints), as well as taking out Arena-only cards.
"""
rows_to_drop = []
non_legel_sets = ['PHTR', 'PH17', 'PH18' ,'PH19', 'PH20', 'PH21', 'UGL', 'UND', 'UNH', 'UST',]
for index, row in filtered_df.iterrows():
if ('Legendary Artifact' in row['type']
or 'Legendary Planeswalker' in row['type']):
if 'Legendary Artifact Creature' not in row['type']:
if pd.notna(row['text']):
if f'{row['name']} can be your commander' not in row['text']:
rows_to_drop.append(index)
for illegal_set in non_legel_sets:
if illegal_set in row['printings']:
rows_to_drop.append(index)
# Process legendary cards
filtered_df = process_legendary_cards(df)
filtered_df = filtered_df.drop(rows_to_drop)
# Apply standard filters
filtered_df = filter_dataframe(filtered_df, banned_cards)
filtered_df.sort_values('name')
filtered_df = filtered_df.loc[filtered_df['layout'] != 'reversible_card']
filtered_df = filtered_df[filtered_df['availability'].str.contains('paper')]
filtered_df = filtered_df.loc[filtered_df['promoTypes'] != 'playtest']
filtered_df = filtered_df.loc[filtered_df['securityStamp'] != 'heart']
filtered_df = filtered_df.loc[filtered_df['securityStamp'] != 'acorn']
for card in banned_cards:
filtered_df = filtered_df[~filtered_df['name'].str.contains(card)]
card_types = ['Plane —', 'Conspiracy', 'Vanguard', 'Scheme', 'Phenomenon', 'Stickers', 'Attraction', 'Hero', 'Contraption']
for card_type in card_types:
filtered_df = filtered_df[~filtered_df['type'].str.contains(card_type)]
filtered_df['faceName'] = filtered_df['faceName'].fillna(filtered_df['name'])
filtered_df.drop_duplicates(subset='faceName', keep='first', inplace=True)
columns_to_keep = ['name', 'faceName','edhrecRank','colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'layout', 'text', 'power', 'toughness', 'keywords', 'side']
filtered_df = filtered_df[columns_to_keep]
filtered_df.sort_values(by=['name', 'side'], key=lambda col: col.str.lower(), inplace=True)
# Save commander cards
filtered_df.to_csv(f'{csv_directory}/commander_cards.csv', index=False)
print('commander_cards.csv file generated.')
except Exception as e:
print(f'Error generating commander cards: {str(e)}')
raise
def initial_setup():
print('Checking for cards.csv file.\n')
while True:
"""Perform initial setup by downloading card data and creating filtered CSV files.
This function:
1. Downloads the latest card data from MTGJSON if needed
2. Creates color-filtered CSV files
3. Generates commander-eligible cards list
Uses utility functions from setup_utils.py for file operations and data processing,
with error handling for both.
"""
logger.info('Checking for cards.csv file')
try:
with open(f'{csv_directory}/cards.csv', 'r', encoding='utf-8'):
print('cards.csv exists.')
break
cards_file = f'{csv_directory}/cards.csv'
try:
with open(cards_file, 'r', encoding='utf-8'):
logger.info('cards.csv exists')
except FileNotFoundError:
# If the cards.csv file does not exist or can't be found, pull it from mtgjson.com
print('cards.csv not found, downloading from mtgjson')
url = 'https://mtgjson.com/api/v5/csv/cards.csv'
r = requests.get(url)
with open(f'{csv_directory}/cards.csv', 'wb') as outputfile:
outputfile.write(r.content)
logger.info('cards.csv not found, downloading from mtgjson')
download_cards_csv(MTGJSON_API_URL, cards_file)
# Load cards.csv file into pandas dataframe so it can be further broken down
df = pd.read_csv(f'{csv_directory}/cards.csv', low_memory=False)
# Set frames that have nothing for color identity to be 'Colorless' instead
df = pd.read_csv(cards_file, low_memory=False)
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
# Check for and create missing, individual color identity sorted CSVs
print('Checking for color identity sorted files.\n')
logger.info('Checking for color identity sorted files')
# For loop to iterate through the colors
for i in range(min(len(colors), len(color_abrv))):
print(f'Checking for {colors[i]}_cards.csv.')
for i in range(min(len(SETUP_COLORS), len(COLOR_ABRV))):
logger.info(f'Checking for {SETUP_COLORS[i]}_cards.csv')
try:
with open(f'{csv_directory}/{colors[i]}_cards.csv', 'r', encoding='utf-8'):
print(f'{colors[i]}_cards.csv exists.\n')
with open(f'{csv_directory}/{SETUP_COLORS[i]}_cards.csv', 'r', encoding='utf-8'):
logger.info(f'{SETUP_COLORS[i]}_cards.csv exists')
except FileNotFoundError:
print(f'{colors[i]}_cards.csv not found, creating one.\n')
filter_by_color(df, 'colorIdentity', color_abrv[i], f'{csv_directory}/{colors[i]}_cards.csv')
logger.info(f'{SETUP_COLORS[i]}_cards.csv not found, creating one')
filter_by_color(df, 'colorIdentity', COLOR_ABRV[i], f'{csv_directory}/{SETUP_COLORS[i]}_cards.csv')
# Once by-color lists have been made, Determine legendary creatures
# Generate commander list
determine_commanders()
# Once Legendary creatures are determined, generate staple lists
# generate_staple_lists()
except Exception as e:
logger.error(f'Error during initial setup: {str(e)}')
raise
def regenerate_csvs_all():
"""
@@ -188,10 +149,10 @@ def regenerate_csvs_all():
print('Regenerating color identity sorted files.\n')
# For loop to iterate through the colors
for i in range(min(len(colors), len(color_abrv))):
print(f'Regenerating {colors[i]}_cards.csv.')
filter_by_color(df, 'colorIdentity', color_abrv[i], f'csv_files/{colors[i]}_cards.csv')
print(f'A new {colors[i]}_cards.csv file has been made.\n')
for i in range(min(len(SETUP_COLORS), len(COLOR_ABRV))):
print(f'Regenerating {SETUP_COLORS[i]}_cards.csv.')
filter_by_color(df, 'colorIdentity', COLOR_ABRV[i], f'csv_files/{SETUP_COLORS[i]}_cards.csv')
print(f'A new {SETUP_COLORS[i]}_cards.csv file has been made.\n')
# Once files are regenerated, create a new legendary list
determine_commanders()
@@ -201,8 +162,8 @@ def regenerate_csv_by_color(color):
Pull the original cards.csv file and remake the {color}_cards.csv files
"""
# Determine the color_abv to use
color_abrv_index = colors.index(color)
color_abv = color_abrv[color_abrv_index]
color_index = SETUP_COLORS.index(color)
color_abv = COLOR_ABRV[color_index]
print('Downloading cards.csv from mtgjson')
url = 'https://mtgjson.com/api/v5/csv/cards.csv'
r = requests.get(url)
@@ -258,7 +219,4 @@ def setup():
break
break
#regenerate_csvs_all()
#regenerate_csv_by_color('white')
#determine_commanders()
#set_lands()
initial_setup()
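A short sketch, assuming the configured csv_directory exists and is writable, of how the refactored entry points can be driven directly:

from setup import initial_setup, regenerate_csv_by_color

# Download cards.csv if missing, write the color-identity CSVs,
# and generate commander_cards.csv.
initial_setup()

# Rebuild a single color file on demand.
regenerate_csv_by_color('white')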

149
setup_utils.py Normal file

@@ -0,0 +1,149 @@
from __future__ import annotations
import pandas as pd
import requests
import logging
from tqdm import tqdm
from pathlib import Path
from typing import List, Optional, Union
from settings import (
CSV_PROCESSING_COLUMNS,
CARD_TYPES_TO_EXCLUDE,
NON_LEGAL_SETS,
LEGENDARY_OPTIONS
)
from exceptions import CSVFileNotFoundError, MTGJSONDownloadError, DataFrameProcessingError
def download_cards_csv(url: str, output_path: Union[str, Path]) -> None:
"""Download cards data from MTGJSON and save to CSV.
Args:
url: URL to download cards data from
output_path: Path to save the downloaded CSV file
Raises:
MTGJSONDownloadError: If download fails or response is invalid
"""
try:
response = requests.get(url, stream=True)
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
with open(output_path, 'wb') as f:
with tqdm(total=total_size, unit='iB', unit_scale=True, desc='Downloading cards data') as pbar:
for chunk in response.iter_content(chunk_size=8192):
size = f.write(chunk)
pbar.update(size)
except requests.RequestException as e:
raise MTGJSONDownloadError(
"Failed to download cards data",
url,
getattr(e.response, 'status_code', None) if hasattr(e, 'response') else None
) from e
def check_csv_exists(filepath: Union[str, Path]) -> bool:
"""Check if a CSV file exists at the specified path.
Args:
filepath: Path to check for CSV file
Returns:
True if file exists, False otherwise
"""
return Path(filepath).is_file()
def filter_dataframe(df: pd.DataFrame, banned_cards: List[str]) -> pd.DataFrame:
"""Apply standard filters to the cards DataFrame.
Args:
df: DataFrame to filter
banned_cards: List of banned card names to exclude
Returns:
Filtered DataFrame
Raises:
DataFrameProcessingError: If filtering operations fail
"""
try:
# Fill null color identities
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
# Basic filters
filtered_df = df[
(df['layout'] != 'reversible_card') &
(df['availability'].str.contains('paper', na=False)) &
(df['promoTypes'] != 'playtest') &
(~df['securityStamp'].str.contains('Heart|Acorn', na=False))
]
# Remove illegal sets
for set_code in NON_LEGAL_SETS:
filtered_df = filtered_df[
~filtered_df['printings'].str.contains(set_code, na=False)
]
# Remove banned cards
for card in banned_cards:
filtered_df = filtered_df[~filtered_df['name'].str.contains(card, na=False)]
# Remove special card types
for card_type in CARD_TYPES_TO_EXCLUDE:
filtered_df = filtered_df[~filtered_df['type'].str.contains(card_type, na=False)]
# Handle face names and duplicates
filtered_df['faceName'] = filtered_df['faceName'].fillna(filtered_df['name'])
filtered_df = filtered_df.drop_duplicates(subset='faceName', keep='first')
# Select and sort columns
filtered_df = filtered_df[CSV_PROCESSING_COLUMNS]
return filtered_df.sort_values(by=['name', 'side'],
key=lambda col: col.str.lower())
except Exception as e:
raise DataFrameProcessingError(
"Failed to filter DataFrame",
"standard_filtering",
str(e)
) from e
def process_legendary_cards(df: pd.DataFrame) -> pd.DataFrame:
"""Process and filter legendary cards for commander eligibility.
Args:
df: DataFrame containing all cards
Returns:
DataFrame containing only commander-eligible cards
Raises:
DataFrameProcessingError: If processing fails
"""
try:
# Filter for legendary creatures and eligible cards
mask = df['type'].str.contains('|'.join(LEGENDARY_OPTIONS), na=False)
# Add cards that can be commanders
can_be_commander = df['text'].str.contains(
'can be your commander',
na=False
)
filtered_df = df[mask | can_be_commander].copy()
# Remove illegal sets
for set_code in NON_LEGAL_SETS:
filtered_df = filtered_df[
~filtered_df['printings'].str.contains(set_code, na=False)
]
return filtered_df
except Exception as e:
raise DataFrameProcessingError(
"Failed to process legendary cards",
"commander_processing",
str(e)
) from e
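A small self-contained sketch of the two helpers on a toy DataFrame; the card data below is illustrative, not real MTGJSON output:

import pandas as pd
from setup_utils import filter_dataframe, process_legendary_cards

toy = pd.DataFrame([{
    'name': 'Example Commander', 'faceName': None, 'edhrecRank': 1,
    'colorIdentity': 'G, W', 'colors': 'G, W', 'manaCost': '{G}{W}',
    'manaValue': 2, 'type': 'Legendary Creature', 'layout': 'normal',
    'text': 'Vigilance', 'power': '2', 'toughness': '2',
    'keywords': 'Vigilance', 'side': None, 'availability': 'paper',
    'promoTypes': None, 'securityStamp': None, 'printings': 'EXA',
}])

# Mirror the order used in determine_commanders(): find eligible legendaries,
# then apply the standard filters.
commanders = process_legendary_cards(toy)
cleaned = filter_dataframe(commanders, banned_cards=[])
print(cleaned[['name', 'colorIdentity', 'type']])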