Noticed that changes made in setup drastically reduced the number of cards remaining after filtering. Redoing setup and its associated files to fix this

mwisnowski 2025-01-13 10:06:19 -08:00
parent c1d6b5ce18
commit c4d773d663
4 changed files with 369 additions and 117 deletions

85
exceptions.py Normal file

@@ -0,0 +1,85 @@
"""Custom exceptions for MTG Python Deckbuilder setup operations."""
class MTGSetupError(Exception):
"""Base exception class for MTG setup-related errors."""
pass
class CSVFileNotFoundError(MTGSetupError):
"""Exception raised when a required CSV file is not found.
This exception is raised when attempting to access or process a CSV file
that does not exist in the expected location.
Args:
message: Explanation of the error
filename: Name of the missing CSV file
"""
def __init__(self, message: str, filename: str) -> None:
self.filename = filename
super().__init__(f"{message}: {filename}")
class MTGJSONDownloadError(MTGSetupError):
"""Exception raised when downloading data from MTGJSON fails.
This exception is raised when there are issues downloading card data
from the MTGJSON API, such as network errors or API failures.
Args:
message: Explanation of the error
url: The URL that failed to download
status_code: HTTP status code if available
"""
def __init__(self, message: str, url: str, status_code: Optional[int] = None) -> None:
self.url = url
self.status_code = status_code
status_info = f" (Status: {status_code})" if status_code else ""
super().__init__(f"{message}: {url}{status_info}")
class DataFrameProcessingError(MTGSetupError):
"""Exception raised when DataFrame operations fail during setup.
This exception is raised when there are issues processing card data
in pandas DataFrames, such as filtering, sorting, or transformation errors.
Args:
message: Explanation of the error
operation: The DataFrame operation that failed (e.g., 'color_filtering', 'commander_processing')
details: Additional error details
Examples:
>>> raise DataFrameProcessingError(
... "Invalid color identity",
... "color_filtering",
... "Color 'P' is not a valid MTG color"
... )
"""
def __init__(self, message: str, operation: str, details: Optional[str] = None) -> None:
self.operation = operation
self.details = details
error_info = f" - {details}" if details else ""
super().__init__(f"{message} during {operation}{error_info}")
class ColorFilterError(MTGSetupError):
"""Exception raised when color-specific filtering operations fail.
This exception is raised when there are issues filtering cards by color,
such as invalid color specifications or color identity processing errors.
Args:
message: Explanation of the error
color: The color value that caused the error
details: Additional error details
Examples:
>>> raise ColorFilterError(
... "Invalid color specification",
... "Purple",
... "Color must be one of: W, U, B, R, G, or C"
... )
"""
def __init__(self, message: str, color: str, details: Optional[str] = None) -> None:
self.color = color
self.details = details
error_info = f" - {details}" if details else ""
super().__init__(f"{message} for color '{color}'{error_info}")

settings.py

@@ -764,3 +764,63 @@ VOLTRON_PATTERNS = [
'living weapon',
'reconfigure'
]
# Constants for setup and CSV processing
MTGJSON_API_URL = 'https://mtgjson.com/api/v5/csv/cards.csv'
LEGENDARY_OPTIONS = [
'Legendary Creature',
'Legendary Artifact',
'Legendary Artifact Creature',
'Legendary Enchantment Creature',
'Legendary Planeswalker'
]
NON_LEGAL_SETS = [
'PHTR', 'PH17', 'PH18', 'PH19', 'PH20', 'PH21',
'UGL', 'UND', 'UNH', 'UST'
]
CARD_TYPES_TO_EXCLUDE = [
'Plane —',
'Conspiracy',
'Vanguard',
'Scheme',
'Phenomenon',
'Stickers',
'Attraction',
'Hero',
'Contraption'
]
CSV_PROCESSING_COLUMNS = [
'name',
'faceName',
'edhrecRank',
'colorIdentity',
'colors',
'manaCost',
'manaValue',
'type',
'layout',
'text',
'power',
'toughness',
'keywords',
'side'
]
SETUP_COLORS = ['colorless', 'white', 'blue', 'black', 'green', 'red',
'azorius', 'orzhov', 'selesnya', 'boros', 'dimir',
'simic', 'izzet', 'golgari', 'rakdos', 'gruul',
'bant', 'esper', 'grixis', 'jund', 'naya',
'abzan', 'jeskai', 'mardu', 'sultai', 'temur',
'dune', 'glint', 'ink', 'witch', 'yore', 'wubrg']
COLOR_ABRV = ['Colorless', 'W', 'U', 'B', 'G', 'R',
'U, W', 'B, W', 'G, W', 'R, W', 'B, U',
'G, U', 'R, U', 'B, G', 'B, R', 'G, R',
'G, U, W', 'B, U, W', 'B, R, U', 'B, G, R', 'G, R, W',
'B, G, W', 'R, U, W', 'B, R, W', 'B, G, U', 'G, R, U',
'B, G, R, W', 'B, G, R, U', 'G, R, U, W', 'B, G, U, W',
'B, R, U, W', 'B, G, R, U, W']
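SETUP_COLORS and COLOR_ABRV are parallel lists that the setup loops index together; a quick sketch of the mapping they imply:

from settings import SETUP_COLORS, COLOR_ABRV

# Pair each color-identity name with its abbreviation string.
color_to_identity = dict(zip(SETUP_COLORS, COLOR_ABRV))
assert color_to_identity['azorius'] == 'U, W'
assert color_to_identity['wubrg'] == 'B, G, R, U, W'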

168
setup.py

@@ -3,23 +3,18 @@ from __future__ import annotations
import pandas as pd # type: ignore
import requests # type: ignore
import inquirer.prompt # type: ignore
import logging
from settings import banned_cards, csv_directory
from settings import banned_cards, csv_directory, SETUP_COLORS, COLOR_ABRV, MTGJSON_API_URL
from setup_utils import download_cards_csv, filter_dataframe, process_legendary_cards
colors = ['colorless', 'white', 'blue', 'black', 'green', 'red',
'azorius', 'orzhov', 'selesnya', 'boros', 'dimir',
'simic', 'izzet', 'golgari', 'rakdos', 'gruul',
'bant', 'esper', 'grixis', 'jund', 'naya',
'abzan', 'jeskai', 'mardu', 'sultai', 'temur',
'dune', 'glint', 'ink', 'witch', 'yore', 'wubrg']
color_abrv = ['Colorless', 'W', 'U', 'B', 'G', 'R',
'U, W', 'B, W', 'G, W', 'R, W', 'B, U',
'G, U', 'R, U', 'B, G', 'B, R', 'G, R',
'G, U, W', 'B, U, W', 'B, R, U', 'B, G, R', 'G, R, W',
'B, G, W', 'R, U, W', 'B, R, W', 'B, G, U', 'G, R, U',
'B, G, R, W', 'B, G, R, U', 'G, R, U, W', 'B, G, U, W',
'B, R, U, W', 'B, G, R, U, W']
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)
def filter_by_color(df, column_name, value, new_csv_name):
# Filter dataframe
@@ -54,110 +49,76 @@ def filter_by_color(df, column_name, value, new_csv_name):
def determine_commanders():
print('Generating commander_cards.csv, containing all cards eligible to be commanders.')
# Filter dataframe
while True:
try:
with open(f'{csv_directory}/cards.csv', 'r', encoding='utf-8'):
# Check for cards.csv
cards_file = f'{csv_directory}/cards.csv'
try:
with open(cards_file, 'r', encoding='utf-8'):
print('cards.csv exists.')
break
except FileNotFoundError:
# If the cards.csv file does not exist or can't be found, pull it from mtgjson.com
print('cards.csv not found, downloading from mtgjson')
url = 'https://mtgjson.com/api/v5/csv/cards.csv'
r = requests.get(url)
with open(f'{csv_directory}/cards.csv', 'wb') as outputfile:
outputfile.write(r.content)
download_cards_csv(MTGJSON_API_URL, cards_file)
# Load cards.csv file into pandas dataframe so it can be further broken down
df = pd.read_csv(f'{csv_directory}/cards.csv', low_memory=False)
# Set frames that have nothing for color identity to be 'Colorless' instead
# Load and process cards data
df = pd.read_csv(cards_file, low_memory=False)
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
legendary_options = ['Legendary Creature','Legendary Artifact', 'Legendary Artifact Creature', 'Legendary Enchantment Creature', 'Legendary Planeswalker']
filtered_df = df[df['type'].str.contains('|'.join(legendary_options))]
"""
Saves the filtered dataframe to a new csv file, and narrows down/rearranges the columns it
keeps to increase readability/trim some extra data.
Additionally attempts to remove as many duplicates as possible (including cards with
reversible prints), as well as taking out Arena-only cards.
"""
rows_to_drop = []
non_legel_sets = ['PHTR', 'PH17', 'PH18' ,'PH19', 'PH20', 'PH21', 'UGL', 'UND', 'UNH', 'UST',]
for index, row in filtered_df.iterrows():
if ('Legendary Artifact' in row['type']
or 'Legendary Planeswalker' in row['type']):
if 'Legendary Artifact Creature' not in row['type']:
if pd.notna(row['text']):
if f'{row['name']} can be your commander' not in row['text']:
rows_to_drop.append(index)
for illegal_set in non_legel_sets:
if illegal_set in row['printings']:
rows_to_drop.append(index)
# Process legendary cards
filtered_df = process_legendary_cards(df)
filtered_df = filtered_df.drop(rows_to_drop)
# Apply standard filters
filtered_df = filter_dataframe(filtered_df, banned_cards)
filtered_df.sort_values('name')
filtered_df = filtered_df.loc[filtered_df['layout'] != 'reversible_card']
filtered_df = filtered_df[filtered_df['availability'].str.contains('paper')]
filtered_df = filtered_df.loc[filtered_df['promoTypes'] != 'playtest']
filtered_df = filtered_df.loc[filtered_df['securityStamp'] != 'heart']
filtered_df = filtered_df.loc[filtered_df['securityStamp'] != 'acorn']
for card in banned_cards:
filtered_df = filtered_df[~filtered_df['name'].str.contains(card)]
card_types = ['Plane —', 'Conspiracy', 'Vanguard', 'Scheme', 'Phenomenon', 'Stickers', 'Attraction', 'Hero', 'Contraption']
for card_type in card_types:
filtered_df = filtered_df[~filtered_df['type'].str.contains(card_type)]
filtered_df['faceName'] = filtered_df['faceName'].fillna(filtered_df['name'])
filtered_df.drop_duplicates(subset='faceName', keep='first', inplace=True)
columns_to_keep = ['name', 'faceName','edhrecRank','colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'layout', 'text', 'power', 'toughness', 'keywords', 'side']
filtered_df = filtered_df[columns_to_keep]
filtered_df.sort_values(by=['name', 'side'], key=lambda col: col.str.lower(), inplace=True)
# Save commander cards
filtered_df.to_csv(f'{csv_directory}/commander_cards.csv', index=False)
print('commander_cards.csv file generated.')
except Exception as e:
print(f'Error generating commander cards: {str(e)}')
raise
def initial_setup():
print('Checking for cards.csv file.\n')
while True:
"""Perform initial setup by downloading card data and creating filtered CSV files.
This function:
1. Downloads the latest card data from MTGJSON if needed
2. Creates color-filtered CSV files
3. Generates commander-eligible cards list
Uses utility functions from setup_utils.py for file operations and data processing,
with error handling for both.
"""
logger.info('Checking for cards.csv file')
try:
with open(f'{csv_directory}/cards.csv', 'r', encoding='utf-8'):
print('cards.csv exists.')
break
cards_file = f'{csv_directory}/cards.csv'
try:
with open(cards_file, 'r', encoding='utf-8'):
logger.info('cards.csv exists')
except FileNotFoundError:
# If the cards.csv file does not exist or can't be found, pull it from mtgjson.com
print('cards.csv not found, downloading from mtgjson')
url = 'https://mtgjson.com/api/v5/csv/cards.csv'
r = requests.get(url)
with open(f'{csv_directory}/cards.csv', 'wb') as outputfile:
outputfile.write(r.content)
logger.info('cards.csv not found, downloading from mtgjson')
download_cards_csv(MTGJSON_API_URL, cards_file)
# Load cards.csv file into pandas dataframe so it can be further broken down
df = pd.read_csv(f'{csv_directory}/cards.csv', low_memory=False)
# Set frames that have nothing for color identity to be 'Colorless' instead
df = pd.read_csv(cards_file, low_memory=False)
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
# Check for and create missing, individual color identity sorted CSVs
print('Checking for color identity sorted files.\n')
logger.info('Checking for color identity sorted files')
# For loop to iterate through the colors
for i in range(min(len(colors), len(color_abrv))):
print(f'Checking for {colors[i]}_cards.csv.')
for i in range(min(len(SETUP_COLORS), len(COLOR_ABRV))):
logger.info(f'Checking for {SETUP_COLORS[i]}_cards.csv')
try:
with open(f'{csv_directory}/{colors[i]}_cards.csv', 'r', encoding='utf-8'):
print(f'{colors[i]}_cards.csv exists.\n')
with open(f'{csv_directory}/{SETUP_COLORS[i]}_cards.csv', 'r', encoding='utf-8'):
logger.info(f'{SETUP_COLORS[i]}_cards.csv exists')
except FileNotFoundError:
print(f'{colors[i]}_cards.csv not found, creating one.\n')
filter_by_color(df, 'colorIdentity', color_abrv[i], f'{csv_directory}/{colors[i]}_cards.csv')
logger.info(f'{SETUP_COLORS[i]}_cards.csv not found, creating one')
filter_by_color(df, 'colorIdentity', COLOR_ABRV[i], f'{csv_directory}/{SETUP_COLORS[i]}_cards.csv')
# Once by-color lists have been made, Determine legendary creatures
# Generate commander list
determine_commanders()
# Once Legendary creatures are determined, generate staple lists
# generate_staple_lists()
except Exception as e:
logger.error(f'Error during initial setup: {str(e)}')
raise
def regenerate_csvs_all():
"""
@@ -188,10 +149,10 @@ def regenerate_csvs_all():
print('Regenerating color identity sorted files.\n')
# For loop to iterate through the colors
for i in range(min(len(colors), len(color_abrv))):
print(f'Regenerating {colors[i]}_cards.csv.')
filter_by_color(df, 'colorIdentity', color_abrv[i], f'csv_files/{colors[i]}_cards.csv')
print(f'A new {colors[i]}_cards.csv file has been made.\n')
for i in range(min(len(SETUP_COLORS), len(COLOR_ABRV))):
print(f'Regenerating {SETUP_COLORS[i]}_cards.csv.')
filter_by_color(df, 'colorIdentity', COLOR_ABRV[i], f'csv_files/{SETUP_COLORS[i]}_cards.csv')
print(f'A new {SETUP_COLORS[i]}_cards.csv file has been made.\n')
# Once files are regenerated, create a new legendary list
determine_commanders()
@@ -201,8 +162,8 @@ def regenerate_csv_by_color(color):
Pull the original cards.csv file and remake the {color}_cards.csv files
"""
# Determine the color_abv to use
color_abrv_index = colors.index(color)
color_abv = color_abrv[color_abrv_index]
color_index = SETUP_COLORS.index(color)
color_abv = COLOR_ABRV[color_index]
print('Downloading cards.csv from mtgjson')
url = 'https://mtgjson.com/api/v5/csv/cards.csv'
r = requests.get(url)
@@ -258,7 +219,4 @@ def setup():
break
break
#regenerate_csvs_all()
#regenerate_csv_by_color('white')
#determine_commanders()
#set_lands()
initial_setup()
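A short sketch, assuming the configured csv_directory exists and is writable, of how the refactored entry points can be driven directly:

from setup import initial_setup, regenerate_csv_by_color

# Download cards.csv if missing, write the color-identity CSVs,
# and generate commander_cards.csv.
initial_setup()

# Rebuild a single color file on demand.
regenerate_csv_by_color('white')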

149
setup_utils.py Normal file

@@ -0,0 +1,149 @@
from __future__ import annotations
import pandas as pd
import requests
import logging
from tqdm import tqdm
from pathlib import Path
from typing import List, Optional, Union
from settings import (
CSV_PROCESSING_COLUMNS,
CARD_TYPES_TO_EXCLUDE,
NON_LEGAL_SETS,
LEGENDARY_OPTIONS
)
from exceptions import CSVFileNotFoundError, MTGJSONDownloadError, DataFrameProcessingError
def download_cards_csv(url: str, output_path: Union[str, Path]) -> None:
"""Download cards data from MTGJSON and save to CSV.
Args:
url: URL to download cards data from
output_path: Path to save the downloaded CSV file
Raises:
MTGJSONDownloadError: If download fails or response is invalid
"""
try:
response = requests.get(url, stream=True)
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
with open(output_path, 'wb') as f:
with tqdm(total=total_size, unit='iB', unit_scale=True, desc='Downloading cards data') as pbar:
for chunk in response.iter_content(chunk_size=8192):
size = f.write(chunk)
pbar.update(size)
except requests.RequestException as e:
raise MTGJSONDownloadError(
"Failed to download cards data",
url,
getattr(e.response, 'status_code', None) if hasattr(e, 'response') else None
) from e
def check_csv_exists(filepath: Union[str, Path]) -> bool:
"""Check if a CSV file exists at the specified path.
Args:
filepath: Path to check for CSV file
Returns:
True if file exists, False otherwise
"""
return Path(filepath).is_file()
def filter_dataframe(df: pd.DataFrame, banned_cards: List[str]) -> pd.DataFrame:
"""Apply standard filters to the cards DataFrame.
Args:
df: DataFrame to filter
banned_cards: List of banned card names to exclude
Returns:
Filtered DataFrame
Raises:
DataFrameProcessingError: If filtering operations fail
"""
try:
# Fill null color identities
df['colorIdentity'] = df['colorIdentity'].fillna('Colorless')
# Basic filters
filtered_df = df[
(df['layout'] != 'reversible_card') &
(df['availability'].str.contains('paper', na=False)) &
(df['promoTypes'] != 'playtest') &
(~df['securityStamp'].str.contains('Heart|Acorn', na=False))
]
# Remove illegal sets
for set_code in NON_LEGAL_SETS:
filtered_df = filtered_df[
~filtered_df['printings'].str.contains(set_code, na=False)
]
# Remove banned cards
for card in banned_cards:
filtered_df = filtered_df[~filtered_df['name'].str.contains(card, na=False)]
# Remove special card types
for card_type in CARD_TYPES_TO_EXCLUDE:
filtered_df = filtered_df[~filtered_df['type'].str.contains(card_type, na=False)]
# Handle face names and duplicates
filtered_df['faceName'] = filtered_df['faceName'].fillna(filtered_df['name'])
filtered_df = filtered_df.drop_duplicates(subset='faceName', keep='first')
# Select and sort columns
filtered_df = filtered_df[CSV_PROCESSING_COLUMNS]
return filtered_df.sort_values(by=['name', 'side'],
key=lambda col: col.str.lower())
except Exception as e:
raise DataFrameProcessingError(
"Failed to filter DataFrame",
"standard_filtering",
str(e)
) from e
def process_legendary_cards(df: pd.DataFrame) -> pd.DataFrame:
"""Process and filter legendary cards for commander eligibility.
Args:
df: DataFrame containing all cards
Returns:
DataFrame containing only commander-eligible cards
Raises:
DataFrameProcessingError: If processing fails
"""
try:
# Filter for legendary creatures and eligible cards
mask = df['type'].str.contains('|'.join(LEGENDARY_OPTIONS), na=False)
# Add cards that can be commanders
can_be_commander = df['text'].str.contains(
'can be your commander',
na=False
)
filtered_df = df[mask | can_be_commander].copy()
# Remove illegal sets
for set_code in NON_LEGAL_SETS:
filtered_df = filtered_df[
~filtered_df['printings'].str.contains(set_code, na=False)
]
return filtered_df
except Exception as e:
raise DataFrameProcessingError(
"Failed to process legendary cards",
"commander_processing",
str(e)
) from e
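A small self-contained sketch of the two helpers on a toy DataFrame; the card data below is illustrative, not real MTGJSON output:

import pandas as pd
from setup_utils import filter_dataframe, process_legendary_cards

toy = pd.DataFrame([{
    'name': 'Example Commander', 'faceName': None, 'edhrecRank': 1,
    'colorIdentity': 'G, W', 'colors': 'G, W', 'manaCost': '{G}{W}',
    'manaValue': 2, 'type': 'Legendary Creature', 'layout': 'normal',
    'text': 'Vigilance', 'power': '2', 'toughness': '2',
    'keywords': 'Vigilance', 'side': None, 'availability': 'paper',
    'promoTypes': None, 'securityStamp': None, 'printings': 'EXA',
}])

# Mirror the order used in determine_commanders(): find eligible legendaries,
# then apply the standard filters.
commanders = process_legendary_cards(toy)
cleaned = filter_dataframe(commanders, banned_cards=[])
print(cleaned[['name', 'colorIdentity', 'type']])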