Began work on overhauling the deck_builder

2025-12-17 08:00:13 +01:00 · 2025-01-14 12:07:49 -08:00 · 2025-01-14 12:07:49 -08:00 · 319f7848d3
commit 319f7848d3
parent e0dd09adee
10 changed files with 2589 additions and 761 deletions
--- a/setup_utils.py
+++ b/setup_utils.py
@ -36,6 +36,10 @@ from settings import (
    FILL_NA_COLUMNS,
    SORT_CONFIG,
    FILTER_CONFIG,
+    COLUMN_ORDER,
+    PRETAG_COLUMN_ORDER,
+    EXCLUDED_CARD_TYPES,
+    TAGGED_COLUMN_ORDER
 )
 from exceptions import (
    MTGJSONDownloadError,
@ -43,6 +47,7 @@ from exceptions import (
    ColorFilterError,
    CommanderValidationError
 )
+from type_definitions import CardLibraryDF

 # Create logs directory if it doesn't exist
 if not os.path.exists('logs'):
@ -339,4 +344,74 @@ def process_legendary_cards(df: pd.DataFrame) -> pd.DataFrame:
            "Failed to process legendary cards",
            "commander_processing",
            str(e)
-        ) from e
+        ) from e
+
+def process_card_dataframe(df: CardLibraryDF, batch_size: int = 1000, columns_to_keep: Optional[List[str]] = None,
+                         include_commander_cols: bool = False, skip_availability_checks: bool = False) -> pd.DataFrame:
+    """Process DataFrame with common operations in batches.
+
+    Args:
+        df: DataFrame to process
+        batch_size: Size of batches for processing
+        columns_to_keep: List of columns to keep (default: COLUMN_ORDER)
+        include_commander_cols: Whether to include commander-specific columns
+        skip_availability_checks: Whether to skip availability and security checks (default: False)
+
+    Args:
+        df: DataFrame to process
+        batch_size: Size of batches for processing
+        columns_to_keep: List of columns to keep (default: COLUMN_ORDER)
+        include_commander_cols: Whether to include commander-specific columns
+
+    Returns:
+        CardLibraryDF: Processed DataFrame with standardized structure
+    """
+    logger.info("Processing card DataFrame...")
+
+    if columns_to_keep is None:
+        columns_to_keep = TAGGED_COLUMN_ORDER.copy()
+        if include_commander_cols:
+            commander_cols = ['printings', 'text', 'power', 'toughness', 'keywords']
+            columns_to_keep.extend(col for col in commander_cols if col not in columns_to_keep)
+
+    # Fill NA values
+    df.loc[:, 'colorIdentity'] = df['colorIdentity'].fillna('Colorless')
+    df.loc[:, 'faceName'] = df['faceName'].fillna(df['name'])
+
+    # Process in batches
+    total_batches = len(df) // batch_size + 1
+    processed_dfs = []
+
+    for i in tqdm(range(total_batches), desc="Processing batches"):
+        start_idx = i * batch_size
+        end_idx = min((i + 1) * batch_size, len(df))
+        batch = df.iloc[start_idx:end_idx].copy()
+
+        if not skip_availability_checks:
+            columns_to_keep = COLUMN_ORDER.copy()
+            logger.debug("Performing column checks...")
+            # Common processing steps
+            batch = batch[batch['availability'].str.contains('paper', na=False)]
+            batch = batch.loc[batch['layout'] != 'reversible_card']
+            batch = batch.loc[batch['promoTypes'] != 'playtest']
+            batch = batch.loc[batch['securityStamp'] != 'heart']
+            batch = batch.loc[batch['securityStamp'] != 'acorn']
+            # Keep only specified columns
+            batch = batch[columns_to_keep]
+            processed_dfs.append(batch)
+        else:
+            logger.debug("Skipping column checks...")
+    
+    # Keep only specified columns
+    batch = batch[columns_to_keep]
+    processed_dfs.append(batch)
+
+    # Combine processed batches
+    result = pd.concat(processed_dfs, ignore_index=True)
+
+    # Final processing
+    result.drop_duplicates(subset='faceName', keep='first', inplace=True)
+    result.sort_values(by=['name', 'side'], key=lambda col: col.str.lower(), inplace=True)
+
+    logger.info("DataFrame processing completed")
+    return result