removed unneeded test in project root

Merge pull request #50 from mwisnowski/maintenance/web-unification
Web UI Architecture Improvements: Modern Stack & Quality Enhancements
2025-12-16 15:40:12 +01:00 · 2025-11-07 10:18:53 -08:00 · 2025-11-07 09:24:25 -08:00 · 2025-11-04 10:08:49 -08:00 · 2025-10-31 10:11:00 -07:00 · 2025-10-31 08:18:09 -07:00
263 changed files with 46985 additions and 11305 deletions
--- a/.env.example
+++ b/.env.example
@ -13,7 +13,7 @@
 # HOST=0.0.0.0                       # Uvicorn bind host (only when APP_MODE=web).
 # PORT=8080                          # Uvicorn port.
 # WORKERS=1                          # Uvicorn worker count.
-APP_VERSION=v2.7.0                 # Matches dockerhub compose.
+APP_VERSION=v3.0.1                 # Matches dockerhub compose.

 ############################
 # Theming
@ -27,9 +27,17 @@ THEME=system                        # system|light|dark (initial default; user p
 # DECK_EXPORTS=/app/deck_files       # Where finished deck exports are read by Web UI.
 # OWNED_CARDS_DIR=/app/owned_cards   # Preferred directory for owned inventory uploads.
 # CARD_LIBRARY_DIR=/app/owned_cards  # Back-compat alias for OWNED_CARDS_DIR.
-# CSV_FILES_DIR=/app/csv_files       # Override CSV base dir (use test snapshots or alternate datasets)
+# CSV_FILES_DIR=/app/csv_files       # Override CSV base dir (DEPRECATED v3.0.0+, use CARD_FILES_* instead)
 # CARD_INDEX_EXTRA_CSV=              # Inject an extra CSV into the card index for testing

+# Parquet-based card files (v3.0.0+)
+# CARD_FILES_DIR=card_files          # Base directory for Parquet files (default: card_files)
+# CARD_FILES_RAW_DIR=card_files/raw  # Raw MTGJSON Parquet files (default: card_files/raw)
+# CARD_FILES_PROCESSED_DIR=card_files/processed  # Processed/tagged Parquet files (default: card_files/processed)
+
+# Legacy CSV compatibility (v3.0.0 only, removed in v3.1.0)
+# LEGACY_CSV_COMPAT=0                # Set to 1 to enable CSV fallback when Parquet loading fails
+
 ############################
 # Web UI Feature Flags
 ############################
@ -44,11 +52,16 @@ ENABLE_PRESETS=0                    # dockerhub: ENABLE_PRESETS="0"
 WEB_VIRTUALIZE=1                    # dockerhub: WEB_VIRTUALIZE="1"
 ALLOW_MUST_HAVES=1                  # dockerhub: ALLOW_MUST_HAVES="1"
 SHOW_MUST_HAVE_BUTTONS=0           # dockerhub: SHOW_MUST_HAVE_BUTTONS="0" (set to 1 to surface must include/exclude buttons)
-WEB_THEME_PICKER_DIAGNOSTICS=0      # 1=enable uncapped synergies, diagnostics fields & /themes/metrics (dev only)
+WEB_THEME_PICKER_DIAGNOSTICS=1      # dockerhub: WEB_THEME_PICKER_DIAGNOSTICS="1"
+ENABLE_CARD_DETAILS=1               # dockerhub: ENABLE_CARD_DETAILS="1"
+SIMILARITY_CACHE_ENABLED=1          # dockerhub: SIMILARITY_CACHE_ENABLED="1"
+SIMILARITY_CACHE_PATH="card_files/similarity_cache.parquet"     # Path to Parquet cache file
+ENABLE_BATCH_BUILD=1                # dockerhub: ENABLE_BATCH_BUILD="1" (enable Build X and Compare feature)

 ############################
 # Partner / Background Mechanics
 ############################
+# HEADLESS_EXPORT_JSON=1              # 1=export resolved run config JSON
 ENABLE_PARTNER_MECHANICS=1          # 1=unlock partner/background commander inputs for headless (web wiring in progress)
 ENABLE_PARTNER_SUGGESTIONS=1        # 1=enable partner suggestion API and UI chips (dataset auto-refreshes when missing)
 # PARTNER_SUGGESTIONS_DATASET=config/analytics/partner_synergy.json  # Optional override path for the suggestion dataset
@ -93,6 +106,9 @@ WEB_TAG_PARALLEL=1                  # dockerhub: WEB_TAG_PARALLEL="1"
 WEB_TAG_WORKERS=2                   # dockerhub: WEB_TAG_WORKERS="4"
 WEB_AUTO_ENFORCE=0                  # dockerhub: WEB_AUTO_ENFORCE="0"

+# Card Image Caching (optional, uses Scryfall bulk data API)
+CACHE_CARD_IMAGES=1                 # dockerhub: CACHE_CARD_IMAGES="1" (1=download images to card_files/images/, 0=fetch from Scryfall API on demand)
+
 # Build Stage Ordering
 WEB_STAGE_ORDER=new                 # new|legacy. 'new' (default): creatures → spells → lands → fill. 'legacy': lands → creatures → spells → fill

--- a/.github/workflows/build-similarity-cache.yml
+++ b/.github/workflows/build-similarity-cache.yml
@ -0,0 +1,293 @@
+name: Build Similarity Cache
+
+# Manual trigger + weekly schedule + callable from other workflows
+on:
+  workflow_dispatch:
+    inputs:
+      force_rebuild:
+        description: 'Force rebuild even if cache exists'
+        required: false
+        type: boolean
+        default: true
+  workflow_call:  # Allow this workflow to be called by other workflows
+  schedule:
+    # Run every Sunday at 2 AM UTC
+    - cron: '0 2 * * 0'
+
+jobs:
+  build-cache:
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+      
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+      
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+      
+      - name: Check if cache needs rebuild
+        id: check_cache
+        run: |
+          FORCE="${{ github.event.inputs.force_rebuild }}"
+          if [ "$FORCE" = "true" ] || [ ! -f "card_files/similarity_cache.parquet" ]; then
+            echo "needs_build=true" >> $GITHUB_OUTPUT
+            echo "Cache doesn't exist or force rebuild requested"
+          else
+            # Check cache age via metadata JSON
+            CACHE_AGE_DAYS=$(python -c "
+          import json
+          from datetime import datetime
+          from pathlib import Path
+          
+          metadata_path = Path('card_files/similarity_cache_metadata.json')
+          if metadata_path.exists():
+              with open(metadata_path) as f:
+                  data = json.load(f)
+              build_date = data.get('build_date')
+              if build_date:
+                  age = (datetime.now() - datetime.fromisoformat(build_date)).days
+                  print(age)
+              else:
+                  print(999)
+          else:
+              print(999)
+          " || echo "999")
+            
+            if [ "$CACHE_AGE_DAYS" -gt 7 ]; then
+              echo "needs_build=true" >> $GITHUB_OUTPUT
+              echo "Cache is $CACHE_AGE_DAYS days old, rebuilding"
+            else
+              echo "needs_build=false" >> $GITHUB_OUTPUT
+              echo "Cache is only $CACHE_AGE_DAYS days old, skipping"
+            fi
+          fi
+      
+      - name: Run initial setup
+        if: steps.check_cache.outputs.needs_build == 'true'
+        run: |
+          python -c "from code.file_setup.setup import initial_setup; initial_setup()"
+      
+      - name: Run tagging (serial for CI reliability)
+        if: steps.check_cache.outputs.needs_build == 'true'
+        run: |
+          python -c "from code.tagging.tagger import run_tagging; run_tagging(parallel=False)"
+          
+          # Verify tagging completed
+          if [ ! -f "card_files/processed/.tagging_complete.json" ]; then
+            echo "ERROR: Tagging completion flag not found"
+            exit 1
+          fi
+      
+      - name: Debug - Inspect Parquet file after tagging
+        if: steps.check_cache.outputs.needs_build == 'true'
+        run: |
+          python -c "
+          import pandas as pd
+          from pathlib import Path
+          from code.path_util import get_processed_cards_path
+          
+          parquet_path = Path(get_processed_cards_path())
+          print(f'Reading Parquet file: {parquet_path}')
+          print(f'File exists: {parquet_path.exists()}')
+          
+          if not parquet_path.exists():
+              raise FileNotFoundError(f'Parquet file not found: {parquet_path}')
+          
+          df = pd.read_parquet(parquet_path)
+          print(f'Loaded {len(df)} rows from Parquet file')
+          print(f'Columns: {list(df.columns)}')
+          print('')
+          
+          # Show first 5 rows completely
+          print('First 5 complete rows:')
+          print('=' * 100)
+          for idx, row in df.head(5).iterrows():
+              print(f'Row {idx}:')
+              for col in df.columns:
+                  value = row[col]
+                  if isinstance(value, (list, tuple)) or hasattr(value, '__array__'):
+                      # For array-like, show type and length
+                      try:
+                          length = len(value)
+                          print(f'  {col}: {type(value).__name__}[{length}] = {value}')
+                      except:
+                          print(f'  {col}: {type(value).__name__} = {value}')
+                  else:
+                      print(f'  {col}: {value}')
+              print('-' * 100)
+          "
+      
+      - name: Generate theme catalog
+        if: steps.check_cache.outputs.needs_build == 'true'
+        run: |
+          if [ ! -f "config/themes/theme_catalog.csv" ]; then
+            echo "Theme catalog not found, generating..."
+            python -m code.scripts.generate_theme_catalog
+          else
+            echo "Theme catalog already exists, skipping generation"
+          fi
+      
+      - name: Verify theme catalog and tag statistics
+        if: steps.check_cache.outputs.needs_build == 'true'
+        run: |
+          # Detailed check of what tags were actually written
+          python -c "
+          import pandas as pd
+          from code.path_util import get_processed_cards_path
+          df = pd.read_parquet(get_processed_cards_path())
+          
+          # Helper to count tags (handles both list and numpy array)
+          def count_tags(x):
+              if x is None:
+                  return 0
+              if hasattr(x, '__len__'):
+                  try:
+                      return len(x)
+                  except:
+                      return 0
+              return 0
+          
+          # Count total tags
+          total_tags = 0
+          cards_with_tags = 0
+          sample_cards = []
+          
+          for idx, row in df.head(10).iterrows():
+              name = row['name']
+              tags = row['themeTags']
+              tag_count = count_tags(tags)
+              total_tags += tag_count
+              if tag_count > 0:
+                  cards_with_tags += 1
+                  sample_cards.append(f'{name}: {tag_count} tags')
+          
+          print(f'Sample of first 10 cards:')
+          for card in sample_cards:
+              print(f'  {card}')
+          
+          # Full count
+          all_tags = df['themeTags'].apply(count_tags).sum()
+          all_with_tags = (df['themeTags'].apply(count_tags) > 0).sum()
+          
+          print(f'')
+          print(f'Total cards: {len(df):,}')
+          print(f'Cards with tags: {all_with_tags:,}')
+          print(f'Total theme tags: {all_tags:,}')
+          
+          if all_tags < 10000:
+              raise ValueError(f'Only {all_tags} tags found, expected >10k')
+          "
+      
+      - name: Build similarity cache (Parquet) from card_files/processed/all_cards.parquet
+        if: steps.check_cache.outputs.needs_build == 'true'
+        run: |
+          python -m code.scripts.build_similarity_cache_parquet --parallel --checkpoint-interval 1000 --force
+      
+      - name: Verify cache was created
+        if: steps.check_cache.outputs.needs_build == 'true'
+        run: |
+          if [ ! -f "card_files/similarity_cache.parquet" ]; then
+            echo "ERROR: Similarity cache not created"
+            exit 1
+          fi
+          if [ ! -f "card_files/similarity_cache_metadata.json" ]; then
+            echo "ERROR: Similarity cache metadata not created"
+            exit 1
+          fi
+          if [ ! -f "card_files/processed/commander_cards.parquet" ]; then
+            echo "ERROR: Commander cache not created"
+            exit 1
+          fi
+          
+          echo "✓ All cache files created successfully"
+      
+      - name: Get cache metadata for commit message
+        if: steps.check_cache.outputs.needs_build == 'true'
+        id: cache_meta
+        run: |
+          METADATA=$(python -c "
+          import json
+          from pathlib import Path
+          from code.web.services.similarity_cache import get_cache
+          
+          cache = get_cache()
+          stats = cache.get_stats()
+          metadata = cache._metadata or {}
+          
+          build_date = metadata.get('build_date', 'unknown')
+          print(f\"{stats['total_cards']} cards, {stats['total_entries']} entries, {stats['file_size_mb']:.1f}MB, built {build_date}\")
+          ")
+          echo "metadata=$METADATA" >> $GITHUB_OUTPUT
+      
+      - name: Commit and push cache
+        if: steps.check_cache.outputs.needs_build == 'true'
+        run: |
+          git config --local user.email "github-actions[bot]@users.noreply.github.com"
+          git config --local user.name "github-actions[bot]"
+          
+          # Fetch all branches
+          git fetch origin
+          
+          # Try to checkout existing branch, or create new orphan branch
+          if git ls-remote --heads origin similarity-cache-data | grep similarity-cache-data; then
+            echo "Checking out existing similarity-cache-data branch..."
+            git checkout similarity-cache-data
+          else
+            echo "Creating new orphan branch similarity-cache-data..."
+            git checkout --orphan similarity-cache-data
+            git rm -rf . || true
+            # Create minimal README for the branch
+            echo "# Similarity Cache Data" > README.md
+            echo "This branch contains pre-built similarity cache files for the MTG Deckbuilder." >> README.md
+            echo "Updated automatically by GitHub Actions." >> README.md
+            echo "" >> README.md
+            echo "## Files" >> README.md
+            echo "- \`card_files/similarity_cache.parquet\` - Pre-computed card similarity cache" >> README.md
+            echo "- \`card_files/similarity_cache_metadata.json\` - Cache metadata" >> README.md
+            echo "- \`card_files/processed/all_cards.parquet\` - Tagged card database" >> README.md
+            echo "- \`card_files/processed/commander_cards.parquet\` - Commander-only cache (fast lookups)" >> README.md
+            echo "- \`card_files/processed/.tagging_complete.json\` - Tagging status" >> README.md
+          fi
+          
+          # Ensure directories exist
+          mkdir -p card_files/processed
+          
+          # Add similarity cache files (use -f to override .gitignore)
+          git add -f card_files/similarity_cache.parquet
+          git add -f card_files/similarity_cache_metadata.json
+          
+          # Add processed Parquet and status file
+          git add -f card_files/processed/all_cards.parquet
+          git add -f card_files/processed/commander_cards.parquet
+          git add -f card_files/processed/.tagging_complete.json
+          
+          git add README.md 2>/dev/null || true
+          
+          # Check if there are changes to commit
+          if git diff --staged --quiet; then
+            echo "No changes to commit"
+          else
+            git commit -m "chore: update similarity cache [${{ steps.cache_meta.outputs.metadata }}]"
+            git push origin similarity-cache-data --force
+          fi
+      
+      - name: Summary
+        if: always()
+        run: |
+          if [ "${{ steps.check_cache.outputs.needs_build }}" = "true" ]; then
+            echo "✓ Similarity cache built and committed"
+            echo "  Metadata: ${{ steps.cache_meta.outputs.metadata }}"
+          else
+            echo "⊘ Cache is recent, no rebuild needed"
+          fi
--- a/.github/workflows/dockerhub-publish.yml
+++ b/.github/workflows/dockerhub-publish.yml
@ -63,6 +63,18 @@ jobs:
      - name: Checkout
        uses: actions/checkout@v5.0.0

+      - name: Download similarity cache from branch
+        run: |
+          # Download cache files from similarity-cache-data branch
+          mkdir -p card_files
+          wget -q https://raw.githubusercontent.com/${{ github.repository }}/similarity-cache-data/card_files/similarity_cache.parquet -O card_files/similarity_cache.parquet || echo "Cache not found, will build without it"
+          wget -q https://raw.githubusercontent.com/${{ github.repository }}/similarity-cache-data/card_files/similarity_cache_metadata.json -O card_files/similarity_cache_metadata.json || echo "Metadata not found"
+          
+          if [ -f card_files/similarity_cache.parquet ]; then
+            echo "✓ Downloaded similarity cache"
+            ls -lh card_files/similarity_cache.parquet
+          fi
+
      - name: Compute amd64 tag
        id: arch_tag
        shell: bash
@ -120,6 +132,18 @@ jobs:
      - name: Checkout
        uses: actions/checkout@v5.0.0

+      - name: Download similarity cache from branch
+        run: |
+          # Download cache files from similarity-cache-data branch
+          mkdir -p card_files
+          wget -q https://raw.githubusercontent.com/${{ github.repository }}/similarity-cache-data/card_files/similarity_cache.parquet -O card_files/similarity_cache.parquet || echo "Cache not found, will build without it"
+          wget -q https://raw.githubusercontent.com/${{ github.repository }}/similarity-cache-data/card_files/similarity_cache_metadata.json -O card_files/similarity_cache_metadata.json || echo "Metadata not found"
+          
+          if [ -f card_files/similarity_cache.parquet ]; then
+            echo "✓ Downloaded similarity cache"
+            ls -lh card_files/similarity_cache.parquet
+          fi
+
      - name: Compute arm64 tag
        id: arch_tag
        shell: bash
--- a/.gitignore
+++ b/.gitignore
@ -9,6 +9,7 @@

 RELEASE_NOTES.md
 test.py
+test_*.py
 !test_exclude_cards.txt
 !test_include_exclude_config.json

@ -30,6 +31,7 @@ config/themes/catalog/
 csv_files/*
 !csv_files/testdata/
 !csv_files/testdata/**/*
+card_files/*

 deck_files/
 dist/
@ -39,4 +41,14 @@ logs/
 logs/*
 !logs/perf/
 logs/perf/*
-!logs/perf/theme_preview_warm_baseline.json
+!logs/perf/theme_preview_warm_baseline.json
+
+# Node.js and build artifacts
+node_modules/
+code/web/static/js/
+code/web/static/styles.css
+*.js.map
+
+# Keep TypeScript sources and Tailwind CSS input
+!code/web/static/ts/
+!code/web/static/tailwind.css
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -8,18 +8,277 @@ This format follows Keep a Changelog principles and aims for Semantic Versioning
 - Link PRs/issues inline when helpful, e.g., (#123) or [#123]. Reference-style links at the bottom are encouraged for readability.

 ## [Unreleased]
-### Summary
-Minor UI fixes for Quick Build progress and completion display.
-
 ### Added
-_No unreleased additions yet._
+- **Template Validation Tests**: Comprehensive test suite for HTML/Jinja2 templates
+  - Validates Jinja2 syntax across all templates
+  - Checks HTML structure (balanced tags, unique IDs, proper attributes)
+  - Basic accessibility validation (alt text, form labels, button types)
+  - Regression prevention thresholds to maintain code quality
+- **Code Quality Tools**: Enhanced development tooling for maintainability
+  - Automated utilities for code cleanup
+  - Improved type checking configuration
+- **Card Image Caching**: Optional local image cache for faster card display
+  - Downloads card images from Scryfall bulk data (respects API guidelines)
+  - Graceful fallback to Scryfall API for uncached images
+  - Enabled via `CACHE_CARD_IMAGES=1` environment variable
+  - Integrated with setup/tagging process
+  - Statistics endpoint with intelligent caching (weekly refresh, matching card data staleness)
+- **Component Library**: Living documentation of reusable UI components at `/docs/components`
+  - Interactive examples of all buttons, modals, forms, cards, and panels
+  - Jinja2 macros for consistent component usage
+  - Component partial templates for reuse across pages
+- **TypeScript Migration**: Migrated JavaScript codebase to TypeScript for better type safety
+  - Converted `components.js` (376 lines) and `app.js` (1390 lines) to TypeScript
+  - Created shared type definitions for state management, telemetry, HTMX, and UI components
+  - Integrated TypeScript compilation into build process (`npm run build:ts`)
+  - Compiled JavaScript output in `code/web/static/js/` directory
+  - Docker build automatically compiles TypeScript during image creation

 ### Changed
-_No unreleased changes yet._
+- **Inline JavaScript Cleanup**: Removed legacy card hover system (~230 lines of unused code)
+- **JavaScript Consolidation**: Extracted inline scripts to TypeScript modules
+  - Created `cardHover.ts` for unified hover panel functionality
+  - Created `cardImages.ts` for card image loading with automatic retry fallbacks
+  - Reduced inline script size in base template for better maintainability
+- **Migrated CSS to Tailwind**: Consolidated and unified CSS architecture
+  - Tailwind CSS v3 with custom MTG color palette
+  - PostCSS build pipeline with autoprefixer
+  - Reduced inline styles in templates (moved to shared CSS classes)
+  - Organized CSS into functional sections with clear documentation
+- **Theme Visual Improvements**: Enhanced readability and consistency across all theme modes
+  - Light mode: Darker text for improved readability, warm earth tone color palette
+  - Dark mode: Refined contrast for better visual hierarchy
+  - High-contrast mode: Optimized for maximum accessibility
+  - Consistent hover states across all interactive elements
+  - Improved visibility of form inputs and controls
+- **JavaScript Modernization**: Updated to modern JavaScript patterns
+  - Converted `var` declarations to `const`/`let`
+  - Added TypeScript type annotations for better IDE support and error catching
+  - Consolidated event handlers and utility functions
+- **Docker Build Optimization**: Improved developer experience
+  - Hot reload enabled for templates and static files
+  - Volume mounts for rapid iteration without rebuilds
+- **Template Modernization**: Migrated templates to use component system
+- **Intelligent Synergy Builder**: Analyze multiple builds and create optimized "best-of" deck
+  - Scores cards by frequency (50%), EDHREC rank (25%), and theme tags (25%)
+  - 10% bonus for cards appearing in 80%+ of builds
+  - Color-coded synergy scores in preview (green=high, red=low)
+  - Partner commander support with combined color identity
+  - Multi-copy card tracking (e.g., 8 Mountains, 7 Islands)
+  - Export synergy deck with full metadata (CSV, TXT, JSON files)
+- `ENABLE_BATCH_BUILD` environment variable to toggle feature (default: enabled)
+- Detailed progress logging for multi-build orchestration
+- User guide: `docs/user_guides/batch_build_compare.md`
+- **Web UI Component Library**: Standardized UI components for consistent design across all pages
+  - 5 component partial template files (buttons, modals, forms, cards, panels)
+  - ~900 lines of component CSS styles
+  - Interactive JavaScript utilities (components.js)
+  - Living component library page at `/docs/components`
+  - 1600+ lines developer documentation (component_catalog.md)
+- **Custom UI Enhancements**:
+  - Darker gray styling for home page buttons
+  - Visual highlighting for selected theme chips in deck builder
+
+### Changed
+- Migrated 5 templates to new component system (home, 404, 500, setup, commanders)
+- **Type Checking Configuration**: Improved Python code quality tooling
+  - Configured type checker for better error detection
+  - Optimized linting rules for development workflow
+
+### Fixed
+- **Template Quality**: Resolved HTML structure issues found by validation tests
+  - Fixed duplicate ID attributes in build wizard and theme picker templates
+  - Removed erroneous block tags from component documentation
+  - Corrected template structure for HTMX fragments
+- **Code Quality**: Resolved type checking warnings and improved code maintainability
+  - Fixed type annotation inconsistencies
+  - Cleaned up redundant code quality suppressions
+  - Corrected configuration conflicts
+
+### Removed
+_None_
+
+### Performance
+- Hot reload for CSS/template changes (no Docker rebuild needed)
+- Optional image caching reduces Scryfall API calls
+- Faster page loads with optimized CSS
+- TypeScript compilation produces optimized JavaScript
+
+### For Users
+- Faster card image loading with optional caching
+- Cleaner, more consistent web UI design
+- Improved page load performance
+- More reliable JavaScript behavior
+
+### Deprecated
+_None_
+
+### Security
+_None_
+
+## [3.0.1] - 2025-10-19
+### Added
+_None_
+
+### Changed
+_None_
+
+### Removed
+_None_
+
+### Fixed
+- **Color Identity Display**: Fixed commander color identity showing incorrectly as "Colorless (C)" for non-partner commanders in the summary panel
+
+### Performance
+- **Commander Selection Speed**: Dramatically improved response time from 4+ seconds to under 1 second
+  - Implemented intelligent caching for card data to eliminate redundant file loading
+  - Both commander data and full card database now cached with automatic refresh when data updates
+
+### Deprecated
+_None_
+
+### Security
+_None_
+
+## [3.0.0] - 2025-10-19
+### Summary
+Major infrastructure upgrade to Parquet format with comprehensive performance improvements, simplified data management, and instant setup via GitHub downloads.
+
+### Added
+- **Parquet Migration (M4)**: Unified `card_files/processed/all_cards.parquet` replaces multiple CSV files
+  - Single source of truth for all card data (29,857 cards, 2,751 commanders, 31 backgrounds)
+  - Native support for lists and complex data types
+  - Faster loading (binary columnar format vs text parsing)
+  - Automatic deduplication and data validation
+- **Performance**: Parallel tagging option provides 4.2x speedup (22s → 5.2s)
+- **Combo Tags**: 226 cards tagged with combo-enabling abilities for better deck building
+- **Data Quality**: Built-in commander/background detection using boolean flags instead of separate files
+- **GitHub Downloads**: Pre-tagged card database and similarity cache available for instant setup
+  - Auto-download on first run (seconds instead of 15-20 minutes)
+  - Manual download button in web UI
+  - Updated weekly via automated workflow
+
+### Changed
+- **CLI & Web**: Both interfaces now load from unified Parquet data source
+- **Deck Builder**: Simplified data loading, removed CSV file juggling
+- **Web Services**: Updated card browser, commander catalog, and owned cards to use Parquet
+- **Setup Process**: Streamlined initial setup with fewer file operations
+- **Module Execution**: Use `python -m code.main` / `python -m code.headless_runner` for proper imports
+
+### Removed
+- Dependency on separate `commander_cards.csv` and `background_cards.csv` files
+- Multiple color-specific CSV file loading logic
+- CSV parsing overhead from hot paths
+
+### Technical Details
+- DataLoader class provides consistent Parquet I/O across codebase
+- Boolean filters (`isCommander`, `isBackground`) replace file-based separation
+- Numpy array conversion ensures compatibility with existing list-checking code
+- GitHub Actions updated to use processed Parquet path
+- Docker containers benefit from smaller, faster data files
+
+## [2.9.1] - 2025-10-17
+### Summary
+Improved similar cards section with refresh button and reduced sidebar animation distractions.
+
+### Added
+- Similar cards now have a refresh button to see different recommendations without reloading the page
+- Explanation text clarifying that similarities are based on shared themes and tags
+
+### Changed
+- Sidebar generally no longer animates during page loads and partial updates, reducing visual distractions
+
+### Removed
+_None_
+
+### Fixed
+_None_
+
+## [2.9.0] - 2025-10-17
+### Summary
+New card browser for exploring 29,839 Magic cards with advanced filters, similar card recommendations, and performance optimizations.
+
+### Added
+- **Card Browser**: Browse and search all Magic cards at `/browse/cards`
+  - Smart autocomplete for card names and themes with typo tolerance
+  - Multi-theme filtering (up to 5 themes)
+  - Color, type, rarity, CMC, power/toughness filters
+  - Multiple sorting options including EDHREC popularity
+  - Infinite scroll with shareable filter URLs
+- **Card Detail Pages**: Individual card pages with similar card suggestions
+  - Full card stats, oracle text, and theme tags
+  - Similar cards based on theme overlap
+  - Color-coded similarity scores
+  - Card preview on hover
+  - Enable with `ENABLE_CARD_DETAILS=1` environment variable
+- **Similarity Cache**: Pre-computed card similarities for fast page loads
+  - Build cache with parallel processing script
+  - Automatically used when available
+  - Control with `SIMILARITY_CACHE_ENABLED` environment variable
+- **Keyboard Shortcuts**: Quick navigation in card browser
+  - `Enter` to add autocomplete matches
+  - `Shift+Enter` to apply filters
+  - Double `Esc` to clear all filters
+
+### Changed
+- **Card Database**: Expanded to 29,839 cards (updated from 26,427)
+- **Theme Catalog**: Improved coverage with better filtering
+
+### Removed
+- **Unused Scripts**: Removed `regenerate_parquet.py` (functionality now in web UI setup)
+
+### Fixed
+- **Card Browser UI**: Improved styling consistency and card image loading
+- **Infinite Scroll**: Fixed cards appearing multiple times when loading more results
+- **Sorting**: Sort order now persists correctly when scrolling through all pages
+
+## [2.8.1] - 2025-10-16
+### Summary
+Improved colorless commander support with automatic card filtering and display fixes.
+
+### Added
+- **Colorless Commander Filtering**: 25 cards that don't work in colorless decks are now automatically excluded
+  - Filters out cards like Arcane Signet, Commander's Sphere, and medallions that reference "commander's color identity" or colored spells
+  - Only applies to colorless identity commanders (Karn, Kozilek, Liberator, etc.)
+
+### Fixed
+- **Colorless Commander Display**: Fixed three bugs affecting colorless commander decks
+  - Color identity now displays correctly (grey "C" button with "Colorless" label)
+  - Wastes now correctly added as basic lands in colorless decks
+  - Colored basics (Plains, Island, etc.) no longer incorrectly added to colorless decks
+
+## [2.8.0] - 2025-10-15
+### Summary
+Theme catalog improvements with faster processing, new tag search features, regeneration fixes, and browser performance optimizations.
+
+### Added
+- **Theme Catalog Optimization**:
+  - Consolidated theme enrichment pipeline (single pass instead of 7 separate scripts)
+  - Tag index for fast theme-based card queries
+  - Tag search API with new endpoints for card search, autocomplete, and popular tags
+  - Commander browser theme autocomplete with keyboard navigation
+  - Tag loading infrastructure for batch operations
+- **Theme Browser Keyboard Navigation**: Arrow keys now navigate search results (ArrowUp/Down, Enter to select, Escape to close)
+
+### Changed
+- **Theme Browser Performance**: Theme detail pages now load much faster
+  - Disabled YAML file scanning in production (use `THEME_CATALOG_CHECK_YAML_CHANGES=1` during theme authoring)
+  - Cache invalidation now checks theme_list.json instead of scanning all files
+- **Theme Browser UI**: Removed color filter from theme catalog
+
+### Fixed
+- **Theme Regeneration**: Theme catalog can now be fully rebuilt from scratch without placeholder data
+  - Fixed "Anchor" placeholder issue when regenerating catalog
+  - Examples now generated from actual card data
+  - Theme export preserves all metadata fields
+
+## [2.7.1] - 2025-10-14
+### Summary
+Quick Build UI refinements for improved desktop display.

 ### Fixed
 - Quick Build progress display now uses full desktop width instead of narrow mobile-like layout
- Quick Build completion screen properly transitions to full-width Step 5 layout matching manual build experience_
+- Quick Build completion screen properly transitions to full-width Step 5 layout matching manual build experience

 ## [2.7.0] - 2025-10-14
 ### Summary
--- a/DOCKER.md
+++ b/DOCKER.md
@ -256,6 +256,9 @@ See `.env.example` for the full catalog. Common knobs:
 | `THEME` | `dark` | Initial UI theme (`system`, `light`, or `dark`). |
 | `WEB_STAGE_ORDER` | `new` | Build stage execution order: `new` (creatures→spells→lands) or `legacy` (lands→creatures→spells). |
 | `WEB_IDEALS_UI` | `slider` | Ideal counts interface: `slider` (range inputs with live validation) or `input` (text boxes with placeholders). |
+| `ENABLE_CARD_DETAILS` | `0` | Show card detail pages with similar card recommendations at `/cards/<name>`. |
+| `SIMILARITY_CACHE_ENABLED` | `1` | Use pre-computed similarity cache for fast card detail pages. |
+| `ENABLE_BATCH_BUILD` | `1` | Enable Build X and Compare feature (build multiple decks in parallel and compare results). |

 ### Random build controls

@ -280,6 +283,7 @@ See `.env.example` for the full catalog. Common knobs:
 | `WEB_AUTO_REFRESH_DAYS` | `7` | Refresh `cards.csv` if older than N days. |
 | `WEB_TAG_PARALLEL` | `1` | Use parallel workers during tagging. |
 | `WEB_TAG_WORKERS` | `4` | Worker count for parallel tagging. |
+| `CACHE_CARD_IMAGES` | `0` | Download card images to `card_files/images/` (1=enable, 0=fetch from API on demand). See [Image Caching](docs/IMAGE_CACHING.md). |
 | `WEB_AUTO_ENFORCE` | `0` | Re-export decks after auto-applying compliance fixes. |
 | `WEB_THEME_PICKER_DIAGNOSTICS` | `1` | Enable theme diagnostics endpoints. |

--- a/42
+++ b/42
@ -10,21 +10,42 @@ ENV PYTHONUNBUFFERED=1
 ARG APP_VERSION=dev
 ENV APP_VERSION=${APP_VERSION}

-# Install system dependencies if needed
+# Install system dependencies including Node.js
 RUN apt-get update && apt-get install -y \
    gcc \
+    curl \
+    && curl -fsSL https://deb.nodesource.com/setup_lts.x | bash - \
+    && apt-get install -y nodejs \
    && rm -rf /var/lib/apt/lists/*

-# Copy requirements first for better caching
+# Copy package files for Node.js dependencies
+COPY package.json package-lock.json* ./
+
+# Install Node.js dependencies
+RUN npm install
+
+# Copy Tailwind/TypeScript config files
+COPY tailwind.config.js postcss.config.js tsconfig.json ./
+
+# Copy requirements for Python dependencies (for better caching)
 COPY requirements.txt .

 # Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt

-# Copy application code
+# Copy Python application code (includes templates needed for Tailwind)
 COPY code/ ./code/
 COPY mypy.ini .

+# Tailwind source is already in code/web/static/tailwind.css from COPY code/
+# TypeScript sources are in code/web/static/ts/ from COPY code/
+
+# Force fresh CSS build by removing any copied styles.css
+RUN rm -f ./code/web/static/styles.css
+
+# Build CSS and TypeScript
+RUN npm run build
+
 # Copy default configs in two locations:
 # 1) /app/config is the live path (may be overlaid by a volume)
 # 2) /app/.defaults/config is preserved in the image for first-run seeding when a volume is mounted
@ -32,11 +53,19 @@ COPY config/ ./config/
 COPY config/ /.defaults/config/
 RUN mkdir -p owned_cards

+# Copy similarity cache if available (pre-built during CI)
+# Store in /.defaults/card_files so it persists after volume mount  
+RUN mkdir -p /.defaults/card_files
+# Copy entire card_files directory (will include cache if present, empty if not)
+# COMMENTED OUT FOR LOCAL DEV: card_files is mounted as volume anyway
+# Uncomment for production builds or CI/CD
+# COPY card_files/ /.defaults/card_files/
+
 # Create necessary directories as mount points
-RUN mkdir -p deck_files logs csv_files config /.defaults
+RUN mkdir -p deck_files logs csv_files card_files config /.defaults

 # Create volumes for persistent data
-VOLUME ["/app/deck_files", "/app/logs", "/app/csv_files", "/app/config", "/app/owned_cards"]
+VOLUME ["/app/deck_files", "/app/logs", "/app/csv_files", "/app/card_files", "/app/config", "/app/owned_cards"]

 # Create symbolic links BEFORE changing working directory
 # These will point to the mounted volumes
@ -44,11 +73,12 @@ RUN cd /app/code && \
    ln -sf /app/deck_files ./deck_files && \
    ln -sf /app/logs ./logs && \
    ln -sf /app/csv_files ./csv_files && \
+    ln -sf /app/card_files ./card_files && \
    ln -sf /app/config ./config && \
    ln -sf /app/owned_cards ./owned_cards

 # Verify symbolic links were created
-RUN cd /app/code && ls -la deck_files logs csv_files config owned_cards
+RUN cd /app/code && ls -la deck_files logs csv_files card_files config owned_cards

 # Set the working directory to code for proper imports
 WORKDIR /app/code
--- a/README.md
+++ b/README.md
@ -21,6 +21,7 @@ A web-first Commander/EDH deckbuilder with a shared core for CLI, headless, and
  - [Initial Setup](#initial-setup)
  - [Owned Library](#owned-library)
  - [Browse Commanders](#browse-commanders)
+  - [Browse Cards](#browse-cards)
  - [Browse Themes](#browse-themes)
  - [Finished Decks](#finished-decks)
  - [Random Build](#random-build)
@ -78,6 +79,12 @@ Every tile on the homepage connects to a workflow. Use these sections as your to
 ### Build a Deck
 Start here for interactive deck creation.
 - Pick commander, themes (primary/secondary/tertiary), bracket, and optional deck name in the unified modal.
+- **Build X and Compare** (`ENABLE_BATCH_BUILD=1`, default): Build 1-10 decks with the same configuration to see variance
+  - Parallel execution (max 5 concurrent) with real-time progress and dynamic time estimates
+  - Comparison view shows card overlap statistics and individual build summaries
+  - **Synergy Builder**: Analyze builds and create optimized "best-of" deck scored by frequency, EDHREC rank, and theme tags
+  - Rebuild button for quick iterations, ZIP export for all builds
+  - See `docs/user_guides/batch_build_compare.md` for full guide
 - **Quick Build**: One-click automation runs the full workflow with live progress (Creatures → Spells → Lands → Final Touches → Summary). Available in New Deck wizard.
 - **Skip Controls**: Granular stage-skipping toggles in New Deck wizard (21 flags: land steps, creature stages, spell categories). Auto-advance without approval prompts.
 - Add supplemental themes in the **Additional Themes** section (ENABLE_CUSTOM_THEMES): fuzzy suggestions, removable chips, and strict/permissive matching toggles respect `THEME_MATCH_MODE` and `USER_THEME_LIMIT`.
@ -103,8 +110,10 @@ Execute saved configs without manual input.

 ### Initial Setup
 Refresh data and caches when formats shift.
- Runs card downloads, CSV regeneration, smart tagging (keywords + protection grants), and commander catalog rebuilds.
- Controlled by `SHOW_SETUP=1` (on by default in compose).
+- **First run**: Auto-downloads pre-tagged card database from GitHub (instant setup)
+- **Manual refresh**: Download button in web UI or run setup locally
+- Runs card downloads, data generation, smart tagging (keywords + protection grants), and commander catalog rebuilds
+- Controlled by `SHOW_SETUP=1` (on by default in compose)
 - **Force a full rebuild (setup + tagging)**:
  ```powershell
  # Docker:
@ -119,7 +128,7 @@ Refresh data and caches when formats shift.
  # With parallel processing and custom worker count:
  python -c "from code.file_setup.setup import initial_setup; from code.tagging.tagger import run_tagging; initial_setup(); run_tagging(parallel=True, max_workers=4)"
  ```
- **Rebuild only CSVs without tagging**:
+- **Rebuild only data without tagging**:
  ```powershell
  # Docker:
  docker compose run --rm web python -c "from code.file_setup.setup import initial_setup; initial_setup()"
@ -164,6 +173,15 @@ Explore the curated commander catalog.
 - Refresh via Initial Setup or the commander catalog script above.
 - MDFC merges and compatibility snapshots are handled automatically; use `--compat-snapshot` on the refresh script to emit an unmerged snapshot.

+### Browse Cards
+Search and explore all 29,839 Magic cards.
+- **Search & Filters**: Smart autocomplete for card names and themes, multi-theme filtering (up to 5), color identity, type, rarity, CMC range, power/toughness
+- **Sorting**: Name A-Z/Z-A, CMC Low/High, Power High, EDHREC Popular
+- **Card Details** (optional): Enable with `ENABLE_CARD_DETAILS=1` for individual card pages with similar card recommendations
+- **Keyboard Shortcuts**: `Enter` to add matches, `Shift+Enter` to apply filters, double `Esc` to clear all
+- **Shareable URLs**: Filter state persists in URL for easy sharing
+- Fast lookups powered by pre-built card index and optional similarity cache (`SIMILARITY_CACHE_ENABLED=1`)
+
 ### Browse Themes
 Investigate theme synergies and diagnostics.
 - `ENABLE_THEMES=1` keeps the tile visible (default).
@ -291,6 +309,7 @@ Most defaults are defined in `docker-compose.yml` and documented in `.env.exampl
 | `WEB_AUTO_REFRESH_DAYS` | `7` | Refresh `cards.csv` if older than N days. |
 | `WEB_TAG_PARALLEL` | `1` | Enable parallel tagging workers. |
 | `WEB_TAG_WORKERS` | `4` | Worker count for tagging (compose default). |
+| `CACHE_CARD_IMAGES` | `0` | Download card images to `card_files/images/` (1=enable, 0=fetch from API on demand). Requires ~3-6 GB. See [Image Caching](docs/IMAGE_CACHING.md). |
 | `WEB_AUTO_ENFORCE` | `0` | Auto-apply bracket enforcement after builds. |
 | `WEB_THEME_PICKER_DIAGNOSTICS` | `1` | Enable theme diagnostics endpoints. |

--- a/RELEASE_NOTES_TEMPLATE.md
+++ b/RELEASE_NOTES_TEMPLATE.md
@ -1,14 +1,111 @@
 # MTG Python Deckbuilder ${VERSION}

+## [Unreleased]
+
 ### Summary
-Minor UI fixes for Quick Build progress and completion display.
+Web UI improvements with Tailwind CSS migration, TypeScript conversion, component library, template validation tests, enhanced code quality tools, and optional card image caching for faster performance and better maintainability.

 ### Added
-_No unreleased additions yet._
+- **Template Validation Tests**: Comprehensive test suite ensuring HTML/template quality
+  - Validates Jinja2 syntax and structure
+  - Checks for common HTML issues (duplicate IDs, balanced tags)
+  - Basic accessibility validation
+  - Prevents regression in template quality
+- **Code Quality Tools**: Enhanced development tooling for maintainability
+  - Automated utilities for code cleanup
+  - Improved type checking configuration
+- **Card Image Caching**: Optional local image cache for faster card display
+  - Downloads card images from Scryfall bulk data (respects API guidelines)
+  - Graceful fallback to Scryfall API for uncached images
+  - Enabled via `CACHE_CARD_IMAGES=1` environment variable
+  - Integrated with setup/tagging process
+  - Statistics endpoint with intelligent caching (weekly refresh, matching card data staleness)
+- **Component Library**: Living documentation of reusable UI components at `/docs/components`
+  - Interactive examples of all buttons, modals, forms, cards, and panels
+  - Jinja2 macros for consistent component usage
+  - Component partial templates for reuse across pages
+- **TypeScript Migration**: Migrated JavaScript codebase to TypeScript for better type safety
+  - Converted `components.js` (376 lines) and `app.js` (1390 lines) to TypeScript
+  - Created shared type definitions for state management, telemetry, HTMX, and UI components
+  - Integrated TypeScript compilation into build process (`npm run build:ts`)
+  - Compiled JavaScript output in `code/web/static/js/` directory
+  - Docker build automatically compiles TypeScript during image creation

 ### Changed
-_No unreleased changes yet._
+- **Inline JavaScript Cleanup**: Removed legacy card hover system (~230 lines of unused code)
+- **JavaScript Consolidation**: Extracted inline scripts to TypeScript modules
+  - Created `cardHover.ts` for unified hover panel functionality
+  - Created `cardImages.ts` for card image loading with automatic retry fallbacks
+  - Reduced inline script size in base template for better maintainability
+- **Migrated CSS to Tailwind**: Consolidated and unified CSS architecture
+  - Tailwind CSS v3 with custom MTG color palette
+  - PostCSS build pipeline with autoprefixer
+  - Reduced inline styles in templates (moved to shared CSS classes)
+  - Organized CSS into functional sections with clear documentation
+- **Theme Visual Improvements**: Enhanced readability and consistency across all theme modes
+  - Light mode: Darker text for improved readability, warm earth tone color palette
+  - Dark mode: Refined contrast for better visual hierarchy
+  - High-contrast mode: Optimized for maximum accessibility
+  - Consistent hover states across all interactive elements
+  - Improved visibility of form inputs and controls
+- **JavaScript Modernization**: Updated to modern JavaScript patterns
+  - Converted `var` declarations to `const`/`let`
+  - Added TypeScript type annotations for better IDE support and error catching
+  - Consolidated event handlers and utility functions
+- **Docker Build Optimization**: Improved developer experience
+  - Hot reload enabled for templates and static files
+  - Volume mounts for rapid iteration without rebuilds
+- **Template Modernization**: Migrated templates to use component system
+- **Type Checking Configuration**: Improved Python code quality tooling
+  - Configured type checker for better error detection
+  - Optimized linting rules for development workflow
+- **Intelligent Synergy Builder**: Analyze multiple builds and create optimized "best-of" deck
+  - Scores cards by frequency (50%), EDHREC rank (25%), and theme tags (25%)
+  - 10% bonus for cards appearing in 80%+ of builds
+  - Color-coded synergy scores in preview (green=high, red=low)
+  - Partner commander support with combined color identity
+  - Multi-copy card tracking (e.g., 8 Mountains, 7 Islands)
+  - Export synergy deck with full metadata (CSV, TXT, JSON files)
+- `ENABLE_BATCH_BUILD` environment variable to toggle feature (default: enabled)
+- Detailed progress logging for multi-build orchestration
+- User guide: `docs/user_guides/batch_build_compare.md`
+- **Web UI Component Library**: Standardized UI components for consistent design across all pages
+  - 5 component partial template files (buttons, modals, forms, cards, panels)
+  - ~900 lines of component CSS styles
+  - Interactive JavaScript utilities (components.js)
+  - Living component library page at `/docs/components`
+  - 1600+ lines developer documentation (component_catalog.md)
+- **Custom UI Enhancements**:
+  - Darker gray styling for home page buttons
+  - Visual highlighting for selected theme chips in deck builder
+
+### Removed
+_None_

 ### Fixed
- Quick Build progress display now uses full desktop width instead of narrow mobile-like layout
- Quick Build completion screen properly transitions to full-width Step 5 layout matching manual build experience_
+- **Template Quality**: Resolved HTML structure issues
+  - Fixed duplicate ID attributes in templates
+  - Removed erroneous template block tags
+  - Corrected structure for HTMX fragments
+- **Code Quality**: Resolved type checking warnings and improved code maintainability
+  - Fixed type annotation inconsistencies
+  - Cleaned up redundant code quality suppressions
+  - Corrected configuration conflicts
+
+### Performance
+- Hot reload for CSS/template changes (no Docker rebuild needed)
+- Optional image caching reduces Scryfall API calls
+- Faster page loads with optimized CSS
+- TypeScript compilation produces optimized JavaScript
+
+### For Users
+- Faster card image loading with optional caching
+- Cleaner, more consistent web UI design
+- Improved page load performance
+- More reliable JavaScript behavior
+
+### Deprecated
+_None_
+
+### Security
+_None_
--- a/code/deck_builder/init.py
+++ b/code/deck_builder/init.py
@ -4,6 +4,6 @@ __all__ = ['DeckBuilder']
 def __getattr__(name):
    # Lazy-load DeckBuilder to avoid side effects during import of submodules
    if name == 'DeckBuilder':
-        from .builder import DeckBuilder  # type: ignore
+        from .builder import DeckBuilder
        return DeckBuilder
    raise AttributeError(name)
--- a/code/deck_builder/background_loader.py
+++ b/code/deck_builder/background_loader.py
@ -1,22 +1,18 @@
-"""Loader for background cards derived from `background_cards.csv`."""
+"""Loader for background cards derived from all_cards.parquet."""
 from __future__ import annotations

 import ast
-import csv
+import re
 from dataclasses import dataclass
 from functools import lru_cache
 from pathlib import Path
-import re
-from typing import Mapping, Tuple
+from typing import Any, Mapping, Tuple

-from code.logging_util import get_logger
+from logging_util import get_logger
 from deck_builder.partner_background_utils import analyze_partner_background
-from path_util import csv_dir

 LOGGER = get_logger(__name__)

-BACKGROUND_FILENAME = "background_cards.csv"
-

@dataclass(frozen=True, slots=True)
 class BackgroundCard:
@ -57,7 +53,7 @@ class BackgroundCatalog:
 def load_background_cards(
    source_path: str | Path | None = None,
 ) -> BackgroundCatalog:
-    """Load and cache background card data."""
+    """Load and cache background card data from all_cards.parquet."""

    resolved = _resolve_background_path(source_path)
    try:
@ -65,7 +61,7 @@ def load_background_cards(
        mtime_ns = getattr(stat, "st_mtime_ns", int(stat.st_mtime * 1_000_000_000))
        size = stat.st_size
    except FileNotFoundError:
-        raise FileNotFoundError(f"Background CSV not found at {resolved}") from None
+        raise FileNotFoundError(f"Background data not found at {resolved}") from None

    entries, version = _load_background_cards_cached(str(resolved), mtime_ns)
    etag = f"{size}-{mtime_ns}-{len(entries)}"
@ -88,46 +84,49 @@ def _load_background_cards_cached(path_str: str, mtime_ns: int) -> Tuple[Tuple[B
    if not path.exists():
        return tuple(), "unknown"

-    with path.open("r", encoding="utf-8", newline="") as handle:
-        first_line = handle.readline()
-        version = "unknown"
-        if first_line.startswith("#"):
-            version = _parse_version(first_line)
-        else:
-            handle.seek(0)
-        reader = csv.DictReader(handle)
-        if reader.fieldnames is None:
-            return tuple(), version
-        entries = _rows_to_cards(reader)
+    try:
+        import pandas as pd
+        df = pd.read_parquet(path, engine="pyarrow")
+        
+        # Filter for background cards
+        if 'isBackground' not in df.columns:
+            LOGGER.warning("isBackground column not found in %s", path)
+            return tuple(), "unknown"
+        
+        df_backgrounds = df[df['isBackground']].copy()
+        
+        if len(df_backgrounds) == 0:
+            LOGGER.warning("No background cards found in %s", path)
+            return tuple(), "unknown"
+        
+        entries = _rows_to_cards(df_backgrounds)
+        version = "parquet"
+        
+    except Exception as e:
+        LOGGER.error("Failed to load backgrounds from %s: %s", path, e)
+        return tuple(), "unknown"

    frozen = tuple(entries)
    return frozen, version


 def _resolve_background_path(override: str | Path | None) -> Path:
+    """Resolve path to all_cards.parquet."""
    if override:
        return Path(override).resolve()
-    return (Path(csv_dir()) / BACKGROUND_FILENAME).resolve()
+    # Use card_files/processed/all_cards.parquet
+    return Path("card_files/processed/all_cards.parquet").resolve()


-def _parse_version(line: str) -> str:
-    tokens = line.lstrip("# ").strip().split()
-    for token in tokens:
-        if "=" not in token:
-            continue
-        key, value = token.split("=", 1)
-        if key == "version":
-            return value
-    return "unknown"
-
-
-def _rows_to_cards(reader: csv.DictReader) -> list[BackgroundCard]:
+def _rows_to_cards(df) -> list[BackgroundCard]:
+    """Convert DataFrame rows to BackgroundCard objects."""
    entries: list[BackgroundCard] = []
    seen: set[str] = set()
-    for raw in reader:
-        if not raw:
+    
+    for _, row in df.iterrows():
+        if row.empty:
            continue
-        card = _row_to_card(raw)
+        card = _row_to_card(row)
        if card is None:
            continue
        key = card.display_name.lower()
@ -135,20 +134,35 @@ def _rows_to_cards(reader: csv.DictReader) -> list[BackgroundCard]:
            continue
        seen.add(key)
        entries.append(card)
+    
    entries.sort(key=lambda card: card.display_name)
    return entries


-def _row_to_card(row: Mapping[str, str]) -> BackgroundCard | None:
-    name = _clean_str(row.get("name"))
-    face_name = _clean_str(row.get("faceName")) or None
+def _row_to_card(row) -> BackgroundCard | None:
+    """Convert a DataFrame row to a BackgroundCard."""
+    # Helper to safely get values from DataFrame row
+    def get_val(key: str):
+        try:
+            if hasattr(row, key):
+                val = getattr(row, key)
+                # Handle pandas NA/None
+                if val is None or (hasattr(val, '__class__') and 'NA' in val.__class__.__name__):
+                    return None
+                return val
+            return None
+        except Exception:
+            return None
+    
+    name = _clean_str(get_val("name"))
+    face_name = _clean_str(get_val("faceName")) or None
    display = face_name or name
    if not display:
        return None

-    type_line = _clean_str(row.get("type"))
-    oracle_text = _clean_multiline(row.get("text"))
-    raw_theme_tags = tuple(_parse_literal_list(row.get("themeTags")))
+    type_line = _clean_str(get_val("type"))
+    oracle_text = _clean_multiline(get_val("text"))
+    raw_theme_tags = tuple(_parse_literal_list(get_val("themeTags")))
    detection = analyze_partner_background(type_line, oracle_text, raw_theme_tags)
    if not detection.is_background:
        return None
@ -158,18 +172,18 @@ def _row_to_card(row: Mapping[str, str]) -> BackgroundCard | None:
        face_name=face_name,
        display_name=display,
        slug=_slugify(display),
-        color_identity=_parse_color_list(row.get("colorIdentity")),
-        colors=_parse_color_list(row.get("colors")),
-        mana_cost=_clean_str(row.get("manaCost")),
-        mana_value=_parse_float(row.get("manaValue")),
+        color_identity=_parse_color_list(get_val("colorIdentity")),
+        colors=_parse_color_list(get_val("colors")),
+        mana_cost=_clean_str(get_val("manaCost")),
+        mana_value=_parse_float(get_val("manaValue")),
        type_line=type_line,
        oracle_text=oracle_text,
-        keywords=tuple(_split_list(row.get("keywords"))),
+        keywords=tuple(_split_list(get_val("keywords"))),
        theme_tags=tuple(tag for tag in raw_theme_tags if tag),
        raw_theme_tags=raw_theme_tags,
-        edhrec_rank=_parse_int(row.get("edhrecRank")),
-        layout=_clean_str(row.get("layout")) or "normal",
-        side=_clean_str(row.get("side")) or None,
+        edhrec_rank=_parse_int(get_val("edhrecRank")),
+        layout=_clean_str(get_val("layout")) or "normal",
+        side=_clean_str(get_val("side")) or None,
    )


@ -189,8 +203,19 @@ def _clean_multiline(value: object) -> str:
 def _parse_literal_list(value: object) -> list[str]:
    if value is None:
        return []
-    if isinstance(value, (list, tuple, set)):
+    
+    # Check if it's a numpy array (from Parquet/pandas)
+    is_numpy = False
+    try:
+        import numpy as np
+        is_numpy = isinstance(value, np.ndarray)
+    except ImportError:
+        pass
+    
+    # Handle lists, tuples, sets, and numpy arrays
+    if isinstance(value, (list, tuple, set)) or is_numpy:
        return [str(item).strip() for item in value if str(item).strip()]
+    
    text = str(value).strip()
    if not text:
        return []
@ -205,6 +230,17 @@ def _parse_literal_list(value: object) -> list[str]:


 def _split_list(value: object) -> list[str]:
+    # Check if it's a numpy array (from Parquet/pandas)
+    is_numpy = False
+    try:
+        import numpy as np
+        is_numpy = isinstance(value, np.ndarray)
+    except ImportError:
+        pass
+    
+    if isinstance(value, (list, tuple, set)) or is_numpy:
+        return [str(item).strip() for item in value if str(item).strip()]
+    
    text = _clean_str(value)
    if not text:
        return []
@ -213,6 +249,18 @@ def _split_list(value: object) -> list[str]:


 def _parse_color_list(value: object) -> Tuple[str, ...]:
+    # Check if it's a numpy array (from Parquet/pandas)
+    is_numpy = False
+    try:
+        import numpy as np
+        is_numpy = isinstance(value, np.ndarray)
+    except ImportError:
+        pass
+    
+    if isinstance(value, (list, tuple, set)) or is_numpy:
+        parts = [str(item).strip().upper() for item in value if str(item).strip()]
+        return tuple(parts)
+    
    text = _clean_str(value)
    if not text:
        return tuple()
--- a/code/deck_builder/builder.py
+++ b/code/deck_builder/builder.py
@ -95,7 +95,7 @@ class DeckBuilder(
                # If a seed was assigned pre-init, use it
                if self.seed is not None:
                    # Import here to avoid any heavy import cycles at module import time
-                    from random_util import set_seed as _set_seed  # type: ignore
+                    from random_util import set_seed as _set_seed
                    self._rng = _set_seed(int(self.seed))
                else:
                    self._rng = random.Random()
@ -107,7 +107,7 @@ class DeckBuilder(
    def set_seed(self, seed: int | str) -> None:
        """Set deterministic seed for this builder and reset its RNG instance."""
        try:
-            from random_util import derive_seed_from_string as _derive, set_seed as _set_seed  # type: ignore
+            from random_util import derive_seed_from_string as _derive, set_seed as _set_seed
            s = _derive(seed)
            self.seed = int(s)
            self._rng = _set_seed(s)
@ -154,28 +154,33 @@ class DeckBuilder(
        start_ts = datetime.datetime.now()
        logger.info("=== Deck Build: BEGIN ===")
        try:
-            # Ensure CSVs exist and are tagged before starting any deck build logic
+            # M4: Ensure Parquet file exists and is tagged before starting any deck build logic
            try:
                import time as _time
                import json as _json
                from datetime import datetime as _dt
-                cards_path = os.path.join(CSV_DIRECTORY, 'cards.csv')
+                from code.path_util import get_processed_cards_path
+                
+                parquet_path = get_processed_cards_path()
                flag_path = os.path.join(CSV_DIRECTORY, '.tagging_complete.json')
                refresh_needed = False
-                if not os.path.exists(cards_path):
-                    logger.info("cards.csv not found. Running initial setup and tagging before deck build...")
+                
+                if not os.path.exists(parquet_path):
+                    logger.info("all_cards.parquet not found. Running initial setup and tagging before deck build...")
                    refresh_needed = True
                else:
                    try:
-                        age_seconds = _time.time() - os.path.getmtime(cards_path)
+                        age_seconds = _time.time() - os.path.getmtime(parquet_path)
                        if age_seconds > 7 * 24 * 60 * 60:
-                            logger.info("cards.csv is older than 7 days. Refreshing data before deck build...")
+                            logger.info("all_cards.parquet is older than 7 days. Refreshing data before deck build...")
                            refresh_needed = True
                    except Exception:
                        pass
+                
                if not os.path.exists(flag_path):
                    logger.info("Tagging completion flag not found. Performing full tagging before deck build...")
                    refresh_needed = True
+                
                if refresh_needed:
                    initial_setup()
                    from tagging import tagger as _tagger
@ -187,7 +192,7 @@ class DeckBuilder(
                    except Exception:
                        logger.warning("Failed to write tagging completion flag (non-fatal).")
            except Exception as e:
-                logger.error(f"Failed ensuring CSVs before deck build: {e}")
+                logger.error(f"Failed ensuring Parquet file before deck build: {e}")
            self.run_initial_setup()
            self.run_deck_build_step1()
            self.run_deck_build_step2()
@ -210,7 +215,7 @@ class DeckBuilder(
            try:
                # Compute a quick compliance snapshot here to hint at upcoming enforcement
                if hasattr(self, 'compute_and_print_compliance') and not getattr(self, 'headless', False):
-                    from deck_builder.brackets_compliance import evaluate_deck as _eval  # type: ignore
+                    from deck_builder.brackets_compliance import evaluate_deck as _eval
                    bracket_key = str(getattr(self, 'bracket_name', '') or getattr(self, 'bracket_level', 'core')).lower()
                    commander = getattr(self, 'commander_name', None)
                    snap = _eval(self.card_library, commander_name=commander, bracket=bracket_key)
@ -235,15 +240,15 @@ class DeckBuilder(
                    csv_path = self.export_decklist_csv()
                    # Persist CSV path immediately (before any later potential exceptions)
                    try:
-                        self.last_csv_path = csv_path  # type: ignore[attr-defined]
+                        self.last_csv_path = csv_path
                    except Exception:
                        pass
                    try:
                        import os as _os
                        base, _ext = _os.path.splitext(_os.path.basename(csv_path))
-                        txt_path = self.export_decklist_text(filename=base + '.txt')  # type: ignore[attr-defined]
+                        txt_path = self.export_decklist_text(filename=base + '.txt')
                        try:
-                            self.last_txt_path = txt_path  # type: ignore[attr-defined]
+                            self.last_txt_path = txt_path
                        except Exception:
                            pass
                        # Display the text file contents for easy copy/paste to online deck builders
@ -251,18 +256,18 @@ class DeckBuilder(
                        # Compute bracket compliance and save a JSON report alongside exports
                        try:
                            if hasattr(self, 'compute_and_print_compliance'):
-                                report0 = self.compute_and_print_compliance(base_stem=base)  # type: ignore[attr-defined]
+                                report0 = self.compute_and_print_compliance(base_stem=base)
                                # If non-compliant and interactive, offer enforcement now
                                try:
                                    if isinstance(report0, dict) and report0.get('overall') == 'FAIL' and not getattr(self, 'headless', False):
-                                        from deck_builder.phases.phase6_reporting import ReportingMixin as _RM  # type: ignore
+                                        from deck_builder.phases.phase6_reporting import ReportingMixin as _RM
                                        if isinstance(self, _RM) and hasattr(self, 'enforce_and_reexport'):
                                            self.output_func("One or more bracket limits exceeded. Enter to auto-resolve, or Ctrl+C to skip.")
                                            try:
                                                _ = self.input_func("")
                                            except Exception:
                                                pass
-                                            self.enforce_and_reexport(base_stem=base, mode='prompt')  # type: ignore[attr-defined]
+                                            self.enforce_and_reexport(base_stem=base, mode='prompt')
                                except Exception:
                                    pass
                        except Exception:
@ -290,12 +295,12 @@ class DeckBuilder(
                                    cfg_dir = 'config'
                                if cfg_dir:
                                    _os.makedirs(cfg_dir, exist_ok=True)
-                                    self.export_run_config_json(directory=cfg_dir, filename=base + '.json')  # type: ignore[attr-defined]
+                                    self.export_run_config_json(directory=cfg_dir, filename=base + '.json')
                                if cfg_path_env:
                                    cfg_dir2 = _os.path.dirname(cfg_path_env) or '.'
                                    cfg_name2 = _os.path.basename(cfg_path_env)
                                    _os.makedirs(cfg_dir2, exist_ok=True)
-                                    self.export_run_config_json(directory=cfg_dir2, filename=cfg_name2)  # type: ignore[attr-defined]
+                                    self.export_run_config_json(directory=cfg_dir2, filename=cfg_name2)
                            except Exception:
                                pass
                    except Exception:
@ -303,8 +308,8 @@ class DeckBuilder(
                else:
                    # Mark suppression so random flow knows nothing was exported yet
                    try:
-                        self.last_csv_path = None  # type: ignore[attr-defined]
-                        self.last_txt_path = None  # type: ignore[attr-defined]
+                        self.last_csv_path = None
+                        self.last_txt_path = None
                    except Exception:
                        pass
            # If owned-only and deck not complete, print a note
@ -619,8 +624,8 @@ class DeckBuilder(
            try:
                rec.card_library = rec_subset
                # Export CSV and TXT with suffix
-                rec.export_decklist_csv(directory='deck_files', filename=base_stem + '_recommendations.csv', suppress_output=True)  # type: ignore[attr-defined]
-                rec.export_decklist_text(directory='deck_files', filename=base_stem + '_recommendations.txt', suppress_output=True)  # type: ignore[attr-defined]
+                rec.export_decklist_csv(directory='deck_files', filename=base_stem + '_recommendations.csv', suppress_output=True)
+                rec.export_decklist_text(directory='deck_files', filename=base_stem + '_recommendations.txt', suppress_output=True)
            finally:
                rec.card_library = original_lib
            # Notify user succinctly
@ -832,14 +837,47 @@ class DeckBuilder(
    def load_commander_data(self) -> pd.DataFrame:
        if self._commander_df is not None:
            return self._commander_df
-        df = pd.read_csv(
-            bc.COMMANDER_CSV_PATH,
-            converters=getattr(bc, "COMMANDER_CONVERTERS", None)
-        )
+        
+        # M7: Try loading from dedicated commander cache first (fast path)
+        from path_util import get_commander_cards_path
+        from file_setup.data_loader import DataLoader
+        
+        commander_path = get_commander_cards_path()
+        if os.path.exists(commander_path):
+            try:
+                loader = DataLoader()
+                df = loader.read_cards(commander_path, format="parquet")
+                
+                # Ensure required columns exist with proper defaults
+                if "themeTags" not in df.columns:
+                    df["themeTags"] = [[] for _ in range(len(df))]
+                if "creatureTypes" not in df.columns:
+                    df["creatureTypes"] = [[] for _ in range(len(df))]
+                
+                self._commander_df = df
+                return df
+            except Exception:
+                # Fall through to legacy path if cache read fails
+                pass
+        
+        # M4: Fallback - Load commanders from full Parquet file (slower)
+        from deck_builder import builder_utils as bu
+        from deck_builder import builder_constants as bc
+        
+        all_cards_df = bu._load_all_cards_parquet()
+        if all_cards_df.empty:
+            # Fallback to empty DataFrame with expected columns
+            return pd.DataFrame(columns=['name', 'themeTags', 'creatureTypes'])
+        
+        # Filter to only commander-eligible cards
+        df = bc.get_commanders(all_cards_df)
+        
+        # Ensure required columns exist with proper defaults
        if "themeTags" not in df.columns:
            df["themeTags"] = [[] for _ in range(len(df))]
        if "creatureTypes" not in df.columns:
            df["creatureTypes"] = [[] for _ in range(len(df))]
+        
        self._commander_df = df
        return df

@ -1063,8 +1101,11 @@ class DeckBuilder(
            if isinstance(raw_ci, list):
                colors_list = [str(c).strip().upper() for c in raw_ci]
            elif isinstance(raw_ci, str) and raw_ci.strip():
+                # Handle the literal string "Colorless" specially (from commander_cards.csv)
+                if raw_ci.strip().lower() == 'colorless':
+                    colors_list = []
                # Could be formatted like "['B','G']" or 'BG'; attempt simple parsing
-                if ',' in raw_ci:
+                elif ',' in raw_ci:
                    colors_list = [c.strip().strip("'[] ").upper() for c in raw_ci.split(',') if c.strip().strip("'[] ")]
                else:
                    colors_list = [c.upper() for c in raw_ci if c.isalpha()]
@ -1122,9 +1163,9 @@ class DeckBuilder(
        return full, load_files

    def setup_dataframes(self) -> pd.DataFrame:
-        """Load all csv files for current color identity into one combined DataFrame.
+        """Load cards from all_cards.parquet and filter by current color identity.

-        Each file stem in files_to_load corresponds to csv_files/{stem}_cards.csv.
+        M4: Migrated from CSV to Parquet. Filters by color identity using colorIdentity column.
        The result is cached and returned. Minimal validation only (non-empty, required columns exist if known).
        """
        if self._combined_cards_df is not None:
@ -1132,29 +1173,53 @@ class DeckBuilder(
        if not self.files_to_load:
            # Attempt to determine if not yet done
            self.determine_color_identity()
-        dfs = []
-        required = getattr(bc, 'CSV_REQUIRED_COLUMNS', [])
-        from path_util import csv_dir as _csv_dir
-        base = _csv_dir()
-        for stem in self.files_to_load:
-            path = f"{base}/{stem}_cards.csv"
-            try:
-                df = pd.read_csv(path)
-                if required:
-                    missing = [c for c in required if c not in df.columns]
-                    if missing:
-                        # Skip or still keep with warning; choose to warn
-                        self.output_func(f"Warning: {path} missing columns: {missing}")
-                dfs.append(df)
-            except FileNotFoundError:
-                self.output_func(f"Warning: CSV file not found: {path}")
-                continue
-        if not dfs:
-            raise RuntimeError("No CSV files loaded for color identity.")
-        combined = pd.concat(dfs, axis=0, ignore_index=True)
+        
+        # M4: Load from Parquet instead of CSV files
+        from deck_builder import builder_utils as bu
+        all_cards_df = bu._load_all_cards_parquet()
+        
+        if all_cards_df is None or all_cards_df.empty:
+            raise RuntimeError("Failed to load all_cards.parquet or file is empty.")
+        
+        # M4: Filter by color identity instead of loading multiple CSVs
+        # Get the colors from self.color_identity (e.g., {'W', 'U', 'B', 'G'})
+        if hasattr(self, 'color_identity') and self.color_identity:
+            # Determine which cards can be played in this color identity
+            # A card can be played if its color identity is a subset of the commander's color identity
+            def card_matches_identity(card_colors):
+                """Check if card's color identity is legal in commander's identity."""
+                if card_colors is None or (isinstance(card_colors, float) and pd.isna(card_colors)):
+                    # Colorless cards can go in any deck
+                    return True
+                if isinstance(card_colors, str):
+                    # Handle string format like "B, G, R, U" (note the spaces after commas)
+                    card_colors = {c.strip() for c in card_colors.split(',')} if card_colors else set()
+                elif isinstance(card_colors, list):
+                    card_colors = set(card_colors)
+                else:
+                    # Unknown format, be permissive
+                    return True
+                # Card is legal if its colors are a subset of commander colors
+                return card_colors.issubset(self.color_identity)
+            
+            if 'colorIdentity' in all_cards_df.columns:
+                mask = all_cards_df['colorIdentity'].apply(card_matches_identity)
+                combined = all_cards_df[mask].copy()
+                logger.info(f"M4 COLOR_FILTER: Filtered {len(all_cards_df)} cards to {len(combined)} cards for identity {sorted(self.color_identity)}")
+            else:
+                logger.warning("M4 COLOR_FILTER: colorIdentity column missing, using all cards")
+                combined = all_cards_df.copy()
+        else:
+            # No color identity set, use all cards
+            logger.warning("M4 COLOR_FILTER: No color identity set, using all cards")
+            combined = all_cards_df.copy()
+        
        # Drop duplicate rows by 'name' if column exists
        if 'name' in combined.columns:
+            before_dedup = len(combined)
            combined = combined.drop_duplicates(subset='name', keep='first')
+            if len(combined) < before_dedup:
+                logger.info(f"M4 DEDUP: Removed {before_dedup - len(combined)} duplicate names")
        # If owned-only mode, filter combined pool to owned names (case-insensitive)
        if self.use_owned_only:
            try:
@ -1175,6 +1240,54 @@ class DeckBuilder(
                self.output_func(f"Owned-only mode: failed to filter combined pool: {_e}")
    # Soft prefer-owned does not filter the pool; biasing is applied later at selection time
        
+        # M2: Filter out cards useless in colorless identity decks
+        if self.color_identity_key == 'COLORLESS':
+            logger.info(f"M2 COLORLESS FILTER: Activated for color_identity_key='{self.color_identity_key}'")
+            try:
+                if 'metadataTags' in combined.columns and 'name' in combined.columns:
+                    # Find cards with "Useless in Colorless" metadata tag
+                    def has_useless_tag(metadata_tags):
+                        # Handle various types: NaN, empty list, list with values
+                        if metadata_tags is None:
+                            return False
+                        # Check for pandas NaN or numpy NaN
+                        try:
+                            import numpy as np
+                            if isinstance(metadata_tags, float) and np.isnan(metadata_tags):
+                                return False
+                        except (TypeError, ValueError):
+                            pass
+                        # Handle empty list or numpy array
+                        if isinstance(metadata_tags, (list, np.ndarray)):
+                            if len(metadata_tags) == 0:
+                                return False
+                            return 'Useless in Colorless' in metadata_tags
+                        return False
+                    
+                    useless_mask = combined['metadataTags'].apply(has_useless_tag)
+                    useless_count = useless_mask.sum()
+                    
+                    if useless_count > 0:
+                        useless_names = combined.loc[useless_mask, 'name'].tolist()
+                        combined = combined[~useless_mask].copy()
+                        self.output_func(f"Colorless commander: filtered out {useless_count} cards useless in colorless identity")
+                        logger.info(f"M2 COLORLESS FILTER: Filtered out {useless_count} cards")
+                        # Log first few cards for transparency
+                        for name in useless_names[:3]:
+                            self.output_func(f"  - Filtered: {name}")
+                            logger.info(f"M2 COLORLESS FILTER: Removed '{name}'")
+                        if useless_count > 3:
+                            self.output_func(f"  - ... and {useless_count - 3} more")
+                    else:
+                        logger.warning(f"M2 COLORLESS FILTER: No cards found with 'Useless in Colorless' tag!")
+                else:
+                    logger.warning(f"M2 COLORLESS FILTER: Missing required columns (metadataTags or name)")
+            except Exception as e:
+                self.output_func(f"Warning: Failed to apply colorless filter: {e}")
+                logger.error(f"M2 COLORLESS FILTER: Exception: {e}", exc_info=True)
+        else:
+            logger.info(f"M2 COLORLESS FILTER: Not activated - color_identity_key='{self.color_identity_key}' (not 'Colorless')")
+        
        # Apply exclude card filtering (M0.5: Phase 1 - Exclude Only)
        if hasattr(self, 'exclude_cards') and self.exclude_cards:
            try:
@ -1730,7 +1843,7 @@ class DeckBuilder(
                from deck_builder import builder_constants as bc
                from settings import MULTIPLE_COPY_CARDS
            except Exception:
-                MULTIPLE_COPY_CARDS = []  # type: ignore
+                MULTIPLE_COPY_CARDS = []
            is_land = 'land' in str(card_type or entry.get('Card Type','')).lower()
            is_basic = False
            try:
@ -1892,10 +2005,10 @@ class DeckBuilder(
            return
        block = self._format_commander_pretty(self.commander_row)
        self.output_func("\n" + block)
-        # New: show which CSV files (stems) were loaded for this color identity
-        if self.files_to_load:
-            file_list = ", ".join(f"{stem}_cards.csv" for stem in self.files_to_load)
-            self.output_func(f"Card Pool Files: {file_list}")
+        # M4: Show that we're loading from unified Parquet file
+        if hasattr(self, 'color_identity') and self.color_identity:
+            colors = ', '.join(sorted(self.color_identity))
+            self.output_func(f"Card Pool: all_cards.parquet (filtered to {colors} identity)")
        # Owned-only status
        if getattr(self, 'use_owned_only', False):
            try:
@ -2240,7 +2353,7 @@ class DeckBuilder(
        rng = getattr(self, 'rng', None)
        try:
            if rng:
-                rng.shuffle(bucket_keys)  # type: ignore
+                rng.shuffle(bucket_keys)
            else:
                random.shuffle(bucket_keys)
        except Exception:
--- a/code/deck_builder/builder_constants.py
+++ b/code/deck_builder/builder_constants.py
@ -1,9 +1,12 @@
-from typing import Dict, List, Final, Tuple, Union, Callable, Any as _Any
+from typing import Dict, List, Final, Tuple, Union, Callable, Any
 from settings import CARD_DATA_COLUMNS as CSV_REQUIRED_COLUMNS  # unified
 from path_util import csv_dir
+import pandas as pd

 __all__ = [
-    'CSV_REQUIRED_COLUMNS'
+    'CSV_REQUIRED_COLUMNS',
+    'get_commanders',
+    'get_backgrounds',
 ]
 import ast

@ -14,9 +17,11 @@ MAX_FUZZY_CHOICES: Final[int] = 5  # Maximum number of fuzzy match choices

 # Commander-related constants
 DUPLICATE_CARD_FORMAT: Final[str] = '{card_name} x {count}'
+# M4: Deprecated - use Parquet loading instead
 COMMANDER_CSV_PATH: Final[str] = f"{csv_dir()}/commander_cards.csv"
 DECK_DIRECTORY = '../deck_files'
-COMMANDER_CONVERTERS: Final[Dict[str, str]] = {
+# M4: Deprecated - Parquet handles types natively (no converters needed)
+COMMANDER_CONVERTERS: Final[Dict[str, Any]] = {
    'themeTags': ast.literal_eval,
    'creatureTypes': ast.literal_eval,
    'roleTags': ast.literal_eval,
@ -135,18 +140,18 @@ OTHER_COLOR_MAP: Final[Dict[str, Tuple[str, List[str], List[str]]]] = {
 }

 # Card category validation rules
-CREATURE_VALIDATION_RULES: Final[Dict[str, Dict[str, Union[str, int, float, bool]]]] = {
+CREATURE_VALIDATION_RULES: Final[Dict[str, Dict[str, Any]]] = {
    'power': {'type': ('str', 'int', 'float'), 'required': True},
    'toughness': {'type': ('str', 'int', 'float'), 'required': True},
    'creatureTypes': {'type': 'list', 'required': True}
 }

-SPELL_VALIDATION_RULES: Final[Dict[str, Dict[str, Union[str, int, float, bool]]]] = {
+SPELL_VALIDATION_RULES: Final[Dict[str, Dict[str, Any]]] = {
    'manaCost': {'type': 'str', 'required': True},
    'text': {'type': 'str', 'required': True}
 }

-LAND_VALIDATION_RULES: Final[Dict[str, Dict[str, Union[str, int, float, bool]]]] = {
+LAND_VALIDATION_RULES: Final[Dict[str, Dict[str, Any]]] = {
    'type': {'type': ('str', 'object'), 'required': True},
    'text': {'type': ('str', 'object'), 'required': False}
 }
@ -286,7 +291,7 @@ COLORED_MANA_SYMBOLS: Final[List[str]] = ['{w}','{u}','{b}','{r}','{g}']


 # Basic Lands
-BASIC_LANDS = ['Plains', 'Island', 'Swamp', 'Mountain', 'Forest']
+BASIC_LANDS = ['Plains', 'Island', 'Swamp', 'Mountain', 'Forest', 'Wastes']

 # Basic land mappings
 COLOR_TO_BASIC_LAND: Final[Dict[str, str]] = {
@ -521,7 +526,7 @@ CSV_READ_TIMEOUT: Final[int] = 30  # Timeout in seconds for CSV read operations
 CSV_PROCESSING_BATCH_SIZE: Final[int] = 1000  # Number of rows to process in each batch

 # CSV validation configuration
-CSV_VALIDATION_RULES: Final[Dict[str, Dict[str, Union[str, int, float]]]] = {
+CSV_VALIDATION_RULES: Final[Dict[str, Dict[str, Any]]] = {
    'name': {'type': ('str', 'object'), 'required': True, 'unique': True},
    'edhrecRank': {'type': ('str', 'int', 'float', 'object'), 'min': 0, 'max': 100000},
    'manaValue': {'type': ('str', 'int', 'float', 'object'), 'min': 0, 'max': 20},
@ -597,12 +602,12 @@ GAME_CHANGERS: Final[List[str]] = [
 # - color_identity: list[str] of required color letters (subset must be in commander CI)
 # - printed_cap: int | None (None means no printed cap)
 # - exclusive_group: str | None (at most one from the same group)
-# - triggers: { tags_any: list[str], tags_all: list[str] }
+# - triggers: { tagsAny: list[str], tags_all: list[str] }
 # - default_count: int (default 25)
 # - rec_window: tuple[int,int] (recommendation window)
 # - thrumming_stone_synergy: bool
 # - type_hint: 'creature' | 'noncreature'
-MULTI_COPY_ARCHETYPES: Final[dict[str, dict[str, _Any]]] = {
+MULTI_COPY_ARCHETYPES: Final[dict[str, dict[str, Any]]] = {
    'cid_timeless_artificer': {
        'id': 'cid_timeless_artificer',
        'name': 'Cid, Timeless Artificer',
@ -610,7 +615,7 @@ MULTI_COPY_ARCHETYPES: Final[dict[str, dict[str, _Any]]] = {
        'printed_cap': None,
        'exclusive_group': None,
        'triggers': {
-            'tags_any': ['artificer kindred', 'hero kindred', 'artifacts matter'],
+            'tagsAny': ['artificer kindred', 'hero kindred', 'artifacts matter'],
            'tags_all': []
        },
        'default_count': 25,
@ -625,7 +630,7 @@ MULTI_COPY_ARCHETYPES: Final[dict[str, dict[str, _Any]]] = {
        'printed_cap': None,
        'exclusive_group': None,
        'triggers': {
-            'tags_any': ['burn','spellslinger','prowess','storm','copy','cascade','impulse draw','treasure','ramp','graveyard','mill','discard','recursion'],
+            'tagsAny': ['burn','spellslinger','prowess','storm','copy','cascade','impulse draw','treasure','ramp','graveyard','mill','discard','recursion'],
            'tags_all': []
        },
        'default_count': 25,
@ -640,7 +645,7 @@ MULTI_COPY_ARCHETYPES: Final[dict[str, dict[str, _Any]]] = {
        'printed_cap': None,
        'exclusive_group': None,
        'triggers': {
-            'tags_any': ['rabbit kindred','tokens matter','aggro'],
+            'tagsAny': ['rabbit kindred','tokens matter','aggro'],
            'tags_all': []
        },
        'default_count': 25,
@ -655,7 +660,7 @@ MULTI_COPY_ARCHETYPES: Final[dict[str, dict[str, _Any]]] = {
        'printed_cap': None,
        'exclusive_group': None,
        'triggers': {
-            'tags_any': ['tokens','tokens matter','go-wide','exile matters','ooze kindred','spells matter','spellslinger','graveyard','mill','discard','recursion','domain','self-mill','delirium','descend'],
+            'tagsAny': ['tokens','tokens matter','go-wide','exile matters','ooze kindred','spells matter','spellslinger','graveyard','mill','discard','recursion','domain','self-mill','delirium','descend'],
            'tags_all': []
        },
        'default_count': 25,
@ -670,7 +675,7 @@ MULTI_COPY_ARCHETYPES: Final[dict[str, dict[str, _Any]]] = {
        'printed_cap': None,
        'exclusive_group': 'rats',
        'triggers': {
-            'tags_any': ['rats','swarm','aristocrats','sacrifice','devotion-b','lifedrain','graveyard','recursion'],
+            'tagsAny': ['rats','swarm','aristocrats','sacrifice','devotion-b','lifedrain','graveyard','recursion'],
            'tags_all': []
        },
        'default_count': 25,
@ -685,7 +690,7 @@ MULTI_COPY_ARCHETYPES: Final[dict[str, dict[str, _Any]]] = {
        'printed_cap': None,
        'exclusive_group': 'rats',
        'triggers': {
-            'tags_any': ['rats','swarm','aristocrats','sacrifice','devotion-b','lifedrain','graveyard','recursion'],
+            'tagsAny': ['rats','swarm','aristocrats','sacrifice','devotion-b','lifedrain','graveyard','recursion'],
            'tags_all': []
        },
        'default_count': 25,
@ -700,7 +705,7 @@ MULTI_COPY_ARCHETYPES: Final[dict[str, dict[str, _Any]]] = {
        'printed_cap': 7,
        'exclusive_group': None,
        'triggers': {
-            'tags_any': ['dwarf kindred','treasure','equipment','tokens','go-wide','tribal'],
+            'tagsAny': ['dwarf kindred','treasure','equipment','tokens','go-wide','tribal'],
            'tags_all': []
        },
        'default_count': 7,
@ -715,7 +720,7 @@ MULTI_COPY_ARCHETYPES: Final[dict[str, dict[str, _Any]]] = {
        'printed_cap': None,
        'exclusive_group': None,
        'triggers': {
-            'tags_any': ['mill','advisor kindred','control','defenders','walls','draw-go'],
+            'tagsAny': ['mill','advisor kindred','control','defenders','walls','draw-go'],
            'tags_all': []
        },
        'default_count': 25,
@ -730,7 +735,7 @@ MULTI_COPY_ARCHETYPES: Final[dict[str, dict[str, _Any]]] = {
        'printed_cap': None,
        'exclusive_group': None,
        'triggers': {
-            'tags_any': ['demon kindred','aristocrats','sacrifice','recursion','lifedrain'],
+            'tagsAny': ['demon kindred','aristocrats','sacrifice','recursion','lifedrain'],
            'tags_all': []
        },
        'default_count': 25,
@ -745,7 +750,7 @@ MULTI_COPY_ARCHETYPES: Final[dict[str, dict[str, _Any]]] = {
        'printed_cap': 9,
        'exclusive_group': None,
        'triggers': {
-            'tags_any': ['wraith kindred','ring','amass','orc','menace','aristocrats','sacrifice','devotion-b'],
+            'tagsAny': ['wraith kindred','ring','amass','orc','menace','aristocrats','sacrifice','devotion-b'],
            'tags_all': []
        },
        'default_count': 9,
@ -760,7 +765,7 @@ MULTI_COPY_ARCHETYPES: Final[dict[str, dict[str, _Any]]] = {
        'printed_cap': None,
        'exclusive_group': None,
        'triggers': {
-            'tags_any': ['bird kindred','aggro'],
+            'tagsAny': ['bird kindred','aggro'],
            'tags_all': []
        },
        'default_count': 25,
@ -775,7 +780,7 @@ MULTI_COPY_ARCHETYPES: Final[dict[str, dict[str, _Any]]] = {
        'printed_cap': None,
        'exclusive_group': None,
        'triggers': {
-            'tags_any': ['aggro','human kindred','knight kindred','historic matters','artifacts matter'],
+            'tagsAny': ['aggro','human kindred','knight kindred','historic matters','artifacts matter'],
            'tags_all': []
        },
        'default_count': 25,
@ -918,3 +923,37 @@ ICONIC_CARDS: Final[set[str]] = {
    'Vampiric Tutor', 'Mystical Tutor', 'Enlightened Tutor', 'Worldly Tutor',
    'Eternal Witness', 'Solemn Simulacrum', 'Consecrated Sphinx', 'Avenger of Zendikar',
 }
+
+
+# M4: Parquet filtering helpers
+def get_commanders(df: pd.DataFrame) -> pd.DataFrame:
+    """Filter DataFrame to only commander-legal cards using isCommander flag.
+    
+    M4: Replaces CSV-based commander filtering with Parquet boolean flag.
+    
+    Args:
+        df: DataFrame with 'isCommander' column
+        
+    Returns:
+        Filtered DataFrame containing only commanders
+    """
+    if 'isCommander' not in df.columns:
+        return pd.DataFrame()
+    return df[df['isCommander'] == True].copy()  # noqa: E712
+
+
+def get_backgrounds(df: pd.DataFrame) -> pd.DataFrame:
+    """Filter DataFrame to only background cards using isBackground flag.
+    
+    M4: Replaces CSV-based background filtering with Parquet boolean flag.
+    
+    Args:
+        df: DataFrame with 'isBackground' column
+        
+    Returns:
+        Filtered DataFrame containing only backgrounds
+    """
+    if 'isBackground' not in df.columns:
+        return pd.DataFrame()
+    return df[df['isBackground'] == True].copy()  # noqa: E712
+
--- a/code/deck_builder/builder_utils.py
+++ b/code/deck_builder/builder_utils.py
@ -62,6 +62,32 @@ def _detect_produces_mana(text: str) -> bool:
 	return False


+def _extract_colors_from_land_type(type_line: str) -> List[str]:
+	"""Extract mana colors from basic land types in a type line.
+	
+	Args:
+		type_line: Card type line (e.g., "Land — Mountain", "Land — Forest Plains")
+		
+	Returns:
+		List of color letters (e.g., ['R'], ['G', 'W'])
+	"""
+	if not isinstance(type_line, str):
+		return []
+	type_lower = type_line.lower()
+	colors = []
+	basic_land_colors = {
+		'plains': 'W',
+		'island': 'U',
+		'swamp': 'B',
+		'mountain': 'R',
+		'forest': 'G',
+	}
+	for land_type, color in basic_land_colors.items():
+		if land_type in type_lower:
+			colors.append(color)
+	return colors
+
+
 def _resolved_csv_dir(base_dir: str | None = None) -> str:
 	try:
 		if base_dir:
@ -71,16 +97,86 @@ def _resolved_csv_dir(base_dir: str | None = None) -> str:
 		return base_dir or csv_dir()


+# M7: Cache for all cards Parquet DataFrame to avoid repeated loads
+_ALL_CARDS_CACHE: Dict[str, Any] = {"df": None, "mtime": None}
+
+
+def _load_all_cards_parquet() -> pd.DataFrame:
+	"""Load all cards from the unified Parquet file with caching.
+	
+	M4: Centralized Parquet loading for deck builder.
+	M7: Added module-level caching to avoid repeated file loads.
+	Returns empty DataFrame on error (defensive).
+	Converts numpy arrays to Python lists for compatibility with existing code.
+	"""
+	global _ALL_CARDS_CACHE
+	
+	try:
+		from code.path_util import get_processed_cards_path
+		from code.file_setup.data_loader import DataLoader
+		import numpy as np
+		import os
+		
+		parquet_path = get_processed_cards_path()
+		if not Path(parquet_path).exists():
+			return pd.DataFrame()
+		
+		# M7: Check cache and mtime
+		need_reload = _ALL_CARDS_CACHE["df"] is None
+		if not need_reload:
+			try:
+				current_mtime = os.path.getmtime(parquet_path)
+				cached_mtime = _ALL_CARDS_CACHE.get("mtime")
+				if cached_mtime is None or current_mtime > cached_mtime:
+					need_reload = True
+			except Exception:
+				# If mtime check fails, use cached version if available
+				pass
+		
+		if need_reload:
+			data_loader = DataLoader()
+			df = data_loader.read_cards(parquet_path, format="parquet")
+			
+			# M4: Convert numpy arrays to Python lists for compatibility
+			# Parquet stores lists as numpy arrays, but existing code expects Python lists
+			list_columns = ['themeTags', 'creatureTypes', 'metadataTags', 'keywords']
+			for col in list_columns:
+				if col in df.columns:
+					df[col] = df[col].apply(lambda x: x.tolist() if isinstance(x, np.ndarray) else x)
+			
+			# M7: Cache the result
+			_ALL_CARDS_CACHE["df"] = df
+			try:
+				_ALL_CARDS_CACHE["mtime"] = os.path.getmtime(parquet_path)
+			except Exception:
+				_ALL_CARDS_CACHE["mtime"] = None
+		
+		return _ALL_CARDS_CACHE["df"]
+	except Exception:
+		return pd.DataFrame()
+
+
@lru_cache(maxsize=None)
 def _load_multi_face_land_map(base_dir: str) -> Dict[str, Dict[str, Any]]:
-	"""Load mapping of multi-faced cards that have at least one land face."""
+	"""Load mapping of multi-faced cards that have at least one land face.
+	
+	M4: Migrated to use Parquet loading. base_dir parameter kept for
+	backward compatibility but now only used as cache key.
+	"""
 	try:
-		base_path = Path(base_dir)
-		csv_path = base_path / 'cards.csv'
-		if not csv_path.exists():
+		# M4: Load from Parquet instead of CSV
+		df = _load_all_cards_parquet()
+		if df.empty:
 			return {}
-		usecols = ['name', 'layout', 'side', 'type', 'text', 'manaCost', 'manaValue', 'faceName']
-		df = pd.read_csv(csv_path, usecols=usecols, low_memory=False)
+		
+		# Select only needed columns
+		# M9: Added backType to detect MDFC lands where land is on back face
+		# M9: Added colorIdentity to extract mana colors for MDFC lands
+		usecols = ['name', 'layout', 'side', 'type', 'text', 'manaCost', 'manaValue', 'faceName', 'backType', 'colorIdentity']
+		available_cols = [col for col in usecols if col in df.columns]
+		if not available_cols:
+			return {}
+		df = df[available_cols].copy()
 	except Exception:
 		return {}
 	if df.empty or 'layout' not in df.columns or 'type' not in df.columns:
@ -92,7 +188,16 @@ def _load_multi_face_land_map(base_dir: str) -> Dict[str, Dict[str, Any]]:
 	multi_df['type'] = multi_df['type'].fillna('').astype(str)
 	multi_df['side'] = multi_df['side'].fillna('').astype(str)
 	multi_df['text'] = multi_df['text'].fillna('').astype(str)
-	land_rows = multi_df[multi_df['type'].str.contains('land', case=False, na=False)]
+	# M9: Check both type and backType for land faces
+	if 'backType' in multi_df.columns:
+		multi_df['backType'] = multi_df['backType'].fillna('').astype(str)
+		land_mask = (
+			multi_df['type'].str.contains('land', case=False, na=False) |
+			multi_df['backType'].str.contains('land', case=False, na=False)
+		)
+		land_rows = multi_df[land_mask]
+	else:
+		land_rows = multi_df[multi_df['type'].str.contains('land', case=False, na=False)]
 	if land_rows.empty:
 		return {}
 	mapping: Dict[str, Dict[str, Any]] = {}
@ -101,6 +206,78 @@ def _load_multi_face_land_map(base_dir: str) -> Dict[str, Dict[str, Any]]:
 		seen: set[tuple[str, str, str]] = set()
 		front_is_land = False
 		layout_val = ''
+		
+		# M9: Handle merged rows with backType
+		if len(group) == 1 and 'backType' in group.columns:
+			row = group.iloc[0]
+			back_type_val = str(row.get('backType', '') or '')
+			if back_type_val and 'land' in back_type_val.lower():
+				# Construct synthetic faces from merged row
+				front_type = str(row.get('type', '') or '')
+				front_text = str(row.get('text', '') or '')
+				mana_cost_val = str(row.get('manaCost', '') or '')
+				mana_value_raw = row.get('manaValue', '')
+				mana_value_val = None
+				try:
+					if mana_value_raw not in (None, ''):
+						mana_value_val = float(mana_value_raw)
+						if math.isnan(mana_value_val):
+							mana_value_val = None
+				except Exception:
+					mana_value_val = None
+				
+				# Front face
+				faces.append({
+					'face': str(row.get('faceName', '') or name),
+					'side': 'a',
+					'type': front_type,
+					'text': front_text,
+					'mana_cost': mana_cost_val,
+					'mana_value': mana_value_val,
+					'produces_mana': _detect_produces_mana(front_text),
+					'is_land': 'land' in front_type.lower(),
+					'layout': str(row.get('layout', '') or ''),
+				})
+				
+				# Back face (synthesized)
+				# M9: Use colorIdentity column for MDFC land colors (more reliable than parsing type line)
+				color_identity_raw = row.get('colorIdentity', [])
+				if isinstance(color_identity_raw, str):
+					# Handle string format like "['G']" or "G"
+					try:
+						import ast
+						color_identity_raw = ast.literal_eval(color_identity_raw)
+					except Exception:
+						color_identity_raw = [c.strip() for c in color_identity_raw.split(',') if c.strip()]
+				back_face_colors = list(color_identity_raw) if color_identity_raw else []
+				# Fallback to parsing land type if colorIdentity not available
+				if not back_face_colors:
+					back_face_colors = _extract_colors_from_land_type(back_type_val)
+				
+				faces.append({
+					'face': name.split(' // ')[1] if ' // ' in name else 'Back',
+					'side': 'b',
+					'type': back_type_val,
+					'text': '',  # Not available in merged row
+					'mana_cost': '',
+					'mana_value': None,
+					'produces_mana': True,  # Assume land produces mana
+					'is_land': True,
+					'layout': str(row.get('layout', '') or ''),
+					'colors': back_face_colors,  # M9: Color information for mana sources
+				})
+				
+				front_is_land = 'land' in front_type.lower()
+				layout_val = str(row.get('layout', '') or '')
+				mapping[name] = {
+					'faces': faces,
+					'front_is_land': front_is_land,
+					'layout': layout_val,
+					'colors': back_face_colors,  # M9: Store colors at top level for easy access
+				}
+				continue
+		
+		# Original logic for multi-row format
 		for _, row in group.iterrows():
 			side_raw = str(row.get('side', '') or '').strip()
 			side_key = side_raw.lower()
@ -170,7 +347,13 @@ def parse_theme_tags(val) -> list[str]:
 	  ['Tag1', 'Tag2']
 	  "['Tag1', 'Tag2']"
 	  Tag1, Tag2
+	  numpy.ndarray (from Parquet)
 	Returns list of stripped string tags (may be empty)."""
+	# M4: Handle numpy arrays from Parquet
+	import numpy as np
+	if isinstance(val, np.ndarray):
+		return [str(x).strip() for x in val.tolist() if x and str(x).strip()]
+	
 	if isinstance(val, list):
 		flat: list[str] = []
 		for v in val:
@ -203,6 +386,18 @@ def parse_theme_tags(val) -> list[str]:
 	return []


+def ensure_theme_tags_list(val) -> list[str]:
+	"""Safely convert themeTags value to list, handling None, lists, and numpy arrays.
+	
+	This is a simpler wrapper around parse_theme_tags for the common case where
+	you just need to ensure you have a list to work with.
+	"""
+	if val is None:
+		return []
+	return parse_theme_tags(val)
+
+
+
 def normalize_theme_list(raw) -> list[str]:
 	"""Parse then lowercase + strip each tag."""
 	tags = parse_theme_tags(raw)
@ -230,7 +425,7 @@ def compute_color_source_matrix(card_library: Dict[str, dict], full_df) -> Dict[
 	matrix: Dict[str, Dict[str, int]] = {}
 	lookup = {}
 	if full_df is not None and not getattr(full_df, 'empty', True) and 'name' in full_df.columns:
-		for _, r in full_df.iterrows():  # type: ignore[attr-defined]
+		for _, r in full_df.iterrows():
 			nm = str(r.get('name', ''))
 			if nm and nm not in lookup:
 				lookup[nm] = r
@ -246,8 +441,13 @@ def compute_color_source_matrix(card_library: Dict[str, dict], full_df) -> Dict[
 		if hasattr(row, 'get'):
 			row_type_raw = row.get('type', row.get('type_line', '')) or ''
 		tline_full = str(row_type_raw).lower()
+		# M9: Check backType for MDFC land detection
+		back_type_raw = ''
+		if hasattr(row, 'get'):
+			back_type_raw = row.get('backType', '') or ''
+		back_type = str(back_type_raw).lower()
 		# Land or permanent that could produce mana via text
-		is_land = ('land' in entry_type) or ('land' in tline_full)
+		is_land = ('land' in entry_type) or ('land' in tline_full) or ('land' in back_type)
 		base_is_land = is_land
 		text_field_raw = ''
 		if hasattr(row, 'get'):
@ -277,7 +477,8 @@ def compute_color_source_matrix(card_library: Dict[str, dict], full_df) -> Dict[
 				if face_types or face_texts:
 					is_land = True
 		text_field = text_field_raw.lower().replace('\n', ' ')
-		# Skip obvious non-permanents (rituals etc.)
+		# Skip obvious non-permanents (rituals etc.) - but NOT if any face is a land
+		# M9: If is_land is True (from backType check), we keep it regardless of front face type
 		if (not is_land) and ('instant' in entry_type or 'sorcery' in entry_type or 'instant' in tline_full or 'sorcery' in tline_full):
 			continue
 		# Keep only candidates that are lands OR whose text indicates mana production
@ -351,6 +552,12 @@ def compute_color_source_matrix(card_library: Dict[str, dict], full_df) -> Dict[
 			colors['_dfc_land'] = True
 			if not (base_is_land or dfc_entry.get('front_is_land')):
 				colors['_dfc_counts_as_extra'] = True
+			# M9: Extract colors from DFC face metadata (back face land colors)
+			dfc_colors = dfc_entry.get('colors', [])
+			if dfc_colors:
+				for color in dfc_colors:
+					if color in colors:
+						colors[color] = 1
 		produces_any_color = any(colors[c] for c in ('W', 'U', 'B', 'R', 'G', 'C'))
 		if produces_any_color or colors.get('_dfc_land'):
 			matrix[name] = colors
@ -643,7 +850,7 @@ def select_top_land_candidates(df, already: set[str], basics: set[str], top_n: i
 	out: list[tuple[int,str,str,str]] = []
 	if df is None or getattr(df, 'empty', True):
 		return out
-	for _, row in df.iterrows():  # type: ignore[attr-defined]
+	for _, row in df.iterrows():
 		try:
 			name = str(row.get('name',''))
 			if not name or name in already or name in basics:
@ -907,7 +1114,7 @@ def prefer_owned_first(df, owned_names_lower: set[str], name_col: str = 'name'):
 # ---------------------------------------------------------------------------
 # Tag-driven land suggestion helpers
 # ---------------------------------------------------------------------------
-def build_tag_driven_suggestions(builder) -> list[dict]:  # type: ignore[override]
+def build_tag_driven_suggestions(builder) -> list[dict]:
 	"""Return a list of suggestion dicts based on selected commander tags.

 	Each dict fields:
@ -995,7 +1202,7 @@ def color_balance_addition_candidates(builder, target_color: str, combined_df) -
 		return []
 	existing = set(builder.card_library.keys())
 	out: list[tuple[str, int]] = []
-	for _, row in combined_df.iterrows():  # type: ignore[attr-defined]
+	for _, row in combined_df.iterrows():
 		name = str(row.get('name', ''))
 		if not name or name in existing or any(name == o[0] for o in out):
 			continue
--- a/code/deck_builder/combined_commander.py
+++ b/code/deck_builder/combined_commander.py
@ -7,8 +7,8 @@ from typing import Iterable, Sequence, Tuple

 from exceptions import CommanderPartnerError

-from code.deck_builder.partner_background_utils import analyze_partner_background
-from code.deck_builder.color_identity_utils import canon_color_code, color_label_from_code
+from .partner_background_utils import analyze_partner_background
+from .color_identity_utils import canon_color_code, color_label_from_code

 _WUBRG_ORDER: Tuple[str, ...] = ("W", "U", "B", "R", "G", "C")
 _COLOR_PRIORITY = {color: index for index, color in enumerate(_WUBRG_ORDER)}
--- a/code/deck_builder/enforcement.py
+++ b/code/deck_builder/enforcement.py
@ -88,12 +88,12 @@ def _candidate_pool_for_role(builder, role: str) -> List[Tuple[str, dict]]:
    # Sort by edhrecRank then manaValue
    try:
        from . import builder_utils as bu
-        sorted_df = bu.sort_by_priority(pool, ["edhrecRank", "manaValue"])  # type: ignore[attr-defined]
+        sorted_df = bu.sort_by_priority(pool, ["edhrecRank", "manaValue"])
        # Prefer-owned bias
        if getattr(builder, "prefer_owned", False):
            owned = getattr(builder, "owned_card_names", None)
            if owned:
-                sorted_df = bu.prefer_owned_first(sorted_df, {str(n).lower() for n in owned})  # type: ignore[attr-defined]
+                sorted_df = bu.prefer_owned_first(sorted_df, {str(n).lower() for n in owned})
    except Exception:
        sorted_df = pool

@ -363,7 +363,7 @@ def enforce_bracket_compliance(builder, mode: str = "prompt") -> Dict:
                    break
                # Rank candidates: break the most combos first; break ties by worst desirability
                cand_names = list(freq.keys())
-                cand_names.sort(key=lambda nm: (-int(freq.get(nm, 0)), _score(nm)), reverse=False)  # type: ignore[arg-type]
+                cand_names.sort(key=lambda nm: (-int(freq.get(nm, 0)), _score(nm)), reverse=False)
                removed_any = False
                for nm in cand_names:
                    if nm in blocked:
--- a/code/deck_builder/partner_selection.py
+++ b/code/deck_builder/partner_selection.py
@ -17,7 +17,7 @@ from logging_util import get_logger
 logger = get_logger(__name__)

 try:  # Optional pandas import for type checking without heavy dependency at runtime.
-    import pandas as _pd  # type: ignore
+    import pandas as _pd
 except Exception:  # pragma: no cover - tests provide DataFrame-like objects.
    _pd = None  # type: ignore

@ -267,7 +267,7 @@ def _find_commander_row(df: Any, name: str | None):
    if not target:
        return None

-    if _pd is not None and isinstance(df, _pd.DataFrame):  # type: ignore
+    if _pd is not None and isinstance(df, _pd.DataFrame):
        columns = [col for col in ("name", "faceName") if col in df.columns]
        for col in columns:
            series = df[col].astype(str).str.casefold()
@ -363,7 +363,14 @@ def _normalize_color_identity(value: Any) -> tuple[str, ...]:
 def _normalize_string_sequence(value: Any) -> tuple[str, ...]:
    if value is None:
        return tuple()
-    if isinstance(value, (list, tuple, set)):
+    # Handle numpy arrays, lists, tuples, sets, and other sequences
+    try:
+        import numpy as np
+        is_numpy = isinstance(value, np.ndarray)
+    except ImportError:
+        is_numpy = False
+    
+    if isinstance(value, (list, tuple, set)) or is_numpy:
        items = list(value)
    else:
        text = _safe_str(value)
--- a/code/deck_builder/phases/phase0_core.py
+++ b/code/deck_builder/phases/phase0_core.py
@ -25,11 +25,11 @@ No behavior change intended.

 # Attempt to use a fast fuzzy library; fall back gracefully
 try:
-    from rapidfuzz import process as rf_process, fuzz as rf_fuzz  # type: ignore
+    from rapidfuzz import process as rf_process, fuzz as rf_fuzz
    _FUZZ_BACKEND = "rapidfuzz"
 except ImportError:  # pragma: no cover - environment dependent
    try:
-        from fuzzywuzzy import process as fw_process, fuzz as fw_fuzz  # type: ignore
+        from fuzzywuzzy import process as fw_process, fuzz as fw_fuzz
        _FUZZ_BACKEND = "fuzzywuzzy"
    except ImportError:  # pragma: no cover
        _FUZZ_BACKEND = "difflib"
--- a/code/deck_builder/phases/phase1_commander.py
+++ b/code/deck_builder/phases/phase1_commander.py
@ -68,7 +68,7 @@ class CommanderSelectionMixin:
            out_words[0] = out_words[0][:1].upper() + out_words[0][1:]
        return ' '.join(out_words)

-    def choose_commander(self) -> str:  # type: ignore[override]
+    def choose_commander(self) -> str:
        df = self.load_commander_data()
        names = df["name"].tolist()
        while True:
@ -113,7 +113,7 @@ class CommanderSelectionMixin:
                    continue
            query = self._normalize_commander_query(choice)  # treat as new (normalized) query

-    def _present_commander_and_confirm(self, df: pd.DataFrame, name: str) -> bool:  # type: ignore[override]
+    def _present_commander_and_confirm(self, df: pd.DataFrame, name: str) -> bool:
        row = df[df["name"] == name].iloc[0]
        pretty = self._format_commander_pretty(row)
        self.output_func("\n" + pretty)
@ -126,16 +126,17 @@ class CommanderSelectionMixin:
                return False
            self.output_func("Please enter y or n.")

-    def _apply_commander_selection(self, row: pd.Series):  # type: ignore[override]
+    def _apply_commander_selection(self, row: pd.Series):
        self.commander_name = row["name"]
        self.commander_row = row
-        self.commander_tags = list(row.get("themeTags", []) or [])
+        tags_value = row.get("themeTags", [])
+        self.commander_tags = list(tags_value) if tags_value is not None else []
        self._initialize_commander_dict(row)

    # ---------------------------
    # Tag Prioritization
    # ---------------------------
-    def select_commander_tags(self) -> List[str]:  # type: ignore[override]
+    def select_commander_tags(self) -> List[str]:
        if not self.commander_name:
            self.output_func("No commander chosen yet. Selecting commander first...")
            self.choose_commander()
@ -172,7 +173,7 @@ class CommanderSelectionMixin:
        self._update_commander_dict_with_selected_tags()
        return self.selected_tags

-    def _prompt_tag_choice(self, available: List[str], prompt_text: str, allow_stop: bool) -> Optional[str]:  # type: ignore[override]
+    def _prompt_tag_choice(self, available: List[str], prompt_text: str, allow_stop: bool) -> Optional[str]:
        while True:
            self.output_func("\nCurrent options:")
            for i, t in enumerate(available, 1):
@ -191,7 +192,7 @@ class CommanderSelectionMixin:
                return matches[0]
            self.output_func("Invalid selection. Try again.")

-    def _update_commander_dict_with_selected_tags(self):  # type: ignore[override]
+    def _update_commander_dict_with_selected_tags(self):
        if not self.commander_dict and self.commander_row is not None:
            self._initialize_commander_dict(self.commander_row)
        if not self.commander_dict:
@ -204,7 +205,7 @@ class CommanderSelectionMixin:
    # ---------------------------
    # Power Bracket Selection
    # ---------------------------
-    def select_power_bracket(self) -> BracketDefinition:  # type: ignore[override]
+    def select_power_bracket(self) -> BracketDefinition:
        if self.bracket_definition:
            return self.bracket_definition
        self.output_func("\nChoose Deck Power Bracket:")
@ -228,14 +229,14 @@ class CommanderSelectionMixin:
                    return match
            self.output_func("Invalid input. Type 1-5 or 'info'.")

-    def _print_bracket_details(self):  # type: ignore[override]
+    def _print_bracket_details(self):
        self.output_func("\nBracket Details:")
        for bd in BRACKET_DEFINITIONS:
            self.output_func(f"\n[{bd.level}] {bd.name}")
            self.output_func(bd.long_desc)
            self.output_func(self._format_limits(bd.limits))

-    def _print_selected_bracket_summary(self):  # type: ignore[override]
+    def _print_selected_bracket_summary(self):
        self.output_func("\nBracket Constraints:")
        if self.bracket_limits:
            self.output_func(self._format_limits(self.bracket_limits))
--- a/code/deck_builder/phases/phase2_lands_basics.py
+++ b/code/deck_builder/phases/phase2_lands_basics.py
@ -22,7 +22,7 @@ Expected attributes / methods on the host DeckBuilder:


 class LandBasicsMixin:
-    def add_basic_lands(self):  # type: ignore[override]
+    def add_basic_lands(self):
        """Add basic (or snow basic) lands based on color identity.

        Logic:
@ -71,8 +71,8 @@ class LandBasicsMixin:
        basic_min: Optional[int] = None
        land_total: Optional[int] = None
        if hasattr(self, 'ideal_counts') and getattr(self, 'ideal_counts'):
-            basic_min = self.ideal_counts.get('basic_lands')  # type: ignore[attr-defined]
-            land_total = self.ideal_counts.get('lands')  # type: ignore[attr-defined]
+            basic_min = self.ideal_counts.get('basic_lands')
+            land_total = self.ideal_counts.get('lands')
        if basic_min is None:
            basic_min = getattr(bc, 'DEFAULT_BASIC_LAND_COUNT', 20)
        if land_total is None:
@ -136,7 +136,7 @@ class LandBasicsMixin:
            self.output_func(f"  {name.ljust(width)} : {cnt}")
        self.output_func(f"  Total Basics : {sum(allocation.values())} (Target {target_basics}, Min {basic_min})")

-    def run_land_step1(self):  # type: ignore[override]
+    def run_land_step1(self):
        """Public wrapper to execute land building step 1 (basics)."""
        self.add_basic_lands()
        try:
--- a/code/deck_builder/phases/phase2_lands_duals.py
+++ b/code/deck_builder/phases/phase2_lands_duals.py
@ -21,7 +21,7 @@ Host DeckBuilder must provide:
 """

 class LandDualsMixin:
-    def add_dual_lands(self, requested_count: int | None = None):  # type: ignore[override]
+    def add_dual_lands(self, requested_count: int | None = None):
        """Add two-color 'typed' dual lands based on color identity."""
        if not getattr(self, 'files_to_load', []):
            try:
@ -117,10 +117,10 @@ class LandDualsMixin:
                pair_buckets[key] = names
        min_basic_cfg = getattr(bc, 'DEFAULT_BASIC_LAND_COUNT', 20)
        if getattr(self, 'ideal_counts', None):
-            min_basic_cfg = self.ideal_counts.get('basic_lands', min_basic_cfg)  # type: ignore[attr-defined]
-        basic_floor = self._basic_floor(min_basic_cfg)  # type: ignore[attr-defined]
+            min_basic_cfg = self.ideal_counts.get('basic_lands', min_basic_cfg)
+        basic_floor = self._basic_floor(min_basic_cfg)
        default_dual_target = getattr(bc, 'DUAL_LAND_DEFAULT_COUNT', 6)
-        remaining_capacity = max(0, land_target - self._current_land_count())  # type: ignore[attr-defined]
+        remaining_capacity = max(0, land_target - self._current_land_count())
        effective_default = min(default_dual_target, remaining_capacity if remaining_capacity>0 else len(pool), len(pool))
        desired = effective_default if requested_count is None else max(0, int(requested_count))
        if desired == 0:
@ -129,14 +129,14 @@ class LandDualsMixin:
        if remaining_capacity == 0 and desired > 0:
            slots_needed = desired
            freed_slots = 0
-            while freed_slots < slots_needed and self._count_basic_lands() > basic_floor:  # type: ignore[attr-defined]
-                target_basic = self._choose_basic_to_trim()  # type: ignore[attr-defined]
-                if not target_basic or not self._decrement_card(target_basic):  # type: ignore[attr-defined]
+            while freed_slots < slots_needed and self._count_basic_lands() > basic_floor:
+                target_basic = self._choose_basic_to_trim()
+                if not target_basic or not self._decrement_card(target_basic):
                    break
                freed_slots += 1
            if freed_slots == 0:
                desired = 0
-        remaining_capacity = max(0, land_target - self._current_land_count())  # type: ignore[attr-defined]
+        remaining_capacity = max(0, land_target - self._current_land_count())
        desired = min(desired, remaining_capacity, len(pool))
        if desired <= 0:
            self.output_func("Dual Lands: No capacity after trimming; skipping.")
@ -146,7 +146,7 @@ class LandDualsMixin:
        rng = getattr(self, 'rng', None)
        try:
            if rng:
-                rng.shuffle(bucket_keys)  # type: ignore
+                rng.shuffle(bucket_keys)
            else:
                random.shuffle(bucket_keys)
        except Exception:
@ -171,7 +171,7 @@ class LandDualsMixin:
                break
        added: List[str] = []
        for name in chosen:
-            if self._current_land_count() >= land_target:  # type: ignore[attr-defined]
+            if self._current_land_count() >= land_target:
                break
            # Determine sub_role as concatenated color pair for traceability
            try:
@ -198,7 +198,7 @@ class LandDualsMixin:
                role='dual',
                sub_role=sub_role,
                added_by='lands_step5'
-            )  # type: ignore[attr-defined]
+            )
            added.append(name)
        self.output_func("\nDual Lands Added (Step 5):")
        if not added:
@ -207,11 +207,11 @@ class LandDualsMixin:
            width = max(len(n) for n in added)
            for n in added:
                self.output_func(f"  {n.ljust(width)} : 1")
-        self.output_func(f"  Land Count Now : {self._current_land_count()} / {land_target}")  # type: ignore[attr-defined]
+        self.output_func(f"  Land Count Now : {self._current_land_count()} / {land_target}")

-    def run_land_step5(self, requested_count: int | None = None):  # type: ignore[override]
+    def run_land_step5(self, requested_count: int | None = None):
        self.add_dual_lands(requested_count=requested_count)
-        self._enforce_land_cap(step_label="Duals (Step 5)")  # type: ignore[attr-defined]
+        self._enforce_land_cap(step_label="Duals (Step 5)")
        try:
            from .. import builder_utils as _bu
            _bu.export_current_land_pool(self, '5')
--- a/code/deck_builder/phases/phase2_lands_fetch.py
+++ b/code/deck_builder/phases/phase2_lands_fetch.py
@ -19,7 +19,7 @@ Host DeckBuilder must supply:
 """

 class LandFetchMixin:
-    def add_fetch_lands(self, requested_count: int | None = None):  # type: ignore[override]
+    def add_fetch_lands(self, requested_count: int | None = None):
        """Add fetch lands (color-specific + generic) respecting land target."""
        if not getattr(self, 'files_to_load', []):
            try:
@ -28,8 +28,8 @@ class LandFetchMixin:
            except Exception as e:  # pragma: no cover - defensive
                self.output_func(f"Cannot add fetch lands until color identity resolved: {e}")
                return
-        land_target = (getattr(self, 'ideal_counts', {}).get('lands') if getattr(self, 'ideal_counts', None) else None) or getattr(bc, 'DEFAULT_LAND_COUNT', 35)  # type: ignore[attr-defined]
-        current = self._current_land_count()  # type: ignore[attr-defined]
+        land_target = (getattr(self, 'ideal_counts', {}).get('lands') if getattr(self, 'ideal_counts', None) else None) or getattr(bc, 'DEFAULT_LAND_COUNT', 35)
+        current = self._current_land_count()
        color_order = [c for c in getattr(self, 'color_identity', []) if c in ['W','U','B','R','G']]
        color_map = getattr(bc, 'COLOR_TO_FETCH_LANDS', {})
        candidates: List[str] = []
@ -56,7 +56,7 @@ class LandFetchMixin:
            self.output_func("\nAdd Fetch Lands (Step 4):")
            self.output_func("Fetch lands help fix colors & enable landfall / graveyard synergies.")
            prompt = f"Enter desired number of fetch lands (default: {effective_default}):"
-            desired = self._prompt_int_with_default(prompt + ' ', effective_default, minimum=0, maximum=20)  # type: ignore[attr-defined]
+            desired = self._prompt_int_with_default(prompt + ' ', effective_default, minimum=0, maximum=20)
        else:
            desired = max(0, int(requested_count))
        if desired > remaining_fetch_slots:
@ -70,20 +70,20 @@ class LandFetchMixin:
        if remaining_capacity == 0 and desired > 0:
            min_basic_cfg = getattr(bc, 'DEFAULT_BASIC_LAND_COUNT', 20)
            if getattr(self, 'ideal_counts', None):
-                min_basic_cfg = self.ideal_counts.get('basic_lands', min_basic_cfg)  # type: ignore[attr-defined]
-            floor_basics = self._basic_floor(min_basic_cfg)  # type: ignore[attr-defined]
+                min_basic_cfg = self.ideal_counts.get('basic_lands', min_basic_cfg)
+            floor_basics = self._basic_floor(min_basic_cfg)
            slots_needed = desired
-            while slots_needed > 0 and self._count_basic_lands() > floor_basics:  # type: ignore[attr-defined]
-                target_basic = self._choose_basic_to_trim()  # type: ignore[attr-defined]
-                if not target_basic or not self._decrement_card(target_basic):  # type: ignore[attr-defined]
+            while slots_needed > 0 and self._count_basic_lands() > floor_basics:
+                target_basic = self._choose_basic_to_trim()
+                if not target_basic or not self._decrement_card(target_basic):
                    break
                slots_needed -= 1
-                remaining_capacity = max(0, land_target - self._current_land_count())  # type: ignore[attr-defined]
+                remaining_capacity = max(0, land_target - self._current_land_count())
                if remaining_capacity > 0 and slots_needed == 0:
                    break
            if slots_needed > 0 and remaining_capacity == 0:
                desired -= slots_needed
-        remaining_capacity = max(0, land_target - self._current_land_count())  # type: ignore[attr-defined]
+        remaining_capacity = max(0, land_target - self._current_land_count())
        desired = min(desired, remaining_capacity, len(candidates), remaining_fetch_slots)
        if desired <= 0:
            self.output_func("Fetch Lands: No capacity (after trimming) or desired reduced to 0; skipping.")
@ -101,7 +101,7 @@ class LandFetchMixin:
            if k >= len(pool):
                return pool.copy()
            try:
-                return (rng.sample if rng else random.sample)(pool, k)  # type: ignore
+                return (rng.sample if rng else random.sample)(pool, k)
            except Exception:
                return pool[:k]
        need = desired
@ -117,7 +117,7 @@ class LandFetchMixin:

        added: List[str] = []
        for nm in chosen:
-            if self._current_land_count() >= land_target:  # type: ignore[attr-defined]
+            if self._current_land_count() >= land_target:
                break
            note = 'generic' if nm in generic_list else 'color-specific'
            self.add_card(
@ -126,11 +126,11 @@ class LandFetchMixin:
                role='fetch',
                sub_role=note,
                added_by='lands_step4'
-            )  # type: ignore[attr-defined]
+            )
            added.append(nm)
        # Record actual number of fetch lands added for export/replay context
        try:
-            setattr(self, 'fetch_count', len(added))  # type: ignore[attr-defined]
+            setattr(self, 'fetch_count', len(added))
        except Exception:
            pass
        self.output_func("\nFetch Lands Added (Step 4):")
@ -141,9 +141,9 @@ class LandFetchMixin:
            for n in added:
                note = 'generic' if n in generic_list else 'color-specific'
                self.output_func(f"  {n.ljust(width)} : 1  ({note})")
-        self.output_func(f"  Land Count Now : {self._current_land_count()} / {land_target}")  # type: ignore[attr-defined]
+        self.output_func(f"  Land Count Now : {self._current_land_count()} / {land_target}")

-    def run_land_step4(self, requested_count: int | None = None):  # type: ignore[override]
+    def run_land_step4(self, requested_count: int | None = None):
        """Public wrapper to add fetch lands.

        If ideal_counts['fetch_lands'] is set, it will be used to bypass the prompt in both CLI and web builds.
@ -155,7 +155,7 @@ class LandFetchMixin:
        except Exception:
            desired = requested_count
        self.add_fetch_lands(requested_count=desired)
-        self._enforce_land_cap(step_label="Fetch (Step 4)")  # type: ignore[attr-defined]
+        self._enforce_land_cap(step_label="Fetch (Step 4)")
        try:
            from .. import builder_utils as _bu
            _bu.export_current_land_pool(self, '4')
--- a/code/deck_builder/phases/phase2_lands_kindred.py
+++ b/code/deck_builder/phases/phase2_lands_kindred.py
@ -20,7 +20,7 @@ Host DeckBuilder must provide:
 """

 class LandKindredMixin:
-    def add_kindred_lands(self):  # type: ignore[override]
+    def add_kindred_lands(self):
        """Add kindred-oriented lands ONLY if a selected tag includes 'Kindred' or 'Tribal'.

        Baseline inclusions on kindred focus:
@ -41,32 +41,32 @@ class LandKindredMixin:
            self.output_func("Kindred Lands: No selected kindred/tribal tag; skipping.")
            return
        if hasattr(self, 'ideal_counts') and getattr(self, 'ideal_counts'):
-            land_target = self.ideal_counts.get('lands', getattr(bc, 'DEFAULT_LAND_COUNT', 35))  # type: ignore[attr-defined]
+            land_target = self.ideal_counts.get('lands', getattr(bc, 'DEFAULT_LAND_COUNT', 35))
        else:
            land_target = getattr(bc, 'DEFAULT_LAND_COUNT', 35)
        min_basic_cfg = getattr(bc, 'DEFAULT_BASIC_LAND_COUNT', 20)
        if hasattr(self, 'ideal_counts') and getattr(self, 'ideal_counts'):
-            min_basic_cfg = self.ideal_counts.get('basic_lands', min_basic_cfg)  # type: ignore[attr-defined]
-        basic_floor = self._basic_floor(min_basic_cfg)  # type: ignore[attr-defined]
+            min_basic_cfg = self.ideal_counts.get('basic_lands', min_basic_cfg)
+        basic_floor = self._basic_floor(min_basic_cfg)

        def ensure_capacity() -> bool:
-            if self._current_land_count() < land_target:  # type: ignore[attr-defined]
+            if self._current_land_count() < land_target:
                return True
-            if self._count_basic_lands() <= basic_floor:  # type: ignore[attr-defined]
+            if self._count_basic_lands() <= basic_floor:
                return False
-            target_basic = self._choose_basic_to_trim()  # type: ignore[attr-defined]
+            target_basic = self._choose_basic_to_trim()
            if not target_basic:
                return False
-            if not self._decrement_card(target_basic):  # type: ignore[attr-defined]
+            if not self._decrement_card(target_basic):
                return False
-            return self._current_land_count() < land_target  # type: ignore[attr-defined]
+            return self._current_land_count() < land_target

        colors = getattr(self, 'color_identity', []) or []
        added: List[str] = []
        reasons: Dict[str, str] = {}

        def try_add(name: str, reason: str):
-            if name in self.card_library:  # type: ignore[attr-defined]
+            if name in self.card_library:
                return
            if not ensure_capacity():
                return
@ -77,7 +77,7 @@ class LandKindredMixin:
                sub_role='baseline' if reason.startswith('kindred focus') else 'tribe-specific',
                added_by='lands_step3',
                trigger_tag='Kindred/Tribal'
-            )  # type: ignore[attr-defined]
+            )
            added.append(name)
            reasons[name] = reason

@ -105,14 +105,14 @@ class LandKindredMixin:
        if snapshot is not None and not snapshot.empty and tribe_terms:
            dynamic_limit = 5
            for tribe in sorted(tribe_terms):
-                if self._current_land_count() >= land_target or dynamic_limit <= 0:  # type: ignore[attr-defined]
+                if self._current_land_count() >= land_target or dynamic_limit <= 0:
                    break
                tribe_lower = tribe.lower()
                matches: List[str] = []
                for _, row in snapshot.iterrows():
                    try:
                        nm = str(row.get('name', ''))
-                        if not nm or nm in self.card_library:  # type: ignore[attr-defined]
+                        if not nm or nm in self.card_library:
                            continue
                        tline = str(row.get('type', row.get('type_line', ''))).lower()
                        if 'land' not in tline:
@ -125,7 +125,7 @@ class LandKindredMixin:
                    except Exception:
                        continue
                for nm in matches[:2]:
-                    if self._current_land_count() >= land_target or dynamic_limit <= 0:  # type: ignore[attr-defined]
+                    if self._current_land_count() >= land_target or dynamic_limit <= 0:
                        break
                    if nm in added or nm in getattr(bc, 'BASIC_LANDS', []):
                        continue
@ -139,12 +139,12 @@ class LandKindredMixin:
            width = max(len(n) for n in added)
            for n in added:
                self.output_func(f"  {n.ljust(width)} : 1  ({reasons.get(n,'')})")
-        self.output_func(f"  Land Count Now : {self._current_land_count()} / {land_target}")  # type: ignore[attr-defined]
+        self.output_func(f"  Land Count Now : {self._current_land_count()} / {land_target}")

-    def run_land_step3(self):  # type: ignore[override]
+    def run_land_step3(self):
        """Public wrapper to add kindred-focused lands."""
        self.add_kindred_lands()
-        self._enforce_land_cap(step_label="Kindred (Step 3)")  # type: ignore[attr-defined]
+        self._enforce_land_cap(step_label="Kindred (Step 3)")
        try:
            from .. import builder_utils as _bu
            _bu.export_current_land_pool(self, '3')
--- a/code/deck_builder/phases/phase2_lands_misc.py
+++ b/code/deck_builder/phases/phase2_lands_misc.py
@ -19,7 +19,7 @@ class LandMiscUtilityMixin:
      - Diagnostics & CSV exports
    """

-    def add_misc_utility_lands(self, requested_count: Optional[int] = None):  # type: ignore[override]
+    def add_misc_utility_lands(self, requested_count: Optional[int] = None):
        # --- Initialization & candidate collection ---
        if not getattr(self, 'files_to_load', None):
            try:
@ -293,7 +293,7 @@ class LandMiscUtilityMixin:
        if getattr(self, 'show_diagnostics', False) and filtered_out:
            self.output_func(f"  (Mono-color excluded candidates: {', '.join(filtered_out)})")

-    def run_land_step7(self, requested_count: Optional[int] = None):  # type: ignore[override]
+    def run_land_step7(self, requested_count: Optional[int] = None):
        self.add_misc_utility_lands(requested_count=requested_count)
        self._enforce_land_cap(step_label="Utility (Step 7)")
        self._build_tag_driven_land_suggestions()
@ -305,12 +305,12 @@ class LandMiscUtilityMixin:
            pass

    # ---- Tag-driven suggestion helpers (used after Step 7) ----
-    def _build_tag_driven_land_suggestions(self):  # type: ignore[override]
+    def _build_tag_driven_land_suggestions(self):
        suggestions = bu.build_tag_driven_suggestions(self)
        if suggestions:
            self.suggested_lands_queue.extend(suggestions)

-    def _apply_land_suggestions_if_room(self):  # type: ignore[override]
+    def _apply_land_suggestions_if_room(self):
        if not self.suggested_lands_queue:
            return
        land_target = getattr(self, 'ideal_counts', {}).get('lands', getattr(bc, 'DEFAULT_LAND_COUNT', 35)) if getattr(self, 'ideal_counts', None) else getattr(bc, 'DEFAULT_LAND_COUNT', 35)
--- a/code/deck_builder/phases/phase2_lands_optimize.py
+++ b/code/deck_builder/phases/phase2_lands_optimize.py
@ -12,7 +12,7 @@ class LandOptimizationMixin:
    Provides optimize_tapped_lands and run_land_step8 (moved from monolithic builder).
    """

-    def optimize_tapped_lands(self):  # type: ignore[override]
+    def optimize_tapped_lands(self):
        df = getattr(self, '_combined_cards_df', None)
        if df is None or df.empty:
            return
@ -146,7 +146,7 @@ class LandOptimizationMixin:
                new_tapped += 1
        self.output_func(f"  Tapped Lands After : {new_tapped} (threshold {threshold})")

-    def run_land_step8(self):  # type: ignore[override]
+    def run_land_step8(self):
        self.optimize_tapped_lands()
        self._enforce_land_cap(step_label="Tapped Opt (Step 8)")
        if self.color_source_matrix_baseline is None:
--- a/code/deck_builder/phases/phase2_lands_staples.py
+++ b/code/deck_builder/phases/phase2_lands_staples.py
@ -27,10 +27,10 @@ class LandStaplesMixin:
    # ---------------------------
    # Land Building Step 2: Staple Nonbasic Lands (NO Kindred yet)
    # ---------------------------
-    def _current_land_count(self) -> int:  # type: ignore[override]
+    def _current_land_count(self) -> int:
        """Return total number of land cards currently in the library (counts duplicates)."""
        total = 0
-        for name, entry in self.card_library.items():  # type: ignore[attr-defined]
+        for name, entry in self.card_library.items():
            ctype = entry.get('Card Type', '')
            if ctype and 'land' in ctype.lower():
                total += entry.get('Count', 1)
@ -47,7 +47,7 @@ class LandStaplesMixin:
                    continue
        return total

-    def add_staple_lands(self):  # type: ignore[override]
+    def add_staple_lands(self):
        """Add generic staple lands defined in STAPLE_LAND_CONDITIONS (excluding kindred lands).

        Respects total land target (ideal_counts['lands']). Skips additions once target reached.
@ -62,25 +62,25 @@ class LandStaplesMixin:
                return
        land_target = None
        if hasattr(self, 'ideal_counts') and getattr(self, 'ideal_counts'):
-            land_target = self.ideal_counts.get('lands')  # type: ignore[attr-defined]
+            land_target = self.ideal_counts.get('lands')
        if land_target is None:
            land_target = getattr(bc, 'DEFAULT_LAND_COUNT', 35)
        min_basic_cfg = getattr(bc, 'DEFAULT_BASIC_LAND_COUNT', 20)
        if hasattr(self, 'ideal_counts') and getattr(self, 'ideal_counts'):
-            min_basic_cfg = self.ideal_counts.get('basic_lands', min_basic_cfg)  # type: ignore[attr-defined]
-        basic_floor = self._basic_floor(min_basic_cfg)  # type: ignore[attr-defined]
+            min_basic_cfg = self.ideal_counts.get('basic_lands', min_basic_cfg)
+        basic_floor = self._basic_floor(min_basic_cfg)

        def ensure_capacity() -> bool:
-            if self._current_land_count() < land_target:  # type: ignore[attr-defined]
+            if self._current_land_count() < land_target:
                return True
-            if self._count_basic_lands() <= basic_floor:  # type: ignore[attr-defined]
+            if self._count_basic_lands() <= basic_floor:
                return False
-            target_basic = self._choose_basic_to_trim()  # type: ignore[attr-defined]
+            target_basic = self._choose_basic_to_trim()
            if not target_basic:
                return False
-            if not self._decrement_card(target_basic):  # type: ignore[attr-defined]
+            if not self._decrement_card(target_basic):
                return False
-            return self._current_land_count() < land_target  # type: ignore[attr-defined]
+            return self._current_land_count() < land_target

        commander_tags_all = set(getattr(self, 'commander_tags', []) or []) | set(getattr(self, 'selected_tags', []) or [])
        colors = getattr(self, 'color_identity', []) or []
@ -102,7 +102,7 @@ class LandStaplesMixin:
            if not ensure_capacity():
                self.output_func("Staple Lands: Cannot free capacity without violating basic floor; stopping additions.")
                break
-            if land_name in self.card_library:  # type: ignore[attr-defined]
+            if land_name in self.card_library:
                continue
            try:
                include = cond(list(commander_tags_all), colors, commander_power)
@ -115,7 +115,7 @@ class LandStaplesMixin:
                    role='staple',
                    sub_role='generic-staple',
                    added_by='lands_step2'
-                )  # type: ignore[attr-defined]
+                )
                added.append(land_name)
                if land_name == 'Command Tower':
                    reasons[land_name] = f"multi-color ({len(colors)} colors)"
@ -137,12 +137,12 @@ class LandStaplesMixin:
            for n in added:
                reason = reasons.get(n, '')
                self.output_func(f"  {n.ljust(width)} : 1  {('(' + reason + ')') if reason else ''}")
-        self.output_func(f"  Land Count Now : {self._current_land_count()} / {land_target}")  # type: ignore[attr-defined]
+        self.output_func(f"  Land Count Now : {self._current_land_count()} / {land_target}")

-    def run_land_step2(self):  # type: ignore[override]
+    def run_land_step2(self):
        """Public wrapper for adding generic staple nonbasic lands (excluding kindred)."""
        self.add_staple_lands()
-        self._enforce_land_cap(step_label="Staples (Step 2)")  # type: ignore[attr-defined]
+        self._enforce_land_cap(step_label="Staples (Step 2)")
        try:
            from .. import builder_utils as _bu
            _bu.export_current_land_pool(self, '2')
--- a/code/deck_builder/phases/phase2_lands_triples.py
+++ b/code/deck_builder/phases/phase2_lands_triples.py
@ -59,7 +59,7 @@ class LandTripleMixin:
            'forest': 'G',
        }

-        for _, row in df.iterrows():  # type: ignore
+        for _, row in df.iterrows():
            try:
                name = str(row.get('name',''))
                if not name or name in self.card_library:
--- a/code/deck_builder/phases/phase3_creatures.py
+++ b/code/deck_builder/phases/phase3_creatures.py
@ -33,7 +33,7 @@ class CreatureAdditionMixin:
            self.output_func("Card pool missing 'type' column; cannot add creatures.")
            return
        try:
-            context = self.get_theme_context()  # type: ignore[attr-defined]
+            context = self.get_theme_context()
        except Exception:
            context = None
        if context is None or not getattr(context, 'ordered_targets', []):
@ -120,7 +120,7 @@ class CreatureAdditionMixin:
                            mana_cost=row.get('manaCost',''),
                            mana_value=row.get('manaValue', row.get('cmc','')),
                            creature_types=row.get('creatureTypes', []) if isinstance(row.get('creatureTypes', []), list) else [],
-                            tags=row.get('themeTags', []) if isinstance(row.get('themeTags', []), list) else [],
+                            tags=bu.ensure_theme_tags_list(row.get('themeTags')),
                            role='creature',
                            sub_role='all_theme',
                            added_by='creature_all_theme',
@ -231,7 +231,7 @@ class CreatureAdditionMixin:
                    mana_cost=row.get('manaCost',''),
                    mana_value=row.get('manaValue', row.get('cmc','')),
                    creature_types=row.get('creatureTypes', []) if isinstance(row.get('creatureTypes', []), list) else [],
-                    tags=row.get('themeTags', []) if isinstance(row.get('themeTags', []), list) else [],
+                    tags=bu.ensure_theme_tags_list(row.get('themeTags')),
                    role='creature',
                    sub_role=role,
                    added_by='creature_add',
@ -288,7 +288,7 @@ class CreatureAdditionMixin:
                        mana_cost=row.get('manaCost',''),
                        mana_value=row.get('manaValue', row.get('cmc','')),
                        creature_types=row.get('creatureTypes', []) if isinstance(row.get('creatureTypes', []), list) else [],
-                        tags=row.get('themeTags', []) if isinstance(row.get('themeTags', []), list) else [],
+                        tags=bu.ensure_theme_tags_list(row.get('themeTags')),
                        role='creature',
                        sub_role='fill',
                        added_by='creature_fill',
@ -480,7 +480,7 @@ class CreatureAdditionMixin:
                drop_idx = tags_series.apply(lambda lst, nd=needles: any(any(n in t for n in nd) for t in lst))
                mask_keep = [mk and (not di) for mk, di in zip(mask_keep, drop_idx.tolist())]
            try:
-                import pandas as _pd  # type: ignore
+                import pandas as _pd
                mask_keep = _pd.Series(mask_keep, index=df.index)
            except Exception:
                pass
@ -551,7 +551,7 @@ class CreatureAdditionMixin:
                mana_cost=row.get('manaCost',''),
                mana_value=row.get('manaValue', row.get('cmc','')),
                creature_types=row.get('creatureTypes', []) if isinstance(row.get('creatureTypes', []), list) else [],
-                tags=row.get('themeTags', []) if isinstance(row.get('themeTags', []), list) else [],
+                tags=bu.ensure_theme_tags_list(row.get('themeTags')),
                role='creature',
                sub_role=role,
                added_by='creature_add',
@ -590,7 +590,7 @@ class CreatureAdditionMixin:
                mana_cost=row.get('manaCost',''),
                mana_value=row.get('manaValue', row.get('cmc','')),
                creature_types=row.get('creatureTypes', []) if isinstance(row.get('creatureTypes', []), list) else [],
-                tags=row.get('themeTags', []) if isinstance(row.get('themeTags', []), list) else [],
+                tags=bu.ensure_theme_tags_list(row.get('themeTags')),
                role='creature',
                sub_role='fill',
                added_by='creature_fill',
@ -672,7 +672,7 @@ class CreatureAdditionMixin:
                mana_cost=row.get('manaCost',''),
                mana_value=row.get('manaValue', row.get('cmc','')),
                creature_types=row.get('creatureTypes', []) if isinstance(row.get('creatureTypes', []), list) else [],
-                tags=row.get('themeTags', []) if isinstance(row.get('themeTags', []), list) else [],
+                tags=bu.ensure_theme_tags_list(row.get('themeTags')),
                role='creature',
                sub_role='all_theme',
                added_by='creature_all_theme',
--- a/code/deck_builder/phases/phase4_spells.py
+++ b/code/deck_builder/phases/phase4_spells.py
@ -78,7 +78,7 @@ class SpellAdditionMixin:
                # Combine into keep mask
                mask_keep = [mk and (not di) for mk, di in zip(mask_keep, drop_idx.tolist())]
            try:
-                import pandas as _pd  # type: ignore
+                import pandas as _pd
                mask_keep = _pd.Series(mask_keep, index=df.index)
            except Exception:
                pass
@ -193,7 +193,7 @@ class SpellAdditionMixin:
                    card_type=r.get('type',''),
                    mana_cost=r.get('manaCost',''),
                    mana_value=r.get('manaValue', r.get('cmc','')),
-                    tags=r.get('themeTags', []) if isinstance(r.get('themeTags', []), list) else [],
+                    tags=bu.ensure_theme_tags_list(r.get('themeTags')),
                    role='ramp',
                    sub_role=phase_name.lower(),
                    added_by='spell_ramp'
@ -322,7 +322,7 @@ class SpellAdditionMixin:
                card_type=r.get('type',''),
                mana_cost=r.get('manaCost',''),
                mana_value=r.get('manaValue', r.get('cmc','')),
-                tags=r.get('themeTags', []) if isinstance(r.get('themeTags', []), list) else [],
+                tags=bu.ensure_theme_tags_list(r.get('themeTags')),
                role='removal',
                sub_role='spot',
                added_by='spell_removal'
@ -399,7 +399,7 @@ class SpellAdditionMixin:
                card_type=r.get('type',''),
                mana_cost=r.get('manaCost',''),
                mana_value=r.get('manaValue', r.get('cmc','')),
-                tags=r.get('themeTags', []) if isinstance(r.get('themeTags', []), list) else [],
+                tags=bu.ensure_theme_tags_list(r.get('themeTags')),
                role='wipe',
                sub_role='board',
                added_by='spell_wipe'
@ -493,7 +493,7 @@ class SpellAdditionMixin:
                card_type=r.get('type',''),
                mana_cost=r.get('manaCost',''),
                mana_value=r.get('manaValue', r.get('cmc','')),
-                tags=r.get('themeTags', []) if isinstance(r.get('themeTags', []), list) else [],
+                tags=bu.ensure_theme_tags_list(r.get('themeTags')),
                role='card_advantage',
                sub_role='conditional',
                added_by='spell_draw'
@ -516,7 +516,7 @@ class SpellAdditionMixin:
                    card_type=r.get('type',''),
                    mana_cost=r.get('manaCost',''),
                    mana_value=r.get('manaValue', r.get('cmc','')),
-                    tags=r.get('themeTags', []) if isinstance(r.get('themeTags', []), list) else [],
+                    tags=bu.ensure_theme_tags_list(r.get('themeTags')),
                    role='card_advantage',
                    sub_role='unconditional',
                    added_by='spell_draw'
@ -713,7 +713,7 @@ class SpellAdditionMixin:
                card_type=r.get('type',''),
                mana_cost=r.get('manaCost',''),
                mana_value=r.get('manaValue', r.get('cmc','')),
-                tags=r.get('themeTags', []) if isinstance(r.get('themeTags', []), list) else [],
+                tags=bu.ensure_theme_tags_list(r.get('themeTags')),
                role='protection',
                added_by='spell_protection'
            )
@ -742,7 +742,7 @@ class SpellAdditionMixin:
        if df is None or df.empty or 'type' not in df.columns:
            return
        try:
-            context = self.get_theme_context()  # type: ignore[attr-defined]
+            context = self.get_theme_context()
        except Exception:
            context = None
        if context is None or not getattr(context, 'ordered_targets', []):
@ -879,7 +879,7 @@ class SpellAdditionMixin:
                    card_type=row.get('type', ''),
                    mana_cost=row.get('manaCost', ''),
                    mana_value=row.get('manaValue', row.get('cmc', '')),
-                    tags=row.get('themeTags', []) if isinstance(row.get('themeTags', []), list) else [],
+                    tags=bu.ensure_theme_tags_list(row.get('themeTags')),
                    role='theme_spell',
                    sub_role=role,
                    added_by='spell_theme_fill',
@ -942,7 +942,7 @@ class SpellAdditionMixin:
                        card_type=row.get('type', ''),
                        mana_cost=row.get('manaCost', ''),
                        mana_value=row.get('manaValue', row.get('cmc', '')),
-                        tags=row.get('themeTags', []) if isinstance(row.get('themeTags', []), list) else [],
+                        tags=bu.ensure_theme_tags_list(row.get('themeTags')),
                        role='theme_spell',
                        sub_role='fill_multi',
                        added_by='spell_theme_fill',
@ -1006,7 +1006,7 @@ class SpellAdditionMixin:
                        card_type=r0.get('type',''),
                        mana_cost=r0.get('manaCost',''),
                        mana_value=r0.get('manaValue', r0.get('cmc','')),
-                        tags=r0.get('themeTags', []) if isinstance(r0.get('themeTags', []), list) else [],
+                        tags=bu.ensure_theme_tags_list(r0.get('themeTags')),
                        role='filler',
                        sub_role=r0.get('_fillerCat',''),
                        added_by='spell_general_filler'
@ -1058,4 +1058,4 @@ class SpellAdditionMixin:
        """
        """Public method for orchestration: delegates to add_non_creature_spells."""
        return self.add_non_creature_spells()
-    
+    
--- a/code/deck_builder/phases/phase5_color_balance.py
+++ b/code/deck_builder/phases/phase5_color_balance.py
@ -159,7 +159,8 @@ class ColorBalanceMixin:
            self.output_func("  (No viable swaps executed.)")

        # Always consider basic-land rebalance when requested
-        if rebalance_basics:
+        # M5: Skip rebalance for colorless commanders (they should have only Wastes)
+        if rebalance_basics and self.color_identity:  # Only rebalance if commander has colors
            try:
                basic_map = getattr(bc, 'COLOR_TO_BASIC_LAND', {})
                basics_present = {nm: entry for nm, entry in self.card_library.items() if nm in basic_map.values()}
--- a/code/deck_builder/phases/phase6_reporting.py
+++ b/code/deck_builder/phases/phase6_reporting.py
@ -7,14 +7,14 @@ import datetime as _dt
 import re as _re
 import logging_util

-from code.deck_builder.summary_telemetry import record_land_summary, record_theme_summary, record_partner_summary
-from code.deck_builder.color_identity_utils import normalize_colors, canon_color_code, color_label_from_code
-from code.deck_builder.shared_copy import build_land_headline, dfc_card_note
+from ..summary_telemetry import record_land_summary, record_theme_summary, record_partner_summary
+from ..color_identity_utils import normalize_colors, canon_color_code, color_label_from_code
+from ..shared_copy import build_land_headline, dfc_card_note

 logger = logging_util.logging.getLogger(__name__)

 try:
-    from prettytable import PrettyTable  # type: ignore
+    from prettytable import PrettyTable
 except Exception:  # pragma: no cover
    PrettyTable = None  # type: ignore

@ -176,7 +176,7 @@ class ReportingMixin:
        """
        try:
            # Lazy import to avoid cycles
-            from deck_builder.enforcement import enforce_bracket_compliance  # type: ignore
+            from deck_builder.enforcement import enforce_bracket_compliance
        except Exception:
            self.output_func("Enforcement module unavailable.")
            return {}
@ -194,7 +194,7 @@ class ReportingMixin:
            if int(total_cards) < 100 and hasattr(self, 'fill_remaining_theme_spells'):
                before = int(total_cards)
                try:
-                    self.fill_remaining_theme_spells()  # type: ignore[attr-defined]
+                    self.fill_remaining_theme_spells()
                except Exception:
                    pass
                # Recompute after filler
@ -239,13 +239,13 @@ class ReportingMixin:
                csv_name = base_stem + ".csv"
                txt_name = base_stem + ".txt"
                # Overwrite exports with updated library
-                self.export_decklist_csv(directory='deck_files', filename=csv_name, suppress_output=True)  # type: ignore[attr-defined]
-                self.export_decklist_text(directory='deck_files', filename=txt_name, suppress_output=True)  # type: ignore[attr-defined]
+                self.export_decklist_csv(directory='deck_files', filename=csv_name, suppress_output=True)
+                self.export_decklist_text(directory='deck_files', filename=txt_name, suppress_output=True)
                # Re-export the JSON config to reflect any changes from enforcement
                json_name = base_stem + ".json"
-                self.export_run_config_json(directory='config', filename=json_name, suppress_output=True)  # type: ignore[attr-defined]
+                self.export_run_config_json(directory='config', filename=json_name, suppress_output=True)
                # Recompute and write compliance next to them
-                self.compute_and_print_compliance(base_stem=base_stem)  # type: ignore[attr-defined]
+                self.compute_and_print_compliance(base_stem=base_stem)
                # Inject enforcement details into the saved compliance JSON for UI transparency
                comp_path = _os.path.join('deck_files', f"{base_stem}_compliance.json")
                try:
@ -259,18 +259,18 @@ class ReportingMixin:
                    pass
            else:
                # Fall back to default export flow
-                csv_path = self.export_decklist_csv()  # type: ignore[attr-defined]
+                csv_path = self.export_decklist_csv()
                try:
                    base, _ = _os.path.splitext(csv_path)
                    base_only = _os.path.basename(base)
                except Exception:
                    base_only = None
-                self.export_decklist_text(filename=(base_only + '.txt') if base_only else None)  # type: ignore[attr-defined]
+                self.export_decklist_text(filename=(base_only + '.txt') if base_only else None)
                # Re-export JSON config after enforcement changes
                if base_only:
-                    self.export_run_config_json(directory='config', filename=base_only + '.json', suppress_output=True)  # type: ignore[attr-defined]
+                    self.export_run_config_json(directory='config', filename=base_only + '.json', suppress_output=True)
                if base_only:
-                    self.compute_and_print_compliance(base_stem=base_only)  # type: ignore[attr-defined]
+                    self.compute_and_print_compliance(base_stem=base_only)
                    # Inject enforcement into written JSON as above
                    try:
                        comp_path = _os.path.join('deck_files', f"{base_only}_compliance.json")
@ -294,7 +294,7 @@ class ReportingMixin:
        """
        try:
            # Late import to avoid circulars in some environments
-            from deck_builder.brackets_compliance import evaluate_deck  # type: ignore
+            from deck_builder.brackets_compliance import evaluate_deck
        except Exception:
            self.output_func("Bracket compliance module unavailable.")
            return {}
@ -373,7 +373,7 @@ class ReportingMixin:
        full_df = getattr(self, '_full_cards_df', None)
        combined_df = getattr(self, '_combined_cards_df', None)
        snapshot = full_df if full_df is not None else combined_df
-        row_lookup: Dict[str, any] = {}
+        row_lookup: Dict[str, Any] = {}
        if snapshot is not None and hasattr(snapshot, 'empty') and not snapshot.empty and 'name' in snapshot.columns:
            for _, r in snapshot.iterrows():
                nm = str(r.get('name'))
@ -429,7 +429,7 @@ class ReportingMixin:

        # Surface land vs. MDFC counts for CLI users to mirror web summary copy
        try:
-            summary = self.build_deck_summary()  # type: ignore[attr-defined]
+            summary = self.build_deck_summary()
        except Exception:
            summary = None
        if isinstance(summary, dict):
@ -483,9 +483,9 @@ class ReportingMixin:
        full_df = getattr(self, '_full_cards_df', None)
        combined_df = getattr(self, '_combined_cards_df', None)
        snapshot = full_df if full_df is not None else combined_df
-        row_lookup: Dict[str, any] = {}
+        row_lookup: Dict[str, Any] = {}
        if snapshot is not None and not getattr(snapshot, 'empty', True) and 'name' in snapshot.columns:
-            for _, r in snapshot.iterrows():  # type: ignore[attr-defined]
+            for _, r in snapshot.iterrows():
                nm = str(r.get('name'))
                if nm and nm not in row_lookup:
                    row_lookup[nm] = r
@ -521,7 +521,7 @@ class ReportingMixin:

        builder_utils_module = None
        try:
-            from deck_builder import builder_utils as _builder_utils  # type: ignore
+            from deck_builder import builder_utils as _builder_utils
            builder_utils_module = _builder_utils
            color_matrix = builder_utils_module.compute_color_source_matrix(self.card_library, full_df)
        except Exception:
@ -543,6 +543,9 @@ class ReportingMixin:
                        mf_info = {}
                    faces_meta = list(mf_info.get('faces', [])) if isinstance(mf_info, dict) else []
                    layout_val = mf_info.get('layout') if isinstance(mf_info, dict) else None
+                    # M9: If no colors found from mana production, try extracting from face metadata
+                    if not card_colors and isinstance(mf_info, dict):
+                        card_colors = list(mf_info.get('colors', []))
                dfc_land_lookup[name] = {
                    'adds_extra_land': counts_as_extra,
                    'counts_as_land': not counts_as_extra,
@ -681,13 +684,14 @@ class ReportingMixin:
                    'faces': faces_meta,
                    'layout': layout_val,
                })
-                if adds_extra:
-                    dfc_extra_total += copies
+                # M9: Count ALL MDFC lands for land summary
+                dfc_extra_total += copies
        total_sources = sum(source_counts.values())
        traditional_lands = type_counts.get('Land', 0)
+        # M9: dfc_extra_total now contains ALL MDFC lands, not just extras
        land_summary = {
            'traditional': traditional_lands,
-            'dfc_lands': dfc_extra_total,
+            'dfc_lands': dfc_extra_total,  # M9: Count of all MDFC lands
            'with_dfc': traditional_lands + dfc_extra_total,
            'dfc_cards': dfc_details,
            'headline': build_land_headline(traditional_lands, dfc_extra_total, traditional_lands + dfc_extra_total),
@ -852,7 +856,7 @@ class ReportingMixin:
        full_df = getattr(self, '_full_cards_df', None)
        combined_df = getattr(self, '_combined_cards_df', None)
        snapshot = full_df if full_df is not None else combined_df
-        row_lookup: Dict[str, any] = {}
+        row_lookup: Dict[str, Any] = {}
        if snapshot is not None and not snapshot.empty and 'name' in snapshot.columns:
            for _, r in snapshot.iterrows():
                nm = str(r.get('name'))
@ -1124,7 +1128,7 @@ class ReportingMixin:
        full_df = getattr(self, '_full_cards_df', None)
        combined_df = getattr(self, '_combined_cards_df', None)
        snapshot = full_df if full_df is not None else combined_df
-        row_lookup: Dict[str, any] = {}
+        row_lookup: Dict[str, Any] = {}
        if snapshot is not None and not snapshot.empty and 'name' in snapshot.columns:
            for _, r in snapshot.iterrows():
                nm = str(r.get('name'))
@ -1132,7 +1136,7 @@ class ReportingMixin:
                    row_lookup[nm] = r

        try:
-            from deck_builder import builder_utils as _builder_utils  # type: ignore
+            from deck_builder import builder_utils as _builder_utils
            color_matrix = _builder_utils.compute_color_source_matrix(self.card_library, full_df)
        except Exception:
            color_matrix = {}
@ -1383,3 +1387,4 @@ class ReportingMixin:
        """
    # Card library printout suppressed; use CSV and text export for card list.
    pass
+
--- a/code/deck_builder/random_entrypoint.py
+++ b/code/deck_builder/random_entrypoint.py
@ -425,12 +425,20 @@ class RandomBuildResult:


 def _load_commanders_df() -> pd.DataFrame:
-    """Load commander CSV using the same path/converters as the builder.
+    """Load commanders from Parquet using isCommander boolean flag.

-    Uses bc.COMMANDER_CSV_PATH and bc.COMMANDER_CONVERTERS for consistency.
+    M4: Migrated from CSV to Parquet loading with boolean filtering.
    """
-    df = pd.read_csv(bc.COMMANDER_CSV_PATH, converters=getattr(bc, "COMMANDER_CONVERTERS", None))
-    return _ensure_theme_tag_cache(df)
+    from . import builder_utils as bu
+    
+    # Load all cards from Parquet
+    df = bu._load_all_cards_parquet()
+    if df.empty:
+        return pd.DataFrame()
+    
+    # Filter to commanders using boolean flag
+    commanders_df = bc.get_commanders(df)
+    return _ensure_theme_tag_cache(commanders_df)


 def _ensure_theme_tag_cache(df: pd.DataFrame) -> pd.DataFrame:
@ -877,7 +885,7 @@ def _filter_multi(df: pd.DataFrame, primary: Optional[str], secondary: Optional[
        if index_map is None:
            _ensure_theme_tag_index(current_df)
            index_map = current_df.attrs.get("_ltag_index") or {}
-        return index_map  # type: ignore[return-value]
+        return index_map

    index_map_all = _get_index_map(df)

@ -1039,7 +1047,7 @@ def _check_constraints(candidate_count: int, constraints: Optional[Dict[str, Any
    if not constraints:
        return
    try:
-        req_min = constraints.get("require_min_candidates")  # type: ignore[attr-defined]
+        req_min = constraints.get("require_min_candidates")
    except Exception:
        req_min = None
    if req_min is None:
@ -1428,7 +1436,7 @@ def build_random_full_deck(
    primary_choice_idx, secondary_choice_idx, tertiary_choice_idx = _resolve_theme_choices_for_headless(base.commander, base)

    try:
-        from headless_runner import run as _run  # type: ignore
+        from headless_runner import run as _run
    except Exception as e:
        return RandomFullBuildResult(
            seed=base.seed,
@ -1474,7 +1482,7 @@ def build_random_full_deck(
    summary: Dict[str, Any] | None = None
    try:
        if hasattr(builder, 'build_deck_summary'):
-            summary = builder.build_deck_summary()  # type: ignore[attr-defined]
+            summary = builder.build_deck_summary()
    except Exception:
        summary = None

@ -1551,7 +1559,7 @@ def build_random_full_deck(
        if isinstance(custom_base, str) and custom_base.strip():
            meta_payload["name"] = custom_base.strip()
        try:
-            commander_meta = builder.get_commander_export_metadata()  # type: ignore[attr-defined]
+            commander_meta = builder.get_commander_export_metadata()
        except Exception:
            commander_meta = {}
        names = commander_meta.get("commander_names") or []
@ -1581,8 +1589,8 @@ def build_random_full_deck(
    try:
        import os as _os
        import json as _json
-        csv_path = getattr(builder, 'last_csv_path', None)  # type: ignore[attr-defined]
-        txt_path = getattr(builder, 'last_txt_path', None)  # type: ignore[attr-defined]
+        csv_path = getattr(builder, 'last_csv_path', None)
+        txt_path = getattr(builder, 'last_txt_path', None)
        if csv_path and isinstance(csv_path, str):
            base_path, _ = _os.path.splitext(csv_path)
            # If txt missing but expected, look for sibling
@ -1600,7 +1608,7 @@ def build_random_full_deck(
                # Compute compliance if not already saved
                try:
                    if hasattr(builder, 'compute_and_print_compliance'):
-                        compliance = builder.compute_and_print_compliance(base_stem=_os.path.basename(base_path))  # type: ignore[attr-defined]
+                        compliance = builder.compute_and_print_compliance(base_stem=_os.path.basename(base_path))
                except Exception:
                    compliance = None
            # Write summary sidecar if missing
@ -1638,7 +1646,7 @@ def build_random_full_deck(
                        csv_path = existing_base
                        base_path, _ = _os.path.splitext(csv_path)
                    else:
-                        tmp_csv = builder.export_decklist_csv()  # type: ignore[attr-defined]
+                        tmp_csv = builder.export_decklist_csv()
                        stem_base, ext = _os.path.splitext(tmp_csv)
                        if stem_base.endswith('_1'):
                            original = stem_base[:-2] + ext
@ -1654,13 +1662,13 @@ def build_random_full_deck(
                        if _os.path.isfile(target_txt):
                            txt_path = target_txt
                        else:
-                            tmp_txt = builder.export_decklist_text(filename=_os.path.basename(base_path) + '.txt')  # type: ignore[attr-defined]
+                            tmp_txt = builder.export_decklist_text(filename=_os.path.basename(base_path) + '.txt')
                            if tmp_txt.endswith('_1.txt') and _os.path.isfile(target_txt):
                                txt_path = target_txt
                            else:
                                txt_path = tmp_txt
                    if hasattr(builder, 'compute_and_print_compliance'):
-                        compliance = builder.compute_and_print_compliance(base_stem=_os.path.basename(base_path))  # type: ignore[attr-defined]
+                        compliance = builder.compute_and_print_compliance(base_stem=_os.path.basename(base_path))
                    if summary:
                        sidecar = base_path + '.summary.json'
                        if not _os.path.isfile(sidecar):
--- a/code/deck_builder/summary_telemetry.py
+++ b/code/deck_builder/summary_telemetry.py
@ -167,7 +167,7 @@ def _reset_metrics_for_test() -> None:
 def _sanitize_theme_list(values: Iterable[Any]) -> list[str]:
    sanitized: list[str] = []
    seen: set[str] = set()
-    for raw in values or []:  # type: ignore[arg-type]
+    for raw in values or []:
        text = str(raw or "").strip()
        if not text:
            continue
--- a/code/deck_builder/theme_catalog_loader.py
+++ b/code/deck_builder/theme_catalog_loader.py
@ -9,9 +9,9 @@ from functools import lru_cache
 from pathlib import Path
 from typing import Iterable, Tuple

-from code.logging_util import get_logger
+import logging_util

-LOGGER = get_logger(__name__)
+LOGGER = logging_util.get_logger(__name__)

 ROOT = Path(__file__).resolve().parents[2]
 DEFAULT_CATALOG_PATH = ROOT / "config" / "themes" / "theme_catalog.csv"
@ -183,7 +183,7 @@ def _iter_json_themes(payload: object) -> Iterable[ThemeCatalogEntry]:
    try:
        from type_definitions_theme_catalog import ThemeCatalog  # pragma: no cover - primary import path
    except ImportError:  # pragma: no cover - fallback when running as package
-        from code.type_definitions_theme_catalog import ThemeCatalog  # type: ignore
+        from code.type_definitions_theme_catalog import ThemeCatalog

    try:
        catalog = ThemeCatalog.model_validate(payload)
--- a/code/deck_builder/theme_matcher.py
+++ b/code/deck_builder/theme_matcher.py
@ -7,7 +7,7 @@ from dataclasses import dataclass
 from functools import lru_cache
 from typing import Iterable, List, Sequence

-from code.deck_builder.theme_catalog_loader import ThemeCatalogEntry
+from .theme_catalog_loader import ThemeCatalogEntry

 __all__ = [
    "normalize_theme",
--- a/code/file_setup/init.py
+++ b/code/file_setup/init.py
@ -1,8 +1,8 @@
 """Initialize the file_setup package."""

-from .setup import setup, regenerate_csv_by_color
+from .setup import initial_setup, regenerate_processed_parquet

 __all__ = [
-    'setup',
-    'regenerate_csv_by_color'
+    'initial_setup',
+    'regenerate_processed_parquet'
 ]
--- a/code/file_setup/card_aggregator.py
+++ b/code/file_setup/card_aggregator.py
@ -0,0 +1,367 @@
+"""
+Card Data Aggregator
+
+Consolidates individual card CSV files into a single Parquet file for improved
+performance in card browsing, theme cataloging, and searches.
+
+Key Features:
+- Merges all card CSVs into all_cards.parquet (50-70% size reduction, 2-5x faster)
+- Excludes master files (cards.csv, commander_cards.csv) from aggregation
+- Deduplication logic (keeps most recent when card appears in multiple files)
+- Incremental updates (only re-process changed files)
+- Version rotation (maintains 2-3 historical versions for rollback)
+- Validation (ensures no data loss)
+
+Usage:
+    aggregator = CardAggregator()
+    stats = aggregator.aggregate_all('csv_files', 'card_files/all_cards.parquet')
+"""
+
+from __future__ import annotations
+
+import glob
+import json
+import os
+from datetime import datetime
+from typing import Optional
+
+import pandas as pd
+
+from code.logging_util import get_logger
+
+# Initialize logger
+logger = get_logger(__name__)
+
+
+class CardAggregator:
+    """Aggregates individual card CSV files into a consolidated Parquet file."""
+
+    # Files to exclude from aggregation (master files used for other purposes)
+    EXCLUDED_FILES = {"cards.csv", "commander_cards.csv", "background_cards.csv"}
+
+    def __init__(self, output_dir: Optional[str] = None) -> None:
+        """
+        Initialize CardAggregator.
+
+        Args:
+            output_dir: Directory for output files (defaults to CARD_FILES_DIR env var or 'card_files/')
+        """
+        self.output_dir = output_dir or os.getenv("CARD_FILES_DIR", "card_files")
+        self.ensure_output_dir()
+
+    def ensure_output_dir(self) -> None:
+        """Create output directory if it doesn't exist."""
+        os.makedirs(self.output_dir, exist_ok=True)
+        logger.info(f"Card aggregator output directory: {self.output_dir}")
+
+    def get_card_csvs(self, source_dir: str) -> list[str]:
+        """
+        Get all card CSV files to aggregate, excluding master files.
+
+        Args:
+            source_dir: Directory containing card CSV files
+
+        Returns:
+            List of file paths to aggregate
+        """
+        all_csvs = glob.glob(os.path.join(source_dir, "*.csv"))
+
+        # Filter out excluded files and temporary files
+        filtered = [
+            f
+            for f in all_csvs
+            if os.path.basename(f) not in self.EXCLUDED_FILES
+            and not os.path.basename(f).startswith(".")
+            and not os.path.basename(f).startswith("_temp")
+        ]
+
+        logger.info(
+            f"Found {len(all_csvs)} CSV files, {len(filtered)} to aggregate "
+            f"(excluded {len(all_csvs) - len(filtered)})"
+        )
+
+        return filtered
+
+    def deduplicate_cards(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Remove duplicate card entries, keeping the most recent version.
+
+        Uses the 'name' column as the unique identifier. When duplicates exist,
+        keeps the last occurrence (assumes files are processed in order of modification time).
+
+        Args:
+            df: DataFrame with potential duplicates
+
+        Returns:
+            DataFrame with duplicates removed
+        """
+        if "name" not in df.columns:
+            logger.warning("Cannot deduplicate: 'name' column not found")
+            return df
+
+        original_count = len(df)
+        df_deduped = df.drop_duplicates(subset=["name"], keep="last")
+        removed_count = original_count - len(df_deduped)
+
+        if removed_count > 0:
+            logger.info(f"Removed {removed_count} duplicate cards (kept most recent)")
+
+        return df_deduped
+
+    def aggregate_all(self, source_dir: str, output_path: str) -> dict:
+        """
+        Perform full aggregation of all card CSV files into a single Parquet file.
+
+        Args:
+            source_dir: Directory containing individual card CSV files
+            output_path: Path for output Parquet file
+
+        Returns:
+            Dictionary with aggregation statistics:
+                - files_processed: Number of CSV files aggregated
+                - total_cards: Total cards in output (after deduplication)
+                - duplicates_removed: Number of duplicate cards removed
+                - file_size_mb: Size of output Parquet file in MB
+                - elapsed_seconds: Time taken for aggregation
+
+        Raises:
+            FileNotFoundError: If source_dir doesn't exist
+            ValueError: If no CSV files found to aggregate
+        """
+        start_time = datetime.now()
+
+        if not os.path.exists(source_dir):
+            raise FileNotFoundError(f"Source directory not found: {source_dir}")
+
+        # Get CSV files to aggregate
+        csv_files = self.get_card_csvs(source_dir)
+        if not csv_files:
+            raise ValueError(f"No CSV files found to aggregate in {source_dir}")
+
+        logger.info(f"Starting aggregation of {len(csv_files)} files...")
+
+        # Sort by modification time (oldest first, so newest are kept in deduplication)
+        csv_files_sorted = sorted(csv_files, key=lambda f: os.path.getmtime(f))
+
+        # Read and concatenate all CSV files
+        dfs = []
+        for csv_file in csv_files_sorted:
+            try:
+                # Skip comment lines (lines starting with #) in CSV files
+                df = pd.read_csv(csv_file, low_memory=False, comment='#')
+                if not df.empty:
+                    dfs.append(df)
+            except Exception as e:
+                logger.warning(f"Failed to read {os.path.basename(csv_file)}: {e}")
+                continue
+
+        if not dfs:
+            raise ValueError("No valid CSV files could be read")
+
+        # Concatenate all DataFrames
+        logger.info(f"Concatenating {len(dfs)} DataFrames...")
+        combined_df = pd.concat(dfs, ignore_index=True)
+        original_count = len(combined_df)
+
+        # Deduplicate cards
+        combined_df = self.deduplicate_cards(combined_df)
+        duplicates_removed = original_count - len(combined_df)
+
+        # Convert object columns with mixed types to strings for Parquet compatibility
+        # Common columns that may have mixed types: power, toughness, keywords
+        for col in ["power", "toughness", "keywords"]:
+            if col in combined_df.columns:
+                combined_df[col] = combined_df[col].astype(str)
+
+        # Rotate existing versions before writing new file
+        self.rotate_versions(output_path, keep_versions=3)
+
+        # Write to Parquet
+        logger.info(f"Writing {len(combined_df)} cards to {output_path}...")
+        combined_df.to_parquet(output_path, engine="pyarrow", compression="snappy", index=False)
+
+        # Calculate stats
+        elapsed = (datetime.now() - start_time).total_seconds()
+        file_size_mb = os.path.getsize(output_path) / (1024 * 1024)
+
+        stats = {
+            "files_processed": len(csv_files),
+            "total_cards": len(combined_df),
+            "duplicates_removed": duplicates_removed,
+            "file_size_mb": round(file_size_mb, 2),
+            "elapsed_seconds": round(elapsed, 2),
+            "timestamp": datetime.now().isoformat(),
+        }
+
+        logger.info(
+            f"Aggregation complete: {stats['total_cards']} cards "
+            f"({stats['file_size_mb']} MB) in {stats['elapsed_seconds']}s"
+        )
+
+        # Save metadata
+        self._save_metadata(source_dir, output_path, stats)
+
+        return stats
+
+    def detect_changes(self, source_dir: str, metadata_path: str) -> list[str]:
+        """
+        Detect which CSV files have changed since last aggregation.
+
+        Args:
+            source_dir: Directory containing card CSV files
+            metadata_path: Path to metadata JSON file from previous run
+
+        Returns:
+            List of file paths that have been added or modified
+        """
+        if not os.path.exists(metadata_path):
+            logger.info("No previous metadata found, all files considered changed")
+            return self.get_card_csvs(source_dir)
+
+        try:
+            with open(metadata_path, "r", encoding="utf-8") as f:
+                metadata = json.load(f)
+            last_run = datetime.fromisoformat(metadata.get("timestamp", ""))
+        except (json.JSONDecodeError, ValueError, KeyError) as e:
+            logger.warning(f"Invalid metadata file: {e}, treating all files as changed")
+            return self.get_card_csvs(source_dir)
+
+        # Find files modified after last aggregation
+        csv_files = self.get_card_csvs(source_dir)
+        changed_files = [
+            f for f in csv_files if datetime.fromtimestamp(os.path.getmtime(f)) > last_run
+        ]
+
+        logger.info(f"Detected {len(changed_files)} changed files since last aggregation")
+        return changed_files
+
+    def incremental_update(self, changed_files: list[str], output_path: str) -> dict:
+        """
+        Perform incremental update by replacing only changed cards.
+
+        Note: This is a simplified implementation. For production use, consider:
+        - Loading existing Parquet, removing old versions of changed cards, adding new
+        - Currently performs full re-aggregation (simpler, safer for MVP)
+
+        Args:
+            changed_files: List of CSV files that have changed
+            output_path: Path to existing Parquet file to update
+
+        Returns:
+            Dictionary with update statistics
+        """
+        # For MVP, we'll perform a full aggregation instead of true incremental update
+        # True incremental update would require:
+        # 1. Load existing Parquet
+        # 2. Identify cards from changed files
+        # 3. Remove old versions of those cards
+        # 4. Add new versions
+        # This is more complex and error-prone, so we'll defer to a future iteration
+
+        logger.info("Incremental update not yet implemented, performing full aggregation")
+        source_dir = os.path.dirname(changed_files[0]) if changed_files else "csv_files"
+        return self.aggregate_all(source_dir, output_path)
+
+    def validate_output(self, output_path: str, source_dir: str) -> tuple[bool, list[str]]:
+        """
+        Validate the aggregated output file.
+
+        Checks:
+        - File exists and is readable
+        - Contains expected columns
+        - Has reasonable number of cards (>0)
+        - Random sampling matches source data
+
+        Args:
+            output_path: Path to Parquet file to validate
+            source_dir: Original source directory for comparison
+
+        Returns:
+            Tuple of (is_valid, list_of_errors)
+        """
+        errors = []
+
+        # Check file exists
+        if not os.path.exists(output_path):
+            errors.append(f"Output file not found: {output_path}")
+            return False, errors
+
+        try:
+            # Load Parquet file
+            df = pd.read_parquet(output_path, engine="pyarrow")
+
+            # Check not empty
+            if df.empty:
+                errors.append("Output file is empty")
+
+            # Check has 'name' column at minimum
+            if "name" not in df.columns:
+                errors.append("Output file missing 'name' column")
+
+            # Check for reasonable card count (at least 100 cards expected in any real dataset)
+            if len(df) < 100:
+                logger.warning(f"Output has only {len(df)} cards (expected more)")
+
+            logger.info(f"Validation passed: {len(df)} cards with {len(df.columns)} columns")
+
+        except Exception as e:
+            errors.append(f"Failed to read/validate output file: {e}")
+
+        return len(errors) == 0, errors
+
+    def rotate_versions(self, output_path: str, keep_versions: int = 3) -> None:
+        """
+        Rotate historical versions of the output file.
+
+        Keeps the last N versions as backups (e.g., all_cards_v1.parquet, all_cards_v2.parquet).
+
+        Args:
+            output_path: Path to current output file
+            keep_versions: Number of historical versions to keep (default: 3)
+        """
+        if not os.path.exists(output_path):
+            return  # Nothing to rotate
+
+        # Parse output path
+        base_dir = os.path.dirname(output_path)
+        filename = os.path.basename(output_path)
+        name, ext = os.path.splitext(filename)
+
+        # Rotate existing versions (v2 -> v3, v1 -> v2, current -> v1)
+        for version in range(keep_versions - 1, 0, -1):
+            old_path = os.path.join(base_dir, f"{name}_v{version}{ext}")
+            new_path = os.path.join(base_dir, f"{name}_v{version + 1}{ext}")
+
+            if os.path.exists(old_path):
+                if version + 1 > keep_versions:
+                    # Delete oldest version
+                    os.remove(old_path)
+                    logger.info(f"Deleted old version: {os.path.basename(old_path)}")
+                else:
+                    # Rename to next version
+                    os.rename(old_path, new_path)
+                    logger.info(
+                        f"Rotated {os.path.basename(old_path)} -> {os.path.basename(new_path)}"
+                    )
+
+        # Move current file to v1
+        v1_path = os.path.join(base_dir, f"{name}_v1{ext}")
+        if os.path.exists(output_path):
+            os.rename(output_path, v1_path)
+            logger.info(f"Rotated current file to {os.path.basename(v1_path)}")
+
+    def _save_metadata(self, source_dir: str, output_path: str, stats: dict) -> None:
+        """Save aggregation metadata for incremental updates."""
+        metadata_path = os.path.join(self.output_dir, ".aggregate_metadata.json")
+
+        metadata = {
+            "source_dir": source_dir,
+            "output_path": output_path,
+            "last_aggregation": stats["timestamp"],
+            "stats": stats,
+        }
+
+        with open(metadata_path, "w", encoding="utf-8") as f:
+            json.dump(metadata, f, indent=2)
+
+        logger.info(f"Saved aggregation metadata to {metadata_path}")
--- a/code/file_setup/data_loader.py
+++ b/code/file_setup/data_loader.py
@ -0,0 +1,338 @@
+"""Data loader abstraction for CSV and Parquet formats.
+
+This module provides a unified interface for reading and writing card data
+in both CSV and Parquet formats. It handles format detection, conversion,
+and schema validation.
+
+Introduced in v3.0.0 as part of the Parquet migration.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import List, Optional
+
+import pandas as pd
+
+from logging_util import get_logger
+from path_util import card_files_processed_dir
+
+logger = get_logger(__name__)
+
+
+# Required columns for deck building
+REQUIRED_COLUMNS = [
+    "name",
+    "colorIdentity",
+    "type",  # MTGJSON uses 'type' not 'types'
+    "keywords",
+    "manaValue",
+    "text",
+    "power",
+    "toughness",
+]
+
+
+def validate_schema(df: pd.DataFrame, required: Optional[List[str]] = None) -> None:
+    """Validate that DataFrame contains required columns.
+    
+    Args:
+        df: DataFrame to validate
+        required: List of required columns (uses REQUIRED_COLUMNS if None)
+    
+    Raises:
+        ValueError: If required columns are missing
+    """
+    required = required or REQUIRED_COLUMNS
+    missing = [col for col in required if col not in df.columns]
+    
+    if missing:
+        raise ValueError(
+            f"Schema validation failed: missing required columns {missing}. "
+            f"Available columns: {list(df.columns)}"
+        )
+    
+    logger.debug(f"✓ Schema validation passed ({len(required)} required columns present)")
+
+
+class DataLoader:
+    """Unified data loading interface supporting CSV and Parquet formats.
+    
+    This class provides transparent access to card data regardless of the
+    underlying storage format. It automatically detects the format based on
+    file extensions and provides conversion utilities.
+    
+    Examples:
+        >>> loader = DataLoader()
+        >>> df = loader.read_cards("card_files/processed/all_cards.parquet")
+        >>> loader.write_cards(df, "output.parquet")
+        >>> loader.convert("input.csv", "output.parquet")
+    """
+    
+    def __init__(self, format: str = "auto"):
+        """Initialize the data loader.
+        
+        Args:
+            format: Format preference - "csv", "parquet", or "auto" (default: auto)
+                   "auto" detects format from file extension
+        """
+        self.format = format.lower()
+        if self.format not in ("csv", "parquet", "auto"):
+            raise ValueError(f"Unsupported format: {format}. Use 'csv', 'parquet', or 'auto'.")
+    
+    def read_cards(
+        self,
+        path: str,
+        columns: Optional[List[str]] = None,
+        format: Optional[str] = None
+    ) -> pd.DataFrame:
+        """Load card data from a file.
+        
+        Args:
+            path: File path (e.g., "card_files/processed/all_cards.parquet")
+            columns: Optional list of columns to load (Parquet optimization)
+            format: Override format detection (uses self.format if None)
+        
+        Returns:
+            DataFrame with card data
+        
+        Raises:
+            FileNotFoundError: If the file doesn't exist
+            ValueError: If format is unsupported
+        """
+        if not os.path.exists(path):
+            raise FileNotFoundError(f"Card data file not found: {path}")
+        
+        detected_format = format or self._detect_format(path)
+        
+        logger.debug(f"Loading card data from {path} (format: {detected_format})")
+        
+        if detected_format == "csv":
+            return self._read_csv(path, columns)
+        elif detected_format == "parquet":
+            return self._read_parquet(path, columns)
+        else:
+            raise ValueError(f"Unsupported format: {detected_format}")
+    
+    def write_cards(
+        self,
+        df: pd.DataFrame,
+        path: str,
+        format: Optional[str] = None,
+        index: bool = False
+    ) -> None:
+        """Save card data to a file.
+        
+        Args:
+            df: DataFrame to save
+            path: Output file path
+            format: Force format (overrides auto-detection)
+            index: Whether to write DataFrame index (default: False)
+        
+        Raises:
+            ValueError: If format is unsupported
+        """
+        detected_format = format or self._detect_format(path)
+        
+        # Ensure output directory exists
+        os.makedirs(os.path.dirname(path) if os.path.dirname(path) else ".", exist_ok=True)
+        
+        logger.debug(f"Writing card data to {path} (format: {detected_format}, rows: {len(df)})")
+        
+        if detected_format == "csv":
+            self._write_csv(df, path, index)
+        elif detected_format == "parquet":
+            self._write_parquet(df, path, index)
+        else:
+            raise ValueError(f"Unsupported format: {detected_format}")
+    
+    def convert(
+        self,
+        src_path: str,
+        dst_path: str,
+        columns: Optional[List[str]] = None
+    ) -> None:
+        """Convert between CSV and Parquet formats.
+        
+        Args:
+            src_path: Source file path
+            dst_path: Destination file path
+            columns: Optional list of columns to include (all if None)
+        
+        Examples:
+            >>> loader.convert("cards.csv", "cards.parquet")
+            >>> loader.convert("cards.parquet", "cards.csv", columns=["name", "type"])
+        """
+        logger.info(f"Converting {src_path} → {dst_path}")
+        df = self.read_cards(src_path, columns=columns)
+        self.write_cards(df, dst_path)
+        logger.info(f"✓ Converted {len(df)} cards")
+    
+    def _read_csv(self, path: str, columns: Optional[List[str]] = None) -> pd.DataFrame:
+        """Read CSV file."""
+        try:
+            return pd.read_csv(path, usecols=columns, low_memory=False)
+        except Exception as e:
+            logger.error(f"Failed to read CSV from {path}: {e}")
+            raise
+    
+    def _read_parquet(self, path: str, columns: Optional[List[str]] = None) -> pd.DataFrame:
+        """Read Parquet file."""
+        try:
+            return pd.read_parquet(path, columns=columns)
+        except Exception as e:
+            logger.error(f"Failed to read Parquet from {path}: {e}")
+            raise
+    
+    def _write_csv(self, df: pd.DataFrame, path: str, index: bool) -> None:
+        """Write CSV file."""
+        try:
+            df.to_csv(path, index=index)
+        except Exception as e:
+            logger.error(f"Failed to write CSV to {path}: {e}")
+            raise
+    
+    def _write_parquet(self, df: pd.DataFrame, path: str, index: bool) -> None:
+        """Write Parquet file with Snappy compression."""
+        try:
+            df.to_parquet(path, index=index, compression="snappy", engine="pyarrow")
+        except Exception as e:
+            logger.error(f"Failed to write Parquet to {path}: {e}")
+            raise
+    
+    def _detect_format(self, path: str) -> str:
+        """Detect file format from extension.
+        
+        Args:
+            path: File path to analyze
+        
+        Returns:
+            Format string: "csv" or "parquet"
+        
+        Raises:
+            ValueError: If format cannot be determined
+        """
+        if self.format != "auto":
+            return self.format
+        
+        # Check file extension
+        if path.endswith(".csv"):
+            return "csv"
+        elif path.endswith(".parquet"):
+            return "parquet"
+        
+        # Try to infer from existing files (no extension provided)
+        if os.path.exists(f"{path}.parquet"):
+            return "parquet"
+        elif os.path.exists(f"{path}.csv"):
+            return "csv"
+        
+        raise ValueError(
+            f"Cannot determine format for '{path}'. "
+            "Use .csv or .parquet extension, or specify format explicitly."
+        )
+    
+    def write_batch_parquet(
+        self,
+        df: pd.DataFrame,
+        batch_id: int,
+        tag: str = "",
+        batches_dir: Optional[str] = None
+    ) -> str:
+        """Write a batch Parquet file (used during tagging).
+        
+        Args:
+            df: DataFrame to save as a batch
+            batch_id: Unique batch identifier (e.g., 0, 1, 2...)
+            tag: Optional tag to include in filename (e.g., "white", "commander")
+            batches_dir: Directory for batch files (defaults to card_files/processed/batches)
+        
+        Returns:
+            Path to the written batch file
+        
+        Example:
+            >>> loader.write_batch_parquet(white_df, batch_id=0, tag="white")
+            'card_files/processed/batches/batch_0_white.parquet'
+        """
+        if batches_dir is None:
+            batches_dir = os.path.join(card_files_processed_dir(), "batches")
+        
+        os.makedirs(batches_dir, exist_ok=True)
+        
+        # Build filename: batch_{id}_{tag}.parquet or batch_{id}.parquet
+        filename = f"batch_{batch_id}_{tag}.parquet" if tag else f"batch_{batch_id}.parquet"
+        path = os.path.join(batches_dir, filename)
+        
+        logger.debug(f"Writing batch {batch_id} ({tag or 'no tag'}): {len(df)} cards → {path}")
+        self.write_cards(df, path, format="parquet")
+        
+        return path
+    
+    def merge_batches(
+        self,
+        output_path: Optional[str] = None,
+        batches_dir: Optional[str] = None,
+        cleanup: bool = True
+    ) -> pd.DataFrame:
+        """Merge all batch Parquet files into a single output file.
+        
+        Args:
+            output_path: Path for merged output (defaults to card_files/processed/all_cards.parquet)
+            batches_dir: Directory containing batch files (defaults to card_files/processed/batches)
+            cleanup: Whether to delete batch files after merging (default: True)
+        
+        Returns:
+            Merged DataFrame
+        
+        Raises:
+            FileNotFoundError: If no batch files found
+        
+        Example:
+            >>> loader.merge_batches()  # Merges all batches → all_cards.parquet
+        """
+        if batches_dir is None:
+            batches_dir = os.path.join(card_files_processed_dir(), "batches")
+        
+        if output_path is None:
+            from code.path_util import get_processed_cards_path
+            output_path = get_processed_cards_path()
+        
+        # Find all batch files
+        batch_files = sorted(Path(batches_dir).glob("batch_*.parquet"))
+        
+        if not batch_files:
+            raise FileNotFoundError(f"No batch files found in {batches_dir}")
+        
+        logger.info(f"Merging {len(batch_files)} batch files from {batches_dir}")
+        
+        # Read and concatenate all batches
+        dfs = []
+        for batch_file in batch_files:
+            logger.debug(f"Reading batch: {batch_file.name}")
+            df = self.read_cards(str(batch_file), format="parquet")
+            dfs.append(df)
+        
+        # Merge all batches
+        merged_df = pd.concat(dfs, ignore_index=True)
+        logger.info(f"Merged {len(merged_df)} total cards from {len(dfs)} batches")
+        
+        # Write merged output
+        self.write_cards(merged_df, output_path, format="parquet")
+        logger.info(f"✓ Wrote merged data to {output_path}")
+        
+        # Cleanup batch files if requested
+        if cleanup:
+            logger.debug(f"Cleaning up {len(batch_files)} batch files")
+            for batch_file in batch_files:
+                batch_file.unlink()
+            
+            # Remove batches directory if empty
+            try:
+                Path(batches_dir).rmdir()
+                logger.debug(f"Removed empty batches directory: {batches_dir}")
+            except OSError:
+                pass  # Directory not empty, keep it
+        
+        return merged_df
+
--- a/code/file_setup/image_cache.py
+++ b/code/file_setup/image_cache.py
@ -0,0 +1,567 @@
+"""
+Card image caching system.
+
+Downloads and manages local cache of Magic: The Gathering card images
+from Scryfall, with graceful fallback to API when images are missing.
+
+Features:
+- Optional caching (disabled by default for open source users)
+- Uses Scryfall bulk data API (respects rate limits and guidelines)
+- Downloads from Scryfall CDN (no rate limits on image files)
+- Progress tracking for long downloads
+- Resume capability if interrupted
+- Graceful fallback to API if images missing
+
+Environment Variables:
+    CACHE_CARD_IMAGES: 1=enable caching, 0=disable (default: 0)
+
+Image Sizes:
+    - small: 160px width (for list views)
+    - normal: 488px width (for prominent displays, hover previews)
+
+Directory Structure:
+    card_files/images/small/    - Small thumbnails (~900 MB - 1.5 GB)
+    card_files/images/normal/   - Normal images (~2.4 GB - 4.5 GB)
+
+See: https://scryfall.com/docs/api
+"""
+
+import json
+import logging
+import os
+import re
+import time
+from pathlib import Path
+from typing import Any, Optional
+from urllib.request import Request, urlopen
+
+from code.file_setup.scryfall_bulk_data import ScryfallBulkDataClient
+
+logger = logging.getLogger(__name__)
+
+# Scryfall CDN has no rate limits, but we'll be conservative
+DOWNLOAD_DELAY = 0.05  # 50ms between image downloads (20 req/sec)
+
+# Image sizes to cache
+IMAGE_SIZES = ["small", "normal"]
+
+# Card name sanitization (filesystem-safe)
+INVALID_CHARS = r'[<>:"/\\|?*]'
+
+
+def sanitize_filename(card_name: str) -> str:
+    """
+    Sanitize card name for use as filename.
+
+    Args:
+        card_name: Original card name
+
+    Returns:
+        Filesystem-safe filename
+    """
+    # Replace invalid characters with underscore
+    safe_name = re.sub(INVALID_CHARS, "_", card_name)
+    # Remove multiple consecutive underscores
+    safe_name = re.sub(r"_+", "_", safe_name)
+    # Trim leading/trailing underscores
+    safe_name = safe_name.strip("_")
+    return safe_name
+
+
+class ImageCache:
+    """Manages local card image cache."""
+
+    def __init__(
+        self,
+        base_dir: str = "card_files/images",
+        bulk_data_path: str = "card_files/raw/scryfall_bulk_data.json",
+    ):
+        """
+        Initialize image cache.
+
+        Args:
+            base_dir: Base directory for cached images
+            bulk_data_path: Path to Scryfall bulk data JSON
+        """
+        self.base_dir = Path(base_dir)
+        self.bulk_data_path = Path(bulk_data_path)
+        self.client = ScryfallBulkDataClient()
+        self._last_download_time: float = 0.0
+
+    def is_enabled(self) -> bool:
+        """Check if image caching is enabled via environment variable."""
+        return os.getenv("CACHE_CARD_IMAGES", "0") == "1"
+
+    def get_image_path(self, card_name: str, size: str = "normal") -> Optional[Path]:
+        """
+        Get local path to cached image if it exists.
+
+        Args:
+            card_name: Card name
+            size: Image size ('small' or 'normal')
+
+        Returns:
+            Path to cached image, or None if not cached
+        """
+        if not self.is_enabled():
+            return None
+
+        safe_name = sanitize_filename(card_name)
+        image_path = self.base_dir / size / f"{safe_name}.jpg"
+
+        if image_path.exists():
+            return image_path
+        return None
+
+    def get_image_url(self, card_name: str, size: str = "normal") -> str:
+        """
+        Get image URL (local path if cached, Scryfall API otherwise).
+
+        Args:
+            card_name: Card name
+            size: Image size ('small' or 'normal')
+
+        Returns:
+            URL or local path to image
+        """
+        # Check local cache first
+        local_path = self.get_image_path(card_name, size)
+        if local_path:
+            # Return as static file path for web serving
+            return f"/static/card_images/{size}/{sanitize_filename(card_name)}.jpg"
+
+        # Fallback to Scryfall API
+        from urllib.parse import quote
+        card_query = quote(card_name)
+        return f"https://api.scryfall.com/cards/named?fuzzy={card_query}&format=image&version={size}"
+
+    def _rate_limit_wait(self) -> None:
+        """Wait to respect rate limits between downloads."""
+        elapsed = time.time() - self._last_download_time
+        if elapsed < DOWNLOAD_DELAY:
+            time.sleep(DOWNLOAD_DELAY - elapsed)
+        self._last_download_time = time.time()
+
+    def _download_image(self, image_url: str, output_path: Path) -> bool:
+        """
+        Download single image from Scryfall CDN.
+
+        Args:
+            image_url: Image URL from bulk data
+            output_path: Local path to save image
+
+        Returns:
+            True if successful, False otherwise
+        """
+        self._rate_limit_wait()
+
+        try:
+            # Ensure output directory exists
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+
+            req = Request(image_url)
+            req.add_header("User-Agent", "MTG-Deckbuilder/3.0 (Image Cache)")
+
+            with urlopen(req, timeout=30) as response:
+                image_data = response.read()
+                with open(output_path, "wb") as f:
+                    f.write(image_data)
+
+            return True
+
+        except Exception as e:
+            logger.debug(f"Failed to download {image_url}: {e}")
+            # Clean up partial download
+            if output_path.exists():
+                output_path.unlink()
+            return False
+
+    def _load_bulk_data(self) -> list[dict[str, Any]]:
+        """
+        Load card data from bulk data JSON.
+
+        Returns:
+            List of card objects with image URLs
+
+        Raises:
+            FileNotFoundError: If bulk data file doesn't exist
+            json.JSONDecodeError: If file is invalid JSON
+        """
+        if not self.bulk_data_path.exists():
+            raise FileNotFoundError(
+                f"Bulk data file not found: {self.bulk_data_path}. "
+                "Run download_bulk_data() first."
+            )
+
+        logger.info(f"Loading bulk data from {self.bulk_data_path}")
+        with open(self.bulk_data_path, "r", encoding="utf-8") as f:
+            return json.load(f)
+
+    def _filter_to_our_cards(self, bulk_cards: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        """
+        Filter bulk data to only cards in our all_cards.parquet file.
+        Deduplicates by card name (takes first printing only).
+
+        Args:
+            bulk_cards: Full Scryfall bulk data
+
+        Returns:
+            Filtered list of cards matching our dataset (one per unique name)
+        """
+        try:
+            import pandas as pd
+            from code.path_util import get_processed_cards_path
+            
+            # Load our card names
+            parquet_path = get_processed_cards_path()
+            df = pd.read_parquet(parquet_path, columns=["name"])
+            our_card_names = set(df["name"].str.lower())
+            
+            logger.info(f"Filtering {len(bulk_cards)} Scryfall cards to {len(our_card_names)} cards in our dataset")
+            
+            # Filter and deduplicate - keep only first printing of each card
+            seen_names = set()
+            filtered = []
+            
+            for card in bulk_cards:
+                card_name_lower = card.get("name", "").lower()
+                if card_name_lower in our_card_names and card_name_lower not in seen_names:
+                    filtered.append(card)
+                    seen_names.add(card_name_lower)
+            
+            logger.info(f"Filtered to {len(filtered)} unique cards with image data")
+            return filtered
+            
+        except Exception as e:
+            logger.warning(f"Could not filter to our cards: {e}. Using all Scryfall cards.")
+            return bulk_cards
+
+    def download_bulk_data(self, progress_callback=None) -> None:
+        """
+        Download latest Scryfall bulk data JSON.
+
+        Args:
+            progress_callback: Optional callback(bytes_downloaded, total_bytes)
+
+        Raises:
+            Exception: If download fails
+        """
+        logger.info("Downloading Scryfall bulk data...")
+        self.bulk_data_path.parent.mkdir(parents=True, exist_ok=True)
+        self.client.get_bulk_data(
+            output_path=str(self.bulk_data_path),
+            progress_callback=progress_callback,
+        )
+        logger.info("Bulk data download complete")
+
+    def download_images(
+        self,
+        sizes: Optional[list[str]] = None,
+        progress_callback=None,
+        max_cards: Optional[int] = None,
+    ) -> dict[str, int]:
+        """
+        Download card images from Scryfall CDN.
+
+        Args:
+            sizes: Image sizes to download (default: ['small', 'normal'])
+            progress_callback: Optional callback(current, total, card_name)
+            max_cards: Maximum cards to download (for testing)
+
+        Returns:
+            Dictionary with download statistics
+
+        Raises:
+            FileNotFoundError: If bulk data not available
+        """
+        if not self.is_enabled():
+            logger.info("Image caching disabled (CACHE_CARD_IMAGES=0)")
+            return {"skipped": 0}
+
+        if sizes is None:
+            sizes = IMAGE_SIZES
+
+        logger.info(f"Starting image download for sizes: {sizes}")
+
+        # Load bulk data and filter to our cards
+        bulk_cards = self._load_bulk_data()
+        cards = self._filter_to_our_cards(bulk_cards)
+        total_cards = len(cards) if max_cards is None else min(max_cards, len(cards))
+
+        stats = {
+            "total": total_cards,
+            "downloaded": 0,
+            "skipped": 0,
+            "failed": 0,
+        }
+
+        for i, card in enumerate(cards[:total_cards]):
+            card_name = card.get("name")
+            if not card_name:
+                stats["skipped"] += 1
+                continue
+
+            # Collect all faces to download (single-faced or multi-faced)
+            faces_to_download = []
+            
+            # Check if card has direct image_uris (single-faced card)
+            if card.get("image_uris"):
+                faces_to_download.append({
+                    "name": card_name,
+                    "image_uris": card["image_uris"],
+                })
+            # Handle double-faced cards (get all faces)
+            elif card.get("card_faces"):
+                for face_idx, face in enumerate(card["card_faces"]):
+                    if face.get("image_uris"):
+                        # For multi-faced cards, append face name or index
+                        face_name = face.get("name", f"{card_name}_face{face_idx}")
+                        faces_to_download.append({
+                            "name": face_name,
+                            "image_uris": face["image_uris"],
+                        })
+            
+            # Skip if no faces found
+            if not faces_to_download:
+                logger.debug(f"No image URIs for {card_name}")
+                stats["skipped"] += 1
+                continue
+
+            # Download each face in each requested size
+            for face in faces_to_download:
+                face_name = face["name"]
+                image_uris = face["image_uris"]
+                
+                for size in sizes:
+                    image_url = image_uris.get(size)
+                    if not image_url:
+                        continue
+
+                    # Check if already cached
+                    safe_name = sanitize_filename(face_name)
+                    output_path = self.base_dir / size / f"{safe_name}.jpg"
+
+                    if output_path.exists():
+                        stats["skipped"] += 1
+                        continue
+
+                    # Download image
+                    if self._download_image(image_url, output_path):
+                        stats["downloaded"] += 1
+                    else:
+                        stats["failed"] += 1
+
+            # Progress callback
+            if progress_callback:
+                progress_callback(i + 1, total_cards, card_name)
+
+        # Invalidate cached summary since we just downloaded new images
+        self.invalidate_summary_cache()
+
+        logger.info(f"Image download complete: {stats}")
+        return stats
+
+    def cache_statistics(self) -> dict[str, Any]:
+        """
+        Get statistics about cached images.
+        
+        Uses a cached summary.json file to avoid scanning thousands of files.
+        Regenerates summary if it doesn't exist or is stale (based on WEB_AUTO_REFRESH_DAYS,
+        default 7 days, matching the main card data staleness check).
+
+        Returns:
+            Dictionary with cache stats (count, size, etc.)
+        """
+        stats = {"enabled": self.is_enabled()}
+
+        if not self.is_enabled():
+            return stats
+
+        summary_file = self.base_dir / "summary.json"
+        
+        # Get staleness threshold from environment (same as card data check)
+        try:
+            refresh_days = int(os.getenv('WEB_AUTO_REFRESH_DAYS', '7'))
+        except Exception:
+            refresh_days = 7
+        
+        if refresh_days <= 0:
+            # Never consider stale
+            refresh_seconds = float('inf')
+        else:
+            refresh_seconds = refresh_days * 24 * 60 * 60  # Convert days to seconds
+        
+        # Check if summary exists and is recent (less than refresh_seconds old)
+        use_cached = False
+        if summary_file.exists():
+            try:
+                import time
+                file_age = time.time() - summary_file.stat().st_mtime
+                if file_age < refresh_seconds:
+                    use_cached = True
+            except Exception:
+                pass
+        
+        # Try to use cached summary
+        if use_cached:
+            try:
+                import json
+                with summary_file.open('r', encoding='utf-8') as f:
+                    cached_stats = json.load(f)
+                    stats.update(cached_stats)
+                    return stats
+            except Exception as e:
+                logger.warning(f"Could not read cache summary: {e}")
+        
+        # Regenerate summary (fast - just count files and estimate size)
+        for size in IMAGE_SIZES:
+            size_dir = self.base_dir / size
+            if size_dir.exists():
+                # Fast count: count .jpg files without statting each one
+                count = sum(1 for _ in size_dir.glob("*.jpg"))
+                
+                # Estimate total size based on typical averages to avoid stat() calls
+                # Small images: ~40 KB avg, Normal images: ~100 KB avg
+                avg_size_kb = 40 if size == "small" else 100
+                estimated_size_mb = (count * avg_size_kb) / 1024
+                
+                stats[size] = {
+                    "count": count,
+                    "size_mb": round(estimated_size_mb, 1),
+                }
+            else:
+                stats[size] = {"count": 0, "size_mb": 0.0}
+        
+        # Save summary for next time
+        try:
+            import json
+            with summary_file.open('w', encoding='utf-8') as f:
+                json.dump({k: v for k, v in stats.items() if k != "enabled"}, f)
+        except Exception as e:
+            logger.warning(f"Could not write cache summary: {e}")
+
+        return stats
+    
+    def invalidate_summary_cache(self) -> None:
+        """Delete the cached summary file to force regeneration on next call."""
+        if not self.is_enabled():
+            return
+        
+        summary_file = self.base_dir / "summary.json"
+        if summary_file.exists():
+            try:
+                summary_file.unlink()
+                logger.debug("Invalidated cache summary file")
+            except Exception as e:
+                logger.warning(f"Could not delete cache summary: {e}")
+
+
+def main():
+    """CLI entry point for image caching."""
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Card image cache management")
+    parser.add_argument(
+        "--download",
+        action="store_true",
+        help="Download images from Scryfall",
+    )
+    parser.add_argument(
+        "--stats",
+        action="store_true",
+        help="Show cache statistics",
+    )
+    parser.add_argument(
+        "--max-cards",
+        type=int,
+        help="Maximum cards to download (for testing)",
+    )
+    parser.add_argument(
+        "--sizes",
+        nargs="+",
+        default=IMAGE_SIZES,
+        choices=IMAGE_SIZES,
+        help="Image sizes to download",
+    )
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        help="Force re-download of bulk data even if recent",
+    )
+
+    args = parser.parse_args()
+
+    # Setup logging
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    )
+
+    cache = ImageCache()
+
+    if args.stats:
+        stats = cache.cache_statistics()
+        print("\nCache Statistics:")
+        print(f"  Enabled: {stats['enabled']}")
+        if stats["enabled"]:
+            for size in IMAGE_SIZES:
+                if size in stats:
+                    print(
+                        f"  {size.capitalize()}: {stats[size]['count']} images "
+                        f"({stats[size]['size_mb']:.1f} MB)"
+                    )
+
+    elif args.download:
+        if not cache.is_enabled():
+            print("Image caching is disabled. Set CACHE_CARD_IMAGES=1 to enable.")
+            return
+
+        # Check if bulk data already exists and is recent (within 24 hours)
+        bulk_data_exists = cache.bulk_data_path.exists()
+        bulk_data_age_hours = None
+        
+        if bulk_data_exists:
+            import time
+            age_seconds = time.time() - cache.bulk_data_path.stat().st_mtime
+            bulk_data_age_hours = age_seconds / 3600
+            print(f"Bulk data file exists (age: {bulk_data_age_hours:.1f} hours)")
+        
+        # Download bulk data if missing, old, or forced
+        if not bulk_data_exists or bulk_data_age_hours > 24 or args.force:
+            print("Downloading Scryfall bulk data...")
+
+            def bulk_progress(downloaded, total):
+                if total > 0:
+                    pct = (downloaded / total) * 100
+                    print(f"  Progress: {downloaded / 1024 / 1024:.1f} MB / "
+                          f"{total / 1024 / 1024:.1f} MB ({pct:.1f}%)", end="\r")
+
+            cache.download_bulk_data(progress_callback=bulk_progress)
+            print("\nBulk data downloaded successfully")
+        else:
+            print("Bulk data is recent, skipping download (use --force to re-download)")
+
+        # Download images
+        print(f"\nDownloading card images (sizes: {', '.join(args.sizes)})...")
+
+        def image_progress(current, total, card_name):
+            pct = (current / total) * 100
+            print(f"  Progress: {current}/{total} ({pct:.1f}%) - {card_name}", end="\r")
+
+        stats = cache.download_images(
+            sizes=args.sizes,
+            progress_callback=image_progress,
+            max_cards=args.max_cards,
+        )
+        print("\n\nDownload complete:")
+        print(f"  Total: {stats['total']}")
+        print(f"  Downloaded: {stats['downloaded']}")
+        print(f"  Skipped: {stats['skipped']}")
+        print(f"  Failed: {stats['failed']}")
+
+    else:
+        parser.print_help()
+
+
+if __name__ == "__main__":
+    main()
--- a/code/file_setup/old/setup.py
+++ b/code/file_setup/old/setup.py
@ -0,0 +1,362 @@
+"""MTG Python Deckbuilder setup module.
+
+This module provides the main setup functionality for the MTG Python Deckbuilder
+application. It handles initial setup tasks such as downloading card data,
+creating color-filtered card lists, and gener        logger.info(f'Downloading latest card data for {color} cards')
+        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
+
+        logger.info('Loading and processing card data')
+        try:
+            df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
+        except pd.errors.ParserError as e:
+            logger.warning(f'CSV parsing error encountered: {e}. Retrying with error handling...')
+            df = pd.read_csv(
+                f'{CSV_DIRECTORY}/cards.csv',
+                low_memory=False,
+                on_bad_lines='warn',  # Warn about malformed rows but continue
+                encoding_errors='replace'  # Replace bad encoding chars
+            )
+            logger.info('Successfully loaded card data with error handling (some rows may have been skipped)')
+
+        logger.info(f'Regenerating {color} cards CSV')der-eligible card lists.
+
+Key Features:
+    - Initial setup and configuration
+    - Card data download and processing
+    - Color-based card filtering
+    - Commander card list generation
+    - CSV file management and validation
+
+The module works in conjunction with setup_utils.py for utility functions and
+exceptions.py for error handling.
+"""
+
+from __future__ import annotations
+
+# Standard library imports
+from enum import Enum
+import os
+from typing import List, Dict, Any
+
+# Third-party imports (optional)
+try:
+    import inquirer
+except Exception:
+    inquirer = None  # Fallback to simple input-based menu when unavailable
+import pandas as pd
+
+# Local imports
+import logging_util
+from settings import CSV_DIRECTORY
+from .setup_constants import BANNED_CARDS, SETUP_COLORS, COLOR_ABRV, MTGJSON_API_URL
+from .setup_utils import (
+    download_cards_csv,
+    filter_dataframe,
+    process_legendary_cards,
+    check_csv_exists,
+    save_color_filtered_csvs,
+    enrich_commander_rows_with_tags,
+)
+from exceptions import (
+    CSVFileNotFoundError,
+    CommanderValidationError,
+    MTGJSONDownloadError
+)
+from scripts import generate_background_cards as background_cards_script
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _generate_background_catalog(cards_path: str, output_path: str) -> None:
+    """Regenerate ``background_cards.csv`` from the latest cards dataset."""
+
+    logger.info('Generating background cards catalog')
+    args = [
+        '--source', cards_path,
+        '--output', output_path,
+    ]
+    try:
+        background_cards_script.main(args)
+    except Exception:  # pragma: no cover - surfaced to caller/test
+        logger.exception('Failed to generate background catalog')
+        raise
+    else:
+        logger.info('Background cards catalog generated successfully')
+
+# Create logger for this module
+logger = logging_util.logging.getLogger(__name__)
+logger.setLevel(logging_util.LOG_LEVEL)
+logger.addHandler(logging_util.file_handler)
+logger.addHandler(logging_util.stream_handler)
+
+# Create CSV directory if it doesn't exist
+if not os.path.exists(CSV_DIRECTORY):
+    os.makedirs(CSV_DIRECTORY)
+
+## Note: using shared check_csv_exists from setup_utils to avoid duplication
+
+def initial_setup() -> None:
+    """Perform initial setup by downloading card data and creating filtered CSV files.
+    
+    Downloads the latest card data from MTGJSON if needed, creates color-filtered CSV files,
+    and generates commander-eligible cards list. Uses utility functions from setup_utils.py
+    for file operations and data processing.
+    
+    Raises:
+        CSVFileNotFoundError: If required CSV files cannot be found
+        MTGJSONDownloadError: If card data download fails
+        DataFrameProcessingError: If data processing fails
+        ColorFilterError: If color filtering fails
+    """
+    logger.info('Checking for cards.csv file')
+    
+    try:
+        cards_file = f'{CSV_DIRECTORY}/cards.csv'
+        try:
+            with open(cards_file, 'r', encoding='utf-8'):
+                logger.info('cards.csv exists')
+        except FileNotFoundError:
+            logger.info('cards.csv not found, downloading from mtgjson')
+            download_cards_csv(MTGJSON_API_URL, cards_file)
+        
+        df = pd.read_csv(cards_file, low_memory=False)
+        
+        logger.info('Checking for color identity sorted files')
+        # Generate color-identity filtered CSVs in one pass
+        save_color_filtered_csvs(df, CSV_DIRECTORY)
+        
+        # Generate commander list
+        determine_commanders()
+
+    except Exception as e:
+        logger.error(f'Error during initial setup: {str(e)}')
+        raise
+
+## Removed local filter_by_color in favor of setup_utils.save_color_filtered_csvs
+
+def determine_commanders() -> None:
+    """Generate commander_cards.csv containing all cards eligible to be commanders.
+    
+    This function processes the card database to identify and validate commander-eligible cards,
+    applying comprehensive validation steps and filtering criteria.
+    
+    Raises:
+        CSVFileNotFoundError: If cards.csv is missing and cannot be downloaded
+        MTGJSONDownloadError: If downloading cards data fails
+        CommanderValidationError: If commander validation fails
+        DataFrameProcessingError: If data processing operations fail
+    """
+    logger.info('Starting commander card generation process')
+    
+    try:
+        # Check for cards.csv with progress tracking
+        cards_file = f'{CSV_DIRECTORY}/cards.csv'
+        if not check_csv_exists(cards_file):
+            logger.info('cards.csv not found, initiating download')
+            download_cards_csv(MTGJSON_API_URL, cards_file)
+        else:
+            logger.info('cards.csv found, proceeding with processing')
+        
+        # Load and process cards data
+        logger.info('Loading card data from CSV')
+        df = pd.read_csv(cards_file, low_memory=False)
+        
+        # Process legendary cards with validation
+        logger.info('Processing and validating legendary cards')
+        try:
+            filtered_df = process_legendary_cards(df)
+        except CommanderValidationError as e:
+            logger.error(f'Commander validation failed: {str(e)}')
+            raise
+        
+        # Apply standard filters
+        logger.info('Applying standard card filters')
+        filtered_df = filter_dataframe(filtered_df, BANNED_CARDS)
+        
+        logger.info('Enriching commander metadata with theme and creature tags')
+        filtered_df = enrich_commander_rows_with_tags(filtered_df, CSV_DIRECTORY)
+
+        # Save commander cards
+        logger.info('Saving validated commander cards')
+        commander_path = f'{CSV_DIRECTORY}/commander_cards.csv'
+        filtered_df.to_csv(commander_path, index=False)
+
+        background_output = f'{CSV_DIRECTORY}/background_cards.csv'
+        _generate_background_catalog(cards_file, background_output)
+
+        logger.info('Commander card generation completed successfully')
+        
+    except (CSVFileNotFoundError, MTGJSONDownloadError) as e:
+        logger.error(f'File operation error: {str(e)}')
+        raise
+    except CommanderValidationError as e:
+        logger.error(f'Commander validation error: {str(e)}')
+        raise
+    except Exception as e:
+        logger.error(f'Unexpected error during commander generation: {str(e)}')
+        raise
+    
+def regenerate_csvs_all() -> None:
+    """Regenerate all color-filtered CSV files from latest card data.
+    
+    Downloads fresh card data and recreates all color-filtered CSV files.
+    Useful for updating the card database when new sets are released.
+    
+    Raises:
+        MTGJSONDownloadError: If card data download fails
+        DataFrameProcessingError: If data processing fails
+        ColorFilterError: If color filtering fails
+    """
+    try:
+        logger.info('Downloading latest card data from MTGJSON')
+        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
+        
+        logger.info('Loading and processing card data')
+        try:
+            df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
+        except pd.errors.ParserError as e:
+            logger.warning(f'CSV parsing error encountered: {e}. Retrying with error handling...')
+            df = pd.read_csv(
+                f'{CSV_DIRECTORY}/cards.csv',
+                low_memory=False,
+                on_bad_lines='warn',  # Warn about malformed rows but continue
+                encoding_errors='replace'  # Replace bad encoding chars
+            )
+            logger.info(f'Successfully loaded card data with error handling (some rows may have been skipped)')
+        
+        logger.info('Regenerating color identity sorted files')
+        save_color_filtered_csvs(df, CSV_DIRECTORY)
+            
+        logger.info('Regenerating commander cards')
+        determine_commanders()
+        
+        logger.info('Card database regeneration complete')
+        
+    except Exception as e:
+        logger.error(f'Failed to regenerate card database: {str(e)}')
+        raise
+    # Once files are regenerated, create a new legendary list (already executed in try)
+
+def regenerate_csv_by_color(color: str) -> None:
+    """Regenerate CSV file for a specific color identity.
+    
+    Args:
+        color: Color name to regenerate CSV for (e.g. 'white', 'blue')
+        
+    Raises:
+        ValueError: If color is not valid
+        MTGJSONDownloadError: If card data download fails
+        DataFrameProcessingError: If data processing fails
+        ColorFilterError: If color filtering fails
+    """
+    try:
+        if color not in SETUP_COLORS:
+            raise ValueError(f'Invalid color: {color}')
+
+        color_abv = COLOR_ABRV[SETUP_COLORS.index(color)]
+
+        logger.info(f'Downloading latest card data for {color} cards')
+        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
+
+        logger.info('Loading and processing card data')
+        df = pd.read_csv(
+            f'{CSV_DIRECTORY}/cards.csv',
+            low_memory=False,
+            on_bad_lines='skip',  # Skip malformed rows (MTGJSON CSV has escaping issues)
+            encoding_errors='replace'  # Replace bad encoding chars
+        )
+
+        logger.info(f'Regenerating {color} cards CSV')
+        # Use shared utilities to base-filter once then slice color, honoring bans
+        base_df = filter_dataframe(df, BANNED_CARDS)
+        base_df[base_df['colorIdentity'] == color_abv].to_csv(
+            f'{CSV_DIRECTORY}/{color}_cards.csv', index=False
+        )
+
+        logger.info(f'Successfully regenerated {color} cards database')
+
+    except Exception as e:
+        logger.error(f'Failed to regenerate {color} cards: {str(e)}')
+        raise
+
+class SetupOption(Enum):
+    """Enum for setup menu options."""
+    INITIAL_SETUP = 'Initial Setup'
+    REGENERATE_CSV = 'Regenerate CSV Files'
+    BACK = 'Back'
+
+def _display_setup_menu() -> SetupOption:
+    """Display the setup menu and return the selected option.
+    
+    Returns:
+        SetupOption: The selected menu option
+    """
+    if inquirer is not None:
+        question: List[Dict[str, Any]] = [
+            inquirer.List(
+                'menu',
+                choices=[option.value for option in SetupOption],
+                carousel=True)]
+        answer = inquirer.prompt(question)
+        return SetupOption(answer['menu'])
+
+    # Simple fallback when inquirer isn't installed (e.g., headless/container)
+    options = list(SetupOption)
+    print("\nSetup Menu:")
+    for idx, opt in enumerate(options, start=1):
+        print(f"  {idx}) {opt.value}")
+    while True:
+        try:
+            sel = input("Select an option [1]: ").strip() or "1"
+            i = int(sel)
+            if 1 <= i <= len(options):
+                return options[i - 1]
+        except KeyboardInterrupt:
+            print("")
+            return SetupOption.BACK
+        except Exception:
+            pass
+        print("Invalid selection. Please try again.")
+
+def setup() -> bool:
+    """Run the setup process for the MTG Python Deckbuilder.
+    
+    This function provides a menu-driven interface to:
+    1. Perform initial setup by downloading and processing card data
+    2. Regenerate CSV files with updated card data
+    3. Perform all tagging processes on the color-sorted csv files
+    
+    The function handles errors gracefully and provides feedback through logging.
+    
+    Returns:
+        bool: True if setup completed successfully, False otherwise
+    """
+    try:
+        print('Which setup operation would you like to perform?\n'
+              'If this is your first time setting up, do the initial setup.\n'
+              'If you\'ve done the basic setup before, you can regenerate the CSV files\n')
+        
+        choice = _display_setup_menu()
+        
+        if choice == SetupOption.INITIAL_SETUP:
+            logger.info('Starting initial setup')
+            initial_setup()
+            logger.info('Initial setup completed successfully')
+            return True
+            
+        elif choice == SetupOption.REGENERATE_CSV:
+            logger.info('Starting CSV regeneration')
+            regenerate_csvs_all()
+            logger.info('CSV regeneration completed successfully')
+            return True
+            
+        elif choice == SetupOption.BACK:
+            logger.info('Setup cancelled by user')
+            return False
+            
+    except Exception as e:
+        logger.error(f'Error during setup: {e}')
+        raise
+    
+    return False
--- a/code/file_setup/old/setup_constants.py
+++ b/code/file_setup/old/setup_constants.py
@ -0,0 +1,114 @@
+from typing import Dict, List
+from settings import (
+    SETUP_COLORS,
+    COLOR_ABRV,
+    CARD_DATA_COLUMNS as COLUMN_ORDER,  # backward compatible alias
+    CARD_DATA_COLUMNS as TAGGED_COLUMN_ORDER,
+)
+
+__all__ = [
+    'SETUP_COLORS', 'COLOR_ABRV', 'COLUMN_ORDER', 'TAGGED_COLUMN_ORDER',
+    'BANNED_CARDS', 'MTGJSON_API_URL', 'LEGENDARY_OPTIONS', 'NON_LEGAL_SETS',
+    'CARD_TYPES_TO_EXCLUDE', 'CSV_PROCESSING_COLUMNS', 'SORT_CONFIG',
+    'FILTER_CONFIG'
+]
+
+# Banned cards consolidated here (remains specific to setup concerns)
+BANNED_CARDS: List[str] = [
+    # Commander banned list
+    'Ancestral Recall', 'Balance', 'Biorhythm', 'Black Lotus',
+    'Chaos Orb', 'Channel', 'Dockside Extortionist',
+    'Emrakul, the Aeons Torn',
+    'Erayo, Soratami Ascendant', 'Falling Star', 'Fastbond',
+    'Flash', 'Golos, Tireless Pilgrim',
+    'Griselbrand', 'Hullbreacher', 'Iona, Shield of Emeria',
+    'Karakas', 'Jeweled Lotus', 'Leovold, Emissary of Trest',
+    'Library of Alexandria', 'Limited Resources', 'Lutri, the Spellchaser',
+    'Mana Crypt', 'Mox Emerald', 'Mox Jet', 'Mox Pearl', 'Mox Ruby',
+    'Mox Sapphire', 'Nadu, Winged Wisdom',
+    'Paradox Engine', 'Primeval Titan', 'Prophet of Kruphix',
+    'Recurring Nightmare', 'Rofellos, Llanowar Emissary', 'Shahrazad',
+    'Sundering Titan', 'Sylvan Primordial',
+    'Time Vault', 'Time Walk', 'Tinker', 'Tolarian Academy',
+    'Trade Secrets', 'Upheaval', "Yawgmoth's Bargain",
+    # Problematic / culturally sensitive or banned in other formats
+    'Invoke Prejudice', 'Cleanse', 'Stone-Throwing Devils', 'Pradesh Gypsies',
+    'Jihad', 'Imprison', 'Crusade',
+    # Cards of the Hero type (non creature)
+    "The Protector", "The Hunter", "The Savant", "The Explorer",
+    "The Philosopher", "The Harvester", "The Tyrant", "The Vanquisher",
+    "The Avenger", "The Slayer", "The Warmonger", "The Destined",
+    "The Warrior", "The General", "The Provider", "The Champion",
+    # Hero Equipment
+    "Spear of the General", "Lash of the Tyrant", "Bow of the Hunter",
+    "Cloak of the Philosopher", "Axe of the Warmonger"
+]
+
+# Constants for setup and CSV processing
+MTGJSON_API_URL: str = 'https://mtgjson.com/api/v5/csv/cards.csv'
+
+LEGENDARY_OPTIONS: List[str] = [
+    'Legendary Creature',
+    'Legendary Artifact',
+    'Legendary Artifact Creature', 
+    'Legendary Enchantment Creature',
+    'Legendary Planeswalker'
+]
+
+NON_LEGAL_SETS: List[str] = [
+    'PHTR', 'PH17', 'PH18', 'PH19', 'PH20', 'PH21',
+    'UGL', 'UND', 'UNH', 'UST'
+]
+
+CARD_TYPES_TO_EXCLUDE: List[str] = [
+    'Plane —',
+    'Conspiracy',
+    'Vanguard', 
+    'Scheme',
+    'Phenomenon',
+    'Stickers',
+    'Attraction',
+    'Contraption'
+]
+
+# Columns to keep when processing CSV files
+CSV_PROCESSING_COLUMNS: List[str] = [
+    'name',        # Card name
+    'faceName',    # Name of specific face for multi-faced cards
+    'edhrecRank',  # Card's rank on EDHREC
+    'colorIdentity',  # Color identity for Commander format
+    'colors',      # Actual colors in card's mana cost
+    'manaCost',    # Mana cost string
+    'manaValue',   # Converted mana cost
+    'type',        # Card type line
+    'layout',      # Card layout (normal, split, etc)
+    'text',        # Card text/rules
+    'power',       # Power (for creatures)
+    'toughness',   # Toughness (for creatures)
+    'keywords',    # Card's keywords
+    'side'         # Side identifier for multi-faced cards
+]
+
+# Configuration for DataFrame sorting operations
+SORT_CONFIG = {
+    'columns': ['name', 'side'],  # Columns to sort by
+    'case_sensitive': False  # Ignore case when sorting
+}
+
+# Configuration for DataFrame filtering operations
+FILTER_CONFIG: Dict[str, Dict[str, List[str]]] = {
+    'layout': {
+        'exclude': ['reversible_card']
+    },
+    'availability': {
+        'require': ['paper']
+    },
+    'promoTypes': {
+        'exclude': ['playtest']
+    },
+    'securityStamp': {
+        'exclude': ['Heart', 'Acorn']
+    }
+}
+
+# COLUMN_ORDER and TAGGED_COLUMN_ORDER now sourced from settings via CARD_DATA_COLUMNS
--- a/code/file_setup/old/setup_csv.py
+++ b/code/file_setup/old/setup_csv.py
@ -0,0 +1,342 @@
+"""MTG Python Deckbuilder setup module.
+
+This module provides the main setup functionality for the MTG Python Deckbuilder
+application. It handles initial setup tasks such as downloading card data,
+creating color-filtered card lists, and gener        logger.info(f'Downloading latest card data for {color} cards')
+        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
+
+        logger.info('Loading and processing card data')
+        try:
+            df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
+        except pd.errors.ParserError as e:
+            logger.warning(f'CSV parsing error encountered: {e}. Retrying with error handling...')
+            df = pd.read_csv(
+                f'{CSV_DIRECTORY}/cards.csv',
+                low_memory=False,
+                on_bad_lines='warn',  # Warn about malformed rows but continue
+                encoding_errors='replace'  # Replace bad encoding chars
+            )
+            logger.info('Successfully loaded card data with error handling (some rows may have been skipped)')
+
+        logger.info(f'Regenerating {color} cards CSV')der-eligible card lists.
+
+Key Features:
+    - Initial setup and configuration
+    - Card data download and processing
+    - Color-based card filtering
+    - Commander card list generation
+    - CSV file management and validation
+
+The module works in conjunction with setup_utils.py for utility functions and
+exceptions.py for error handling.
+"""
+
+from __future__ import annotations
+
+# Standard library imports
+from enum import Enum
+import os
+from typing import List, Dict, Any
+
+# Third-party imports (optional)
+try:
+    import inquirer
+except Exception:
+    inquirer = None  # Fallback to simple input-based menu when unavailable
+import pandas as pd
+
+# Local imports
+import logging_util
+from settings import CSV_DIRECTORY
+from .setup_constants import BANNED_CARDS, SETUP_COLORS, COLOR_ABRV, MTGJSON_API_URL
+from .setup_utils import (
+    download_cards_csv,
+    filter_dataframe,
+    process_legendary_cards,
+    check_csv_exists,
+    save_color_filtered_csvs,
+    enrich_commander_rows_with_tags,
+)
+from exceptions import (
+    CSVFileNotFoundError,
+    CommanderValidationError,
+    MTGJSONDownloadError
+)
+from scripts import generate_background_cards as background_cards_script
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _generate_background_catalog(cards_path: str, output_path: str) -> None:
+    """Regenerate ``background_cards.csv`` from the latest cards dataset."""
+
+    logger.info('Generating background cards catalog')
+    args = [
+        '--source', cards_path,
+        '--output', output_path,
+    ]
+    try:
+        background_cards_script.main(args)
+    except Exception:  # pragma: no cover - surfaced to caller/test
+        logger.exception('Failed to generate background catalog')
+        raise
+    else:
+        logger.info('Background cards catalog generated successfully')
+
+# Create logger for this module
+logger = logging_util.logging.getLogger(__name__)
+logger.setLevel(logging_util.LOG_LEVEL)
+logger.addHandler(logging_util.file_handler)
+logger.addHandler(logging_util.stream_handler)
+
+# Create CSV directory if it doesn't exist
+if not os.path.exists(CSV_DIRECTORY):
+    os.makedirs(CSV_DIRECTORY)
+
+## Note: using shared check_csv_exists from setup_utils to avoid duplication
+
+def initial_setup() -> None:
+    """Perform initial setup by downloading and processing card data.
+    
+    **MIGRATION NOTE**: This function now delegates to the Parquet-based setup
+    (initial_setup_parquet) instead of the legacy CSV workflow. The old CSV-based
+    setup is preserved in code/file_setup/old/setup.py for reference.
+    
+    Downloads the latest card data from MTGJSON as Parquet, processes it, and creates
+    the unified all_cards.parquet file. No color-specific files are generated - filtering
+    happens at query time instead.
+    
+    Raises:
+        Various exceptions from Parquet download/processing steps
+    """
+    from .setup_parquet import initial_setup_parquet
+    initial_setup_parquet()
+
+## Removed local filter_by_color in favor of setup_utils.save_color_filtered_csvs
+
+def determine_commanders() -> None:
+    """Generate commander_cards.csv containing all cards eligible to be commanders.
+    
+    This function processes the card database to identify and validate commander-eligible cards,
+    applying comprehensive validation steps and filtering criteria.
+    
+    Raises:
+        CSVFileNotFoundError: If cards.csv is missing and cannot be downloaded
+        MTGJSONDownloadError: If downloading cards data fails
+        CommanderValidationError: If commander validation fails
+        DataFrameProcessingError: If data processing operations fail
+    """
+    logger.info('Starting commander card generation process')
+    
+    try:
+        # Check for cards.csv with progress tracking
+        cards_file = f'{CSV_DIRECTORY}/cards.csv'
+        if not check_csv_exists(cards_file):
+            logger.info('cards.csv not found, initiating download')
+            download_cards_csv(MTGJSON_API_URL, cards_file)
+        else:
+            logger.info('cards.csv found, proceeding with processing')
+        
+        # Load and process cards data
+        logger.info('Loading card data from CSV')
+        df = pd.read_csv(cards_file, low_memory=False)
+        
+        # Process legendary cards with validation
+        logger.info('Processing and validating legendary cards')
+        try:
+            filtered_df = process_legendary_cards(df)
+        except CommanderValidationError as e:
+            logger.error(f'Commander validation failed: {str(e)}')
+            raise
+        
+        # Apply standard filters
+        logger.info('Applying standard card filters')
+        filtered_df = filter_dataframe(filtered_df, BANNED_CARDS)
+        
+        logger.info('Enriching commander metadata with theme and creature tags')
+        filtered_df = enrich_commander_rows_with_tags(filtered_df, CSV_DIRECTORY)
+
+        # Save commander cards
+        logger.info('Saving validated commander cards')
+        commander_path = f'{CSV_DIRECTORY}/commander_cards.csv'
+        filtered_df.to_csv(commander_path, index=False)
+
+        background_output = f'{CSV_DIRECTORY}/background_cards.csv'
+        _generate_background_catalog(cards_file, background_output)
+
+        logger.info('Commander card generation completed successfully')
+        
+    except (CSVFileNotFoundError, MTGJSONDownloadError) as e:
+        logger.error(f'File operation error: {str(e)}')
+        raise
+    except CommanderValidationError as e:
+        logger.error(f'Commander validation error: {str(e)}')
+        raise
+    except Exception as e:
+        logger.error(f'Unexpected error during commander generation: {str(e)}')
+        raise
+    
+def regenerate_csvs_all() -> None:
+    """Regenerate all color-filtered CSV files from latest card data.
+    
+    Downloads fresh card data and recreates all color-filtered CSV files.
+    Useful for updating the card database when new sets are released.
+    
+    Raises:
+        MTGJSONDownloadError: If card data download fails
+        DataFrameProcessingError: If data processing fails
+        ColorFilterError: If color filtering fails
+    """
+    try:
+        logger.info('Downloading latest card data from MTGJSON')
+        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
+        
+        logger.info('Loading and processing card data')
+        try:
+            df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
+        except pd.errors.ParserError as e:
+            logger.warning(f'CSV parsing error encountered: {e}. Retrying with error handling...')
+            df = pd.read_csv(
+                f'{CSV_DIRECTORY}/cards.csv',
+                low_memory=False,
+                on_bad_lines='warn',  # Warn about malformed rows but continue
+                encoding_errors='replace'  # Replace bad encoding chars
+            )
+            logger.info(f'Successfully loaded card data with error handling (some rows may have been skipped)')
+        
+        logger.info('Regenerating color identity sorted files')
+        save_color_filtered_csvs(df, CSV_DIRECTORY)
+            
+        logger.info('Regenerating commander cards')
+        determine_commanders()
+        
+        logger.info('Card database regeneration complete')
+        
+    except Exception as e:
+        logger.error(f'Failed to regenerate card database: {str(e)}')
+        raise
+    # Once files are regenerated, create a new legendary list (already executed in try)
+
+def regenerate_csv_by_color(color: str) -> None:
+    """Regenerate CSV file for a specific color identity.
+    
+    Args:
+        color: Color name to regenerate CSV for (e.g. 'white', 'blue')
+        
+    Raises:
+        ValueError: If color is not valid
+        MTGJSONDownloadError: If card data download fails
+        DataFrameProcessingError: If data processing fails
+        ColorFilterError: If color filtering fails
+    """
+    try:
+        if color not in SETUP_COLORS:
+            raise ValueError(f'Invalid color: {color}')
+
+        color_abv = COLOR_ABRV[SETUP_COLORS.index(color)]
+
+        logger.info(f'Downloading latest card data for {color} cards')
+        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
+
+        logger.info('Loading and processing card data')
+        df = pd.read_csv(
+            f'{CSV_DIRECTORY}/cards.csv',
+            low_memory=False,
+            on_bad_lines='skip',  # Skip malformed rows (MTGJSON CSV has escaping issues)
+            encoding_errors='replace'  # Replace bad encoding chars
+        )
+
+        logger.info(f'Regenerating {color} cards CSV')
+        # Use shared utilities to base-filter once then slice color, honoring bans
+        base_df = filter_dataframe(df, BANNED_CARDS)
+        base_df[base_df['colorIdentity'] == color_abv].to_csv(
+            f'{CSV_DIRECTORY}/{color}_cards.csv', index=False
+        )
+
+        logger.info(f'Successfully regenerated {color} cards database')
+
+    except Exception as e:
+        logger.error(f'Failed to regenerate {color} cards: {str(e)}')
+        raise
+
+class SetupOption(Enum):
+    """Enum for setup menu options."""
+    INITIAL_SETUP = 'Initial Setup'
+    REGENERATE_CSV = 'Regenerate CSV Files'
+    BACK = 'Back'
+
+def _display_setup_menu() -> SetupOption:
+    """Display the setup menu and return the selected option.
+    
+    Returns:
+        SetupOption: The selected menu option
+    """
+    if inquirer is not None:
+        question: List[Dict[str, Any]] = [
+            inquirer.List(
+                'menu',
+                choices=[option.value for option in SetupOption],
+                carousel=True)]
+        answer = inquirer.prompt(question)
+        return SetupOption(answer['menu'])
+
+    # Simple fallback when inquirer isn't installed (e.g., headless/container)
+    options = list(SetupOption)
+    print("\nSetup Menu:")
+    for idx, opt in enumerate(options, start=1):
+        print(f"  {idx}) {opt.value}")
+    while True:
+        try:
+            sel = input("Select an option [1]: ").strip() or "1"
+            i = int(sel)
+            if 1 <= i <= len(options):
+                return options[i - 1]
+        except KeyboardInterrupt:
+            print("")
+            return SetupOption.BACK
+        except Exception:
+            pass
+        print("Invalid selection. Please try again.")
+
+def setup() -> bool:
+    """Run the setup process for the MTG Python Deckbuilder.
+    
+    This function provides a menu-driven interface to:
+    1. Perform initial setup by downloading and processing card data
+    2. Regenerate CSV files with updated card data
+    3. Perform all tagging processes on the color-sorted csv files
+    
+    The function handles errors gracefully and provides feedback through logging.
+    
+    Returns:
+        bool: True if setup completed successfully, False otherwise
+    """
+    try:
+        print('Which setup operation would you like to perform?\n'
+              'If this is your first time setting up, do the initial setup.\n'
+              'If you\'ve done the basic setup before, you can regenerate the CSV files\n')
+        
+        choice = _display_setup_menu()
+        
+        if choice == SetupOption.INITIAL_SETUP:
+            logger.info('Starting initial setup')
+            initial_setup()
+            logger.info('Initial setup completed successfully')
+            return True
+            
+        elif choice == SetupOption.REGENERATE_CSV:
+            logger.info('Starting CSV regeneration')
+            regenerate_csvs_all()
+            logger.info('CSV regeneration completed successfully')
+            return True
+            
+        elif choice == SetupOption.BACK:
+            logger.info('Setup cancelled by user')
+            return False
+            
+    except Exception as e:
+        logger.error(f'Error during setup: {e}')
+        raise
+    
+    return False
--- a/code/file_setup/old/setup_utils.py
+++ b/code/file_setup/old/setup_utils.py
@ -0,0 +1,776 @@
+"""MTG Python Deckbuilder setup utilities.
+
+This module provides utility functions for setting up and managing the MTG Python Deckbuilder
+application. It handles tasks such as downloading card data, filtering cards by various criteria,
+and processing legendary creatures for commander format.
+
+Key Features:
+    - Card data download from MTGJSON
+    - DataFrame filtering and processing
+    - Color identity filtering
+    - Commander validation
+    - CSV file management
+
+The module integrates with settings.py for configuration and exceptions.py for error handling.
+"""
+
+from __future__ import annotations
+
+# Standard library imports
+import ast
+import requests
+from pathlib import Path
+from typing import List, Optional, Union, TypedDict, Iterable, Dict, Any
+
+# Third-party imports
+import pandas as pd
+from tqdm import tqdm
+import json
+from datetime import datetime
+
+# Local application imports
+from .setup_constants import (
+    CSV_PROCESSING_COLUMNS,
+    CARD_TYPES_TO_EXCLUDE,
+    NON_LEGAL_SETS,
+    SORT_CONFIG,
+    FILTER_CONFIG,
+    COLUMN_ORDER,
+    TAGGED_COLUMN_ORDER,
+    SETUP_COLORS,
+    COLOR_ABRV,
+    BANNED_CARDS,
+)
+from exceptions import (
+    MTGJSONDownloadError,
+    DataFrameProcessingError,
+    ColorFilterError,
+    CommanderValidationError
+)
+from type_definitions import CardLibraryDF
+from settings import FILL_NA_COLUMNS, CSV_DIRECTORY
+import logging_util
+
+# Create logger for this module
+logger = logging_util.logging.getLogger(__name__)
+logger.setLevel(logging_util.LOG_LEVEL)
+logger.addHandler(logging_util.file_handler)
+logger.addHandler(logging_util.stream_handler)
+
+
+def _is_primary_side(value: object) -> bool:
+    """Return True when the provided side marker corresponds to a primary face."""
+    try:
+        if pd.isna(value):
+            return True
+    except Exception:
+        pass
+    text = str(value).strip().lower()
+    return text in {"", "a"}
+
+
+def _summarize_secondary_face_exclusions(
+    names: Iterable[str],
+    source_df: pd.DataFrame,
+) -> List[Dict[str, Any]]:
+    summaries: List[Dict[str, Any]] = []
+    if not names:
+        return summaries
+
+    for raw_name in names:
+        name = str(raw_name)
+        group = source_df[source_df['name'] == name]
+        if group.empty:
+            continue
+
+        primary_rows = group[group['side'].apply(_is_primary_side)] if 'side' in group.columns else pd.DataFrame()
+        primary_face = (
+            str(primary_rows['faceName'].iloc[0])
+            if not primary_rows.empty and 'faceName' in primary_rows.columns
+            else ""
+        )
+        layout = str(group['layout'].iloc[0]) if 'layout' in group.columns and not group.empty else ""
+        faces = sorted(set(str(v) for v in group.get('faceName', pd.Series(dtype=str)).dropna().tolist()))
+        eligible_faces = sorted(
+            set(
+                str(v)
+                for v in group
+                .loc[~group['side'].apply(_is_primary_side) if 'side' in group.columns else [False] * len(group)]
+                .get('faceName', pd.Series(dtype=str))
+                .dropna()
+                .tolist()
+            )
+        )
+
+        summaries.append(
+            {
+                "name": name,
+                "primary_face": primary_face or name.split('//')[0].strip(),
+                "layout": layout,
+                "faces": faces,
+                "eligible_faces": eligible_faces,
+                "reason": "secondary_face_only",
+            }
+        )
+
+    return summaries
+
+
+def _write_commander_exclusions_log(entries: List[Dict[str, Any]]) -> None:
+    """Persist commander exclusion diagnostics for downstream tooling."""
+
+    path = Path(CSV_DIRECTORY) / ".commander_exclusions.json"
+
+    if not entries:
+        try:
+            path.unlink()
+        except FileNotFoundError:
+            return
+        except Exception as exc:
+            logger.debug("Unable to remove commander exclusion log: %s", exc)
+        return
+
+    payload = {
+        "generated_at": datetime.now().isoformat(timespec='seconds'),
+        "secondary_face_only": entries,
+    }
+
+    try:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with path.open('w', encoding='utf-8') as handle:
+            json.dump(payload, handle, indent=2, ensure_ascii=False)
+    except Exception as exc:
+        logger.warning("Failed to write commander exclusion diagnostics: %s", exc)
+
+
+def _enforce_primary_face_commander_rules(
+    candidate_df: pd.DataFrame,
+    source_df: pd.DataFrame,
+) -> pd.DataFrame:
+    """Retain only primary faces and record any secondary-face-only exclusions."""
+
+    if candidate_df.empty or 'side' not in candidate_df.columns:
+        _write_commander_exclusions_log([])
+        return candidate_df
+
+    mask_primary = candidate_df['side'].apply(_is_primary_side)
+    primary_df = candidate_df[mask_primary].copy()
+    secondary_df = candidate_df[~mask_primary]
+
+    primary_names = set(str(n) for n in primary_df.get('name', pd.Series(dtype=str)))
+    secondary_only_names = sorted(
+        set(str(n) for n in secondary_df.get('name', pd.Series(dtype=str))) - primary_names
+    )
+
+    if secondary_only_names:
+        logger.info(
+            "Excluding %d commander entries where only a secondary face is eligible: %s",
+            len(secondary_only_names),
+            ", ".join(secondary_only_names),
+        )
+
+    entries = _summarize_secondary_face_exclusions(secondary_only_names, source_df)
+    _write_commander_exclusions_log(entries)
+
+    return primary_df
+
+
+def _coerce_tag_list(value: object) -> List[str]:
+    """Normalize various list-like representations into a list of strings."""
+
+    if value is None:
+        return []
+    if isinstance(value, float) and pd.isna(value):
+        return []
+    if isinstance(value, (list, tuple, set)):
+        return [str(v).strip() for v in value if str(v).strip()]
+    text = str(value).strip()
+    if not text:
+        return []
+    try:
+        parsed = ast.literal_eval(text)
+        if isinstance(parsed, (list, tuple, set)):
+            return [str(v).strip() for v in parsed if str(v).strip()]
+    except Exception:
+        pass
+    parts = [part.strip() for part in text.replace(";", ",").split(",")]
+    return [part for part in parts if part]
+
+
+def _collect_commander_tag_metadata(csv_dir: Union[str, Path]) -> Dict[str, Dict[str, List[str]]]:
+    """Aggregate theme and creature tags from color-tagged CSV files."""
+
+    path = Path(csv_dir)
+    if not path.exists():
+        return {}
+
+    combined: Dict[str, Dict[str, set[str]]] = {}
+    columns = ("themeTags", "creatureTypes", "roleTags")
+
+    for color in SETUP_COLORS:
+        color_path = path / f"{color}_cards.csv"
+        if not color_path.exists():
+            continue
+        try:
+            df = pd.read_csv(color_path, low_memory=False)
+        except Exception as exc:
+            logger.debug("Unable to read %s for commander tag enrichment: %s", color_path, exc)
+            continue
+
+        if df.empty or ("name" not in df.columns and "faceName" not in df.columns):
+            continue
+
+        for _, row in df.iterrows():
+            face_key = str(row.get("faceName", "")).strip()
+            name_key = str(row.get("name", "")).strip()
+            keys = {k for k in (face_key, name_key) if k}
+            if not keys:
+                continue
+
+            for key in keys:
+                bucket = combined.setdefault(key, {col: set() for col in columns})
+                for col in columns:
+                    if col not in row:
+                        continue
+                    values = _coerce_tag_list(row.get(col))
+                    if values:
+                        bucket[col].update(values)
+
+    enriched: Dict[str, Dict[str, List[str]]] = {}
+    for key, data in combined.items():
+        enriched[key] = {col: sorted(values) for col, values in data.items() if values}
+    return enriched
+
+
+def enrich_commander_rows_with_tags(
+    df: pd.DataFrame,
+    csv_dir: Union[str, Path],
+) -> pd.DataFrame:
+    """Attach theme and creature tag metadata to commander rows when available."""
+
+    if df.empty:
+        df = df.copy()
+        for column in ("themeTags", "creatureTypes", "roleTags"):
+            if column not in df.columns:
+                df[column] = []
+        return df
+
+    metadata = _collect_commander_tag_metadata(csv_dir)
+    if not metadata:
+        df = df.copy()
+        for column in ("themeTags", "creatureTypes", "roleTags"):
+            if column not in df.columns:
+                df[column] = [[] for _ in range(len(df))]
+        return df
+
+    df = df.copy()
+    for column in ("themeTags", "creatureTypes", "roleTags"):
+        if column not in df.columns:
+            df[column] = [[] for _ in range(len(df))]
+
+    theme_values: List[List[str]] = []
+    creature_values: List[List[str]] = []
+    role_values: List[List[str]] = []
+
+    for _, row in df.iterrows():
+        face_key = str(row.get("faceName", "")).strip()
+        name_key = str(row.get("name", "")).strip()
+
+        entry_face = metadata.get(face_key, {})
+        entry_name = metadata.get(name_key, {})
+
+        combined: Dict[str, set[str]] = {
+            "themeTags": set(_coerce_tag_list(row.get("themeTags"))),
+            "creatureTypes": set(_coerce_tag_list(row.get("creatureTypes"))),
+            "roleTags": set(_coerce_tag_list(row.get("roleTags"))),
+        }
+
+        for source in (entry_face, entry_name):
+            for column in combined:
+                combined[column].update(source.get(column, []))
+
+        theme_values.append(sorted(combined["themeTags"]))
+        creature_values.append(sorted(combined["creatureTypes"]))
+        role_values.append(sorted(combined["roleTags"]))
+
+    df["themeTags"] = theme_values
+    df["creatureTypes"] = creature_values
+    df["roleTags"] = role_values
+
+    enriched_rows = sum(1 for t, c, r in zip(theme_values, creature_values, role_values) if t or c or r)
+    logger.debug("Enriched %d commander rows with tag metadata", enriched_rows)
+
+    return df
+
+# Type definitions
+class FilterRule(TypedDict):
+    """Type definition for filter rules configuration."""
+    exclude: Optional[List[str]]
+    require: Optional[List[str]]
+
+class FilterConfig(TypedDict):
+    """Type definition for complete filter configuration."""
+    layout: FilterRule
+    availability: FilterRule
+    promoTypes: FilterRule
+    securityStamp: FilterRule
+def download_cards_csv(url: str, output_path: Union[str, Path]) -> None:
+    """Download cards data from MTGJSON and save to CSV.
+
+    Downloads card data from the specified MTGJSON URL and saves it to a local CSV file.
+    Shows a progress bar during download using tqdm.
+
+    Args:
+        url: URL to download cards data from (typically MTGJSON API endpoint)
+        output_path: Path where the downloaded CSV file will be saved
+
+    Raises:
+        MTGJSONDownloadError: If download fails due to network issues or invalid response
+
+    Example:
+        >>> download_cards_csv('https://mtgjson.com/api/v5/cards.csv', 'cards.csv')
+    """
+    try:
+        response = requests.get(url, stream=True)
+        response.raise_for_status()
+        total_size = int(response.headers.get('content-length', 0))
+        
+        with open(output_path, 'wb') as f:
+            with tqdm(total=total_size, unit='iB', unit_scale=True, desc='Downloading cards data') as pbar:
+                for chunk in response.iter_content(chunk_size=8192):
+                    size = f.write(chunk)
+                    pbar.update(size)
+            
+    except requests.RequestException as e:
+        logger.error(f'Failed to download cards data from {url}')
+        raise MTGJSONDownloadError(
+            "Failed to download cards data",
+            url,
+            getattr(e.response, 'status_code', None) if hasattr(e, 'response') else None
+        ) from e
+def check_csv_exists(filepath: Union[str, Path]) -> bool:
+    """Check if a CSV file exists at the specified path.
+
+    Verifies the existence of a CSV file at the given path. This function is used
+    to determine if card data needs to be downloaded or if it already exists locally.
+
+    Args:
+        filepath: Path to the CSV file to check
+
+    Returns:
+        bool: True if the file exists, False otherwise
+
+    Example:
+        >>> if not check_csv_exists('cards.csv'):
+        ...     download_cards_csv(MTGJSON_API_URL, 'cards.csv')
+    """
+    return Path(filepath).is_file()
+
+def save_color_filtered_csvs(df: pd.DataFrame, out_dir: Union[str, Path]) -> None:
+    """Generate and save color-identity filtered CSVs for all configured colors.
+
+    Iterates across configured color names and their corresponding color identity
+    abbreviations, filters the provided DataFrame using standard filters plus
+    color identity, and writes each filtered set to CSV in the provided directory.
+
+    Args:
+        df: Source DataFrame containing card data.
+        out_dir: Output directory for the generated CSV files.
+
+    Raises:
+        DataFrameProcessingError: If filtering fails.
+        ColorFilterError: If color filtering fails for a specific color.
+    """
+    out_path = Path(out_dir)
+    out_path.mkdir(parents=True, exist_ok=True)
+
+    # Base-filter once for efficiency, then per-color filter without redoing base filters
+    try:
+        # Apply full standard filtering including banned list once, then slice per color
+        base_df = filter_dataframe(df, BANNED_CARDS)
+    except Exception as e:
+        # Wrap any unexpected issues as DataFrameProcessingError
+        raise DataFrameProcessingError(
+            "Failed to prepare base DataFrame for color filtering",
+            "base_color_filtering",
+            str(e)
+        ) from e
+
+    for color_name, color_id in zip(SETUP_COLORS, COLOR_ABRV):
+        try:
+            logger.info(f"Generating {color_name}_cards.csv")
+            color_df = base_df[base_df['colorIdentity'] == color_id]
+            color_df.to_csv(out_path / f"{color_name}_cards.csv", index=False)
+        except Exception as e:
+            raise ColorFilterError(
+                "Failed to generate color CSV",
+                color_id,
+                str(e)
+            ) from e
+
+def filter_dataframe(df: pd.DataFrame, banned_cards: List[str]) -> pd.DataFrame:
+    """Apply standard filters to the cards DataFrame using configuration from settings.
+
+    Applies a series of filters to the cards DataFrame based on configuration from settings.py.
+    This includes handling null values, applying basic filters, removing illegal sets and banned cards,
+    and processing special card types.
+
+    Args:
+        df: pandas DataFrame containing card data to filter
+        banned_cards: List of card names that are banned and should be excluded
+
+    Returns:
+        pd.DataFrame: A new DataFrame containing only the cards that pass all filters
+
+    Raises:
+        DataFrameProcessingError: If any filtering operation fails
+
+    Example:
+        >>> filtered_df = filter_dataframe(cards_df, ['Channel', 'Black Lotus'])
+    """
+    try:
+        logger.info('Starting standard DataFrame filtering')
+        
+        # Fill null values according to configuration
+        for col, fill_value in FILL_NA_COLUMNS.items():
+            if col == 'faceName':
+                fill_value = df['name']
+            df[col] = df[col].fillna(fill_value)
+            logger.debug(f'Filled NA values in {col} with {fill_value}')
+        
+        # Apply basic filters from configuration
+        filtered_df = df.copy()
+        filter_config: FilterConfig = FILTER_CONFIG  # Type hint for configuration
+        for field, rules in filter_config.items():
+            if field not in filtered_df.columns:
+                logger.warning('Skipping filter for missing field %s', field)
+                continue
+
+            for rule_type, values in rules.items():
+                if not values:
+                    continue
+
+                if rule_type == 'exclude':
+                    for value in values:
+                        mask = filtered_df[field].astype(str).str.contains(
+                            value,
+                            case=False,
+                            na=False,
+                            regex=False
+                        )
+                        filtered_df = filtered_df[~mask]
+                elif rule_type == 'require':
+                    for value in values:
+                        mask = filtered_df[field].astype(str).str.contains(
+                            value,
+                            case=False,
+                            na=False,
+                            regex=False
+                        )
+                        filtered_df = filtered_df[mask]
+                else:
+                    logger.warning('Unknown filter rule type %s for field %s', rule_type, field)
+                    continue
+
+                logger.debug(f'Applied {rule_type} filter for {field}: {values}')
+        
+        # Remove illegal sets
+        for set_code in NON_LEGAL_SETS:
+            filtered_df = filtered_df[~filtered_df['printings'].str.contains(set_code, na=False)]
+        logger.debug('Removed illegal sets')
+
+        # Remove banned cards (exact, case-insensitive match on name or faceName)
+        if banned_cards:
+            banned_set = {b.casefold() for b in banned_cards}
+            name_lc = filtered_df['name'].astype(str).str.casefold()
+            face_lc = filtered_df['faceName'].astype(str).str.casefold()
+            mask = ~(name_lc.isin(banned_set) | face_lc.isin(banned_set))
+            before = len(filtered_df)
+            filtered_df = filtered_df[mask]
+            after = len(filtered_df)
+            logger.debug(f'Removed banned cards: {before - after} filtered out')
+
+        # Remove special card types
+        for card_type in CARD_TYPES_TO_EXCLUDE:
+            filtered_df = filtered_df[~filtered_df['type'].str.contains(card_type, na=False)]
+        logger.debug('Removed special card types')
+
+        # Select columns, sort, and drop duplicates
+        filtered_df = filtered_df[CSV_PROCESSING_COLUMNS]
+        filtered_df = filtered_df.sort_values(
+            by=SORT_CONFIG['columns'],
+            key=lambda col: col.str.lower() if not SORT_CONFIG['case_sensitive'] else col
+        )
+        filtered_df = filtered_df.drop_duplicates(subset='faceName', keep='first')
+        logger.info('Completed standard DataFrame filtering')
+        
+        return filtered_df
+
+    except Exception as e:
+        logger.error(f'Failed to filter DataFrame: {str(e)}')
+        raise DataFrameProcessingError(
+            "Failed to filter DataFrame",
+            "standard_filtering",
+            str(e)
+        ) from e
+def filter_by_color_identity(df: pd.DataFrame, color_identity: str) -> pd.DataFrame:
+    """Filter DataFrame by color identity with additional color-specific processing.
+
+    This function extends the base filter_dataframe functionality with color-specific
+    filtering logic. It is used by setup.py's filter_by_color function but provides
+    a more robust and configurable implementation.
+
+    Args:
+        df: DataFrame to filter
+        color_identity: Color identity to filter by (e.g., 'W', 'U,B', 'Colorless')
+
+    Returns:
+        DataFrame filtered by color identity
+
+    Raises:
+        ColorFilterError: If color identity is invalid or filtering fails
+        DataFrameProcessingError: If general filtering operations fail
+    """
+    try:
+        logger.info(f'Filtering cards for color identity: {color_identity}')
+
+        # Validate color identity
+        with tqdm(total=1, desc='Validating color identity') as pbar:
+            if not isinstance(color_identity, str):
+                raise ColorFilterError(
+                    "Invalid color identity type",
+                    str(color_identity),
+                    "Color identity must be a string"
+                )
+            pbar.update(1)
+            
+        # Apply base filtering
+        with tqdm(total=1, desc='Applying base filtering') as pbar:
+            filtered_df = filter_dataframe(df, BANNED_CARDS)
+            pbar.update(1)
+            
+        # Filter by color identity
+        with tqdm(total=1, desc='Filtering by color identity') as pbar:
+            filtered_df = filtered_df[filtered_df['colorIdentity'] == color_identity]
+            logger.debug(f'Applied color identity filter: {color_identity}')
+            pbar.update(1)
+            
+        # Additional color-specific processing
+        with tqdm(total=1, desc='Performing color-specific processing') as pbar:
+            # Placeholder for future color-specific processing
+            pbar.update(1)
+        logger.info(f'Completed color identity filtering for {color_identity}')
+        return filtered_df
+        
+    except DataFrameProcessingError as e:
+        raise ColorFilterError(
+            "Color filtering failed",
+            color_identity,
+            str(e)
+        ) from e
+    except Exception as e:
+        raise ColorFilterError(
+            "Unexpected error during color filtering",
+            color_identity,
+            str(e)
+        ) from e
+        
+def process_legendary_cards(df: pd.DataFrame) -> pd.DataFrame:
+    """Process and filter legendary cards for commander eligibility with comprehensive validation.
+
+    Args:
+        df: DataFrame containing all cards
+
+    Returns:
+        DataFrame containing only commander-eligible cards
+
+    Raises:
+        CommanderValidationError: If validation fails for legendary status, special cases, or set legality
+        DataFrameProcessingError: If general processing fails
+    """
+    try:
+        logger.info('Starting commander validation process')
+
+        filtered_df = df.copy()
+        # Step 1: Check legendary status
+        try:
+            with tqdm(total=1, desc='Checking legendary status') as pbar:
+                # Normalize type line for matching
+                type_line = filtered_df['type'].astype(str).str.lower()
+
+                # Base predicates
+                is_legendary = type_line.str.contains('legendary')
+                is_creature = type_line.str.contains('creature')
+                # Planeswalkers are only eligible if they explicitly state they can be your commander (handled in special cases step)
+                is_enchantment = type_line.str.contains('enchantment')
+                is_artifact = type_line.str.contains('artifact')
+                is_vehicle_or_spacecraft = type_line.str.contains('vehicle') | type_line.str.contains('spacecraft')
+
+                # 1. Always allow Legendary Creatures (includes artifact/enchantment creatures already)
+                allow_legendary_creature = is_legendary & is_creature
+
+                # 2. Allow Legendary Enchantment Creature (already covered by legendary creature) – ensure no plain legendary enchantments without creature type slip through
+                allow_enchantment_creature = is_legendary & is_enchantment & is_creature
+
+                # 3. Allow certain Legendary Artifacts:
+                #    a) Vehicles/Spacecraft that have printed power & toughness
+                has_power_toughness = filtered_df['power'].notna() & filtered_df['toughness'].notna()
+                allow_artifact_vehicle = is_legendary & is_artifact & is_vehicle_or_spacecraft & has_power_toughness
+
+                # (Artifacts or planeswalkers with explicit permission text will be added in special cases step.)
+
+                baseline_mask = allow_legendary_creature | allow_enchantment_creature | allow_artifact_vehicle
+                filtered_df = filtered_df[baseline_mask].copy()
+
+                if filtered_df.empty:
+                    raise CommanderValidationError(
+                        "No baseline eligible commanders found",
+                        "legendary_check",
+                        "After applying commander rules no cards qualified"
+                    )
+
+                logger.debug(
+                    "Baseline commander counts: total=%d legendary_creatures=%d enchantment_creatures=%d artifact_vehicles=%d", 
+                    len(filtered_df),
+                    int((allow_legendary_creature).sum()),
+                    int((allow_enchantment_creature).sum()),
+                    int((allow_artifact_vehicle).sum())
+                )
+                pbar.update(1)
+        except Exception as e:
+            raise CommanderValidationError(
+                "Legendary status check failed",
+                "legendary_check",
+                str(e)
+            ) from e
+
+        # Step 2: Validate special cases
+        try:
+            with tqdm(total=1, desc='Validating special cases') as pbar:
+                # Add any card (including planeswalkers, artifacts, non-legendary cards) that explicitly allow being a commander
+                special_cases = df['text'].str.contains('can be your commander', na=False, case=False)
+                special_commanders = df[special_cases].copy()
+                filtered_df = pd.concat([filtered_df, special_commanders]).drop_duplicates()
+                logger.debug(f'Added {len(special_commanders)} special commander cards')
+                pbar.update(1)
+        except Exception as e:
+            raise CommanderValidationError(
+                "Special case validation failed",
+                "special_cases",
+                str(e)
+            ) from e
+
+        # Step 3: Verify set legality
+        try:
+            with tqdm(total=1, desc='Verifying set legality') as pbar:
+                initial_count = len(filtered_df)
+                for set_code in NON_LEGAL_SETS:
+                    filtered_df = filtered_df[
+                        ~filtered_df['printings'].str.contains(set_code, na=False)
+                    ]
+                removed_count = initial_count - len(filtered_df)
+                logger.debug(f'Removed {removed_count} cards from illegal sets')
+                pbar.update(1)
+        except Exception as e:
+            raise CommanderValidationError(
+                "Set legality verification failed",
+                "set_legality",
+                str(e)
+            ) from e
+        filtered_df = _enforce_primary_face_commander_rules(filtered_df, df)
+
+        logger.info('Commander validation complete. %d valid commanders found', len(filtered_df))
+        return filtered_df
+
+    except CommanderValidationError:
+        raise
+    except Exception as e:
+        raise DataFrameProcessingError(
+            "Failed to process legendary cards",
+            "commander_processing",
+            str(e)
+        ) from e
+
+def process_card_dataframe(df: CardLibraryDF, batch_size: int = 1000, columns_to_keep: Optional[List[str]] = None,
+                         include_commander_cols: bool = False, skip_availability_checks: bool = False) -> CardLibraryDF:
+    """Process DataFrame with common operations in batches.
+
+    Args:
+        df: DataFrame to process
+        batch_size: Size of batches for processing
+        columns_to_keep: List of columns to keep (default: COLUMN_ORDER)
+        include_commander_cols: Whether to include commander-specific columns
+        skip_availability_checks: Whether to skip availability and security checks (default: False)
+
+    Args:
+        df: DataFrame to process
+        batch_size: Size of batches for processing
+        columns_to_keep: List of columns to keep (default: COLUMN_ORDER)
+        include_commander_cols: Whether to include commander-specific columns
+
+    Returns:
+        CardLibraryDF: Processed DataFrame with standardized structure
+    """
+    logger.info("Processing card DataFrame...")
+
+    if columns_to_keep is None:
+        columns_to_keep = TAGGED_COLUMN_ORDER.copy()
+        if include_commander_cols:
+            commander_cols = ['printings', 'text', 'power', 'toughness', 'keywords']
+            columns_to_keep.extend(col for col in commander_cols if col not in columns_to_keep)
+
+    # Fill NA values
+    df.loc[:, 'colorIdentity'] = df['colorIdentity'].fillna('Colorless')
+    df.loc[:, 'faceName'] = df['faceName'].fillna(df['name'])
+
+    # Process in batches
+    total_batches = len(df) // batch_size + 1
+    processed_dfs = []
+
+    for i in tqdm(range(total_batches), desc="Processing batches"):
+        start_idx = i * batch_size
+        end_idx = min((i + 1) * batch_size, len(df))
+        batch = df.iloc[start_idx:end_idx].copy()
+
+        if not skip_availability_checks:
+            columns_to_keep = COLUMN_ORDER.copy()
+            logger.debug("Performing column checks...")
+            # Common processing steps
+            batch = batch[batch['availability'].str.contains('paper', na=False)]
+            batch = batch.loc[batch['layout'] != 'reversible_card']
+            batch = batch.loc[batch['promoTypes'] != 'playtest']
+            batch = batch.loc[batch['securityStamp'] != 'heart']
+            batch = batch.loc[batch['securityStamp'] != 'acorn']
+            # Keep only specified columns
+            batch = batch[columns_to_keep]
+            processed_dfs.append(batch)
+        else:
+            logger.debug("Skipping column checks...")
+            # Even when skipping availability checks, still ensure columns_to_keep if provided
+            if columns_to_keep is not None:
+                try:
+                    batch = batch[columns_to_keep]
+                except Exception:
+                    # If requested columns are not present, keep as-is
+                    pass
+            processed_dfs.append(batch)
+
+    # Combine processed batches
+    result = pd.concat(processed_dfs, ignore_index=True)
+
+    # Final processing
+    result.drop_duplicates(subset='faceName', keep='first', inplace=True)
+    result.sort_values(by=['name', 'side'], key=lambda col: col.str.lower(), inplace=True)
+
+    logger.info("DataFrame processing completed")
+    return result
+
+# Backward-compatibility wrapper used by deck_builder.builder
+def regenerate_csvs_all() -> None:  # pragma: no cover - simple delegator
+    """Delegate to setup.regenerate_csvs_all to preserve existing imports.
+
+    Some modules import regenerate_csvs_all from setup_utils. Keep this
+    function as a stable indirection to avoid breaking callers.
+    """
+    from . import setup as setup_module  # local import to avoid circular import
+    setup_module.regenerate_csvs_all()
--- a/code/file_setup/scryfall_bulk_data.py
+++ b/code/file_setup/scryfall_bulk_data.py
@ -0,0 +1,169 @@
+"""
+Scryfall Bulk Data API client.
+
+Fetches bulk data JSON files from Scryfall's bulk data API, which provides
+all card information including image URLs without hitting rate limits.
+
+See: https://scryfall.com/docs/api/bulk-data
+"""
+
+import logging
+import os
+import time
+from typing import Any
+from urllib.request import Request, urlopen
+
+logger = logging.getLogger(__name__)
+
+BULK_DATA_API_URL = "https://api.scryfall.com/bulk-data"
+DEFAULT_BULK_TYPE = "default_cards"  # All cards in Scryfall's database
+RATE_LIMIT_DELAY = 0.1  # 100ms between requests (50-100ms per Scryfall guidelines)
+
+
+class ScryfallBulkDataClient:
+    """Client for fetching Scryfall bulk data."""
+
+    def __init__(self, rate_limit_delay: float = RATE_LIMIT_DELAY):
+        """
+        Initialize Scryfall bulk data client.
+
+        Args:
+            rate_limit_delay: Seconds to wait between API requests (default 100ms)
+        """
+        self.rate_limit_delay = rate_limit_delay
+        self._last_request_time: float = 0.0
+
+    def _rate_limit_wait(self) -> None:
+        """Wait to respect rate limits between API calls."""
+        elapsed = time.time() - self._last_request_time
+        if elapsed < self.rate_limit_delay:
+            time.sleep(self.rate_limit_delay - elapsed)
+        self._last_request_time = time.time()
+
+    def _make_request(self, url: str) -> Any:
+        """
+        Make HTTP request with rate limiting and error handling.
+
+        Args:
+            url: URL to fetch
+
+        Returns:
+            Parsed JSON response
+
+        Raises:
+            Exception: If request fails after retries
+        """
+        self._rate_limit_wait()
+
+        try:
+            req = Request(url)
+            req.add_header("User-Agent", "MTG-Deckbuilder/3.0 (Image Cache)")
+            with urlopen(req, timeout=30) as response:
+                import json
+                return json.loads(response.read().decode("utf-8"))
+        except Exception as e:
+            logger.error(f"Failed to fetch {url}: {e}")
+            raise
+
+    def get_bulk_data_info(self, bulk_type: str = DEFAULT_BULK_TYPE) -> dict[str, Any]:
+        """
+        Get bulk data metadata (download URL, size, last updated).
+
+        Args:
+            bulk_type: Type of bulk data to fetch (default: default_cards)
+
+        Returns:
+            Dictionary with bulk data info including 'download_uri'
+
+        Raises:
+            ValueError: If bulk_type not found
+            Exception: If API request fails
+        """
+        logger.info(f"Fetching bulk data info for type: {bulk_type}")
+        response = self._make_request(BULK_DATA_API_URL)
+
+        # Find the requested bulk data type
+        for item in response.get("data", []):
+            if item.get("type") == bulk_type:
+                logger.info(
+                    f"Found bulk data: {item.get('name')} "
+                    f"(size: {item.get('size', 0) / 1024 / 1024:.1f} MB, "
+                    f"updated: {item.get('updated_at', 'unknown')})"
+                )
+                return item
+
+        raise ValueError(f"Bulk data type '{bulk_type}' not found")
+
+    def download_bulk_data(
+        self, download_uri: str, output_path: str, progress_callback=None
+    ) -> None:
+        """
+        Download bulk data JSON file.
+
+        Args:
+            download_uri: Direct download URL from get_bulk_data_info()
+            output_path: Local path to save the JSON file
+            progress_callback: Optional callback(bytes_downloaded, total_bytes)
+
+        Raises:
+            Exception: If download fails
+        """
+        logger.info(f"Downloading bulk data from: {download_uri}")
+        logger.info(f"Saving to: {output_path}")
+
+        # No rate limit on bulk data downloads per Scryfall docs
+        try:
+            req = Request(download_uri)
+            req.add_header("User-Agent", "MTG-Deckbuilder/3.0 (Image Cache)")
+
+            with urlopen(req, timeout=60) as response:
+                total_size = int(response.headers.get("Content-Length", 0))
+                downloaded = 0
+                chunk_size = 1024 * 1024  # 1MB chunks
+
+                # Ensure output directory exists
+                os.makedirs(os.path.dirname(output_path), exist_ok=True)
+
+                with open(output_path, "wb") as f:
+                    while True:
+                        chunk = response.read(chunk_size)
+                        if not chunk:
+                            break
+                        f.write(chunk)
+                        downloaded += len(chunk)
+                        if progress_callback:
+                            progress_callback(downloaded, total_size)
+
+            logger.info(f"Downloaded {downloaded / 1024 / 1024:.1f} MB successfully")
+
+        except Exception as e:
+            logger.error(f"Failed to download bulk data: {e}")
+            # Clean up partial download
+            if os.path.exists(output_path):
+                os.remove(output_path)
+            raise
+
+    def get_bulk_data(
+        self,
+        bulk_type: str = DEFAULT_BULK_TYPE,
+        output_path: str = "card_files/raw/scryfall_bulk_data.json",
+        progress_callback=None,
+    ) -> str:
+        """
+        Fetch bulk data info and download the JSON file.
+
+        Args:
+            bulk_type: Type of bulk data to fetch
+            output_path: Where to save the JSON file
+            progress_callback: Optional progress callback
+
+        Returns:
+            Path to downloaded file
+
+        Raises:
+            Exception: If fetch or download fails
+        """
+        info = self.get_bulk_data_info(bulk_type)
+        download_uri = info["download_uri"]
+        self.download_bulk_data(download_uri, output_path, progress_callback)
+        return output_path
--- a/code/file_setup/setup.py
+++ b/code/file_setup/setup.py
@ -1,362 +1,412 @@
-"""MTG Python Deckbuilder setup module.
+"""Parquet-based setup for MTG Python Deckbuilder.

-This module provides the main setup functionality for the MTG Python Deckbuilder
-application. It handles initial setup tasks such as downloading card data,
-creating color-filtered card lists, and gener        logger.info(f'Downloading latest card data for {color} cards')
-        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
+This module handles downloading and processing MTGJSON Parquet data for the
+MTG Python Deckbuilder. It replaces the old CSV-based multi-file approach
+with a single-file Parquet workflow.

-        logger.info('Loading and processing card data')
-        try:
-            df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
-        except pd.errors.ParserError as e:
-            logger.warning(f'CSV parsing error encountered: {e}. Retrying with error handling...')
-            df = pd.read_csv(
-                f'{CSV_DIRECTORY}/cards.csv',
-                low_memory=False,
-                on_bad_lines='warn',  # Warn about malformed rows but continue
-                encoding_errors='replace'  # Replace bad encoding chars
-            )
-            logger.info('Successfully loaded card data with error handling (some rows may have been skipped)')
+Key Changes from CSV approach:
+- Single all_cards.parquet file instead of 18+ color-specific CSVs
+- Downloads from MTGJSON Parquet API (faster, smaller)
+- Adds isCommander and isBackground boolean flags
+- Filters to essential columns only (14 base + 4 custom = 18 total)
+- Uses DataLoader abstraction for format flexibility

-        logger.info(f'Regenerating {color} cards CSV')der-eligible card lists.
-
-Key Features:
-    - Initial setup and configuration
-    - Card data download and processing
-    - Color-based card filtering
-    - Commander card list generation
-    - CSV file management and validation
-
-The module works in conjunction with setup_utils.py for utility functions and
-exceptions.py for error handling.
+Introduced in v3.0.0 as part of CSV→Parquet migration.
 """

 from __future__ import annotations

-# Standard library imports
-from enum import Enum
 import os
-from typing import List, Dict, Any

-# Third-party imports (optional)
-try:
-    import inquirer  # type: ignore
-except Exception:
-    inquirer = None  # Fallback to simple input-based menu when unavailable
 import pandas as pd
+import requests
+from tqdm import tqdm

-# Local imports
+from .data_loader import DataLoader, validate_schema
+from .setup_constants import (
+    CSV_PROCESSING_COLUMNS,
+    CARD_TYPES_TO_EXCLUDE,
+    NON_LEGAL_SETS,
+    BANNED_CARDS,
+    FILTER_CONFIG,
+    SORT_CONFIG,
+)
 import logging_util
-from settings import CSV_DIRECTORY
-from .setup_constants import BANNED_CARDS, SETUP_COLORS, COLOR_ABRV, MTGJSON_API_URL
-from .setup_utils import (
-    download_cards_csv,
-    filter_dataframe,
-    process_legendary_cards,
-    check_csv_exists,
-    save_color_filtered_csvs,
-    enrich_commander_rows_with_tags,
-)
-from exceptions import (
-    CSVFileNotFoundError,
-    CommanderValidationError,
-    MTGJSONDownloadError
-)
-from scripts import generate_background_cards as background_cards_script
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
+from path_util import card_files_raw_dir, get_processed_cards_path
+import settings
+
+logger = logging_util.get_logger(__name__)
+
+# MTGJSON Parquet API URL
+MTGJSON_PARQUET_URL = "https://mtgjson.com/api/v5/parquet/cards.parquet"


-def _generate_background_catalog(cards_path: str, output_path: str) -> None:
-    """Regenerate ``background_cards.csv`` from the latest cards dataset."""
-
-    logger.info('Generating background cards catalog')
-    args = [
-        '--source', cards_path,
-        '--output', output_path,
-    ]
-    try:
-        background_cards_script.main(args)
-    except Exception:  # pragma: no cover - surfaced to caller/test
-        logger.exception('Failed to generate background catalog')
-        raise
-    else:
-        logger.info('Background cards catalog generated successfully')
-
-# Create logger for this module
-logger = logging_util.logging.getLogger(__name__)
-logger.setLevel(logging_util.LOG_LEVEL)
-logger.addHandler(logging_util.file_handler)
-logger.addHandler(logging_util.stream_handler)
-
-# Create CSV directory if it doesn't exist
-if not os.path.exists(CSV_DIRECTORY):
-    os.makedirs(CSV_DIRECTORY)
-
-## Note: using shared check_csv_exists from setup_utils to avoid duplication
-
-def initial_setup() -> None:
-    """Perform initial setup by downloading card data and creating filtered CSV files.
-    
-    Downloads the latest card data from MTGJSON if needed, creates color-filtered CSV files,
-    and generates commander-eligible cards list. Uses utility functions from setup_utils.py
-    for file operations and data processing.
-    
-    Raises:
-        CSVFileNotFoundError: If required CSV files cannot be found
-        MTGJSONDownloadError: If card data download fails
-        DataFrameProcessingError: If data processing fails
-        ColorFilterError: If color filtering fails
-    """
-    logger.info('Checking for cards.csv file')
-    
-    try:
-        cards_file = f'{CSV_DIRECTORY}/cards.csv'
-        try:
-            with open(cards_file, 'r', encoding='utf-8'):
-                logger.info('cards.csv exists')
-        except FileNotFoundError:
-            logger.info('cards.csv not found, downloading from mtgjson')
-            download_cards_csv(MTGJSON_API_URL, cards_file)
-        
-        df = pd.read_csv(cards_file, low_memory=False)
-        
-        logger.info('Checking for color identity sorted files')
-        # Generate color-identity filtered CSVs in one pass
-        save_color_filtered_csvs(df, CSV_DIRECTORY)
-        
-        # Generate commander list
-        determine_commanders()
-
-    except Exception as e:
-        logger.error(f'Error during initial setup: {str(e)}')
-        raise
-
-## Removed local filter_by_color in favor of setup_utils.save_color_filtered_csvs
-
-def determine_commanders() -> None:
-    """Generate commander_cards.csv containing all cards eligible to be commanders.
-    
-    This function processes the card database to identify and validate commander-eligible cards,
-    applying comprehensive validation steps and filtering criteria.
-    
-    Raises:
-        CSVFileNotFoundError: If cards.csv is missing and cannot be downloaded
-        MTGJSONDownloadError: If downloading cards data fails
-        CommanderValidationError: If commander validation fails
-        DataFrameProcessingError: If data processing operations fail
-    """
-    logger.info('Starting commander card generation process')
-    
-    try:
-        # Check for cards.csv with progress tracking
-        cards_file = f'{CSV_DIRECTORY}/cards.csv'
-        if not check_csv_exists(cards_file):
-            logger.info('cards.csv not found, initiating download')
-            download_cards_csv(MTGJSON_API_URL, cards_file)
-        else:
-            logger.info('cards.csv found, proceeding with processing')
-        
-        # Load and process cards data
-        logger.info('Loading card data from CSV')
-        df = pd.read_csv(cards_file, low_memory=False)
-        
-        # Process legendary cards with validation
-        logger.info('Processing and validating legendary cards')
-        try:
-            filtered_df = process_legendary_cards(df)
-        except CommanderValidationError as e:
-            logger.error(f'Commander validation failed: {str(e)}')
-            raise
-        
-        # Apply standard filters
-        logger.info('Applying standard card filters')
-        filtered_df = filter_dataframe(filtered_df, BANNED_CARDS)
-        
-        logger.info('Enriching commander metadata with theme and creature tags')
-        filtered_df = enrich_commander_rows_with_tags(filtered_df, CSV_DIRECTORY)
-
-        # Save commander cards
-        logger.info('Saving validated commander cards')
-        commander_path = f'{CSV_DIRECTORY}/commander_cards.csv'
-        filtered_df.to_csv(commander_path, index=False)
-
-        background_output = f'{CSV_DIRECTORY}/background_cards.csv'
-        _generate_background_catalog(cards_file, background_output)
-
-        logger.info('Commander card generation completed successfully')
-        
-    except (CSVFileNotFoundError, MTGJSONDownloadError) as e:
-        logger.error(f'File operation error: {str(e)}')
-        raise
-    except CommanderValidationError as e:
-        logger.error(f'Commander validation error: {str(e)}')
-        raise
-    except Exception as e:
-        logger.error(f'Unexpected error during commander generation: {str(e)}')
-        raise
-    
-def regenerate_csvs_all() -> None:
-    """Regenerate all color-filtered CSV files from latest card data.
-    
-    Downloads fresh card data and recreates all color-filtered CSV files.
-    Useful for updating the card database when new sets are released.
-    
-    Raises:
-        MTGJSONDownloadError: If card data download fails
-        DataFrameProcessingError: If data processing fails
-        ColorFilterError: If color filtering fails
-    """
-    try:
-        logger.info('Downloading latest card data from MTGJSON')
-        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
-        
-        logger.info('Loading and processing card data')
-        try:
-            df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False)
-        except pd.errors.ParserError as e:
-            logger.warning(f'CSV parsing error encountered: {e}. Retrying with error handling...')
-            df = pd.read_csv(
-                f'{CSV_DIRECTORY}/cards.csv',
-                low_memory=False,
-                on_bad_lines='warn',  # Warn about malformed rows but continue
-                encoding_errors='replace'  # Replace bad encoding chars
-            )
-            logger.info(f'Successfully loaded card data with error handling (some rows may have been skipped)')
-        
-        logger.info('Regenerating color identity sorted files')
-        save_color_filtered_csvs(df, CSV_DIRECTORY)
-            
-        logger.info('Regenerating commander cards')
-        determine_commanders()
-        
-        logger.info('Card database regeneration complete')
-        
-    except Exception as e:
-        logger.error(f'Failed to regenerate card database: {str(e)}')
-        raise
-    # Once files are regenerated, create a new legendary list (already executed in try)
-
-def regenerate_csv_by_color(color: str) -> None:
-    """Regenerate CSV file for a specific color identity.
+def download_parquet_from_mtgjson(output_path: str) -> None:
+    """Download MTGJSON cards.parquet file.
    
    Args:
-        color: Color name to regenerate CSV for (e.g. 'white', 'blue')
+        output_path: Where to save the downloaded Parquet file
        
    Raises:
-        ValueError: If color is not valid
-        MTGJSONDownloadError: If card data download fails
-        DataFrameProcessingError: If data processing fails
-        ColorFilterError: If color filtering fails
+        requests.RequestException: If download fails
+        IOError: If file cannot be written
    """
+    logger.info(f"Downloading MTGJSON Parquet from {MTGJSON_PARQUET_URL}")
+    
    try:
-        if color not in SETUP_COLORS:
-            raise ValueError(f'Invalid color: {color}')
-
-        color_abv = COLOR_ABRV[SETUP_COLORS.index(color)]
-
-        logger.info(f'Downloading latest card data for {color} cards')
-        download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv')
-
-        logger.info('Loading and processing card data')
-        df = pd.read_csv(
-            f'{CSV_DIRECTORY}/cards.csv',
-            low_memory=False,
-            on_bad_lines='skip',  # Skip malformed rows (MTGJSON CSV has escaping issues)
-            encoding_errors='replace'  # Replace bad encoding chars
-        )
-
-        logger.info(f'Regenerating {color} cards CSV')
-        # Use shared utilities to base-filter once then slice color, honoring bans
-        base_df = filter_dataframe(df, BANNED_CARDS)
-        base_df[base_df['colorIdentity'] == color_abv].to_csv(
-            f'{CSV_DIRECTORY}/{color}_cards.csv', index=False
-        )
-
-        logger.info(f'Successfully regenerated {color} cards database')
-
-    except Exception as e:
-        logger.error(f'Failed to regenerate {color} cards: {str(e)}')
+        response = requests.get(MTGJSON_PARQUET_URL, stream=True, timeout=60)
+        response.raise_for_status()
+        
+        # Get file size for progress bar
+        total_size = int(response.headers.get('content-length', 0))
+        
+        # Ensure output directory exists
+        os.makedirs(os.path.dirname(output_path), exist_ok=True)
+        
+        # Download with progress bar
+        with open(output_path, 'wb') as f, tqdm(
+            total=total_size,
+            unit='B',
+            unit_scale=True,
+            desc='Downloading cards.parquet'
+        ) as pbar:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+                pbar.update(len(chunk))
+        
+        logger.info(f"✓ Downloaded {total_size / (1024**2):.2f} MB to {output_path}")
+        
+    except requests.RequestException as e:
+        logger.error(f"Failed to download MTGJSON Parquet: {e}")
+        raise
+    except IOError as e:
+        logger.error(f"Failed to write Parquet file: {e}")
        raise

-class SetupOption(Enum):
-    """Enum for setup menu options."""
-    INITIAL_SETUP = 'Initial Setup'
-    REGENERATE_CSV = 'Regenerate CSV Files'
-    BACK = 'Back'

-def _display_setup_menu() -> SetupOption:
-    """Display the setup menu and return the selected option.
+def is_valid_commander(row: pd.Series) -> bool:
+    """Determine if a card can be a commander.
    
-    Returns:
-        SetupOption: The selected menu option
-    """
-    if inquirer is not None:
-        question: List[Dict[str, Any]] = [
-            inquirer.List(
-                'menu',
-                choices=[option.value for option in SetupOption],
-                carousel=True)]
-        answer = inquirer.prompt(question)
-        return SetupOption(answer['menu'])
-
-    # Simple fallback when inquirer isn't installed (e.g., headless/container)
-    options = list(SetupOption)
-    print("\nSetup Menu:")
-    for idx, opt in enumerate(options, start=1):
-        print(f"  {idx}) {opt.value}")
-    while True:
-        try:
-            sel = input("Select an option [1]: ").strip() or "1"
-            i = int(sel)
-            if 1 <= i <= len(options):
-                return options[i - 1]
-        except KeyboardInterrupt:
-            print("")
-            return SetupOption.BACK
-        except Exception:
-            pass
-        print("Invalid selection. Please try again.")
-
-def setup() -> bool:
-    """Run the setup process for the MTG Python Deckbuilder.
+    Criteria:
+    - Legendary Creature
+    - OR: Has "can be your commander" in text
+    - OR: Background (Partner with Background)
    
-    This function provides a menu-driven interface to:
-    1. Perform initial setup by downloading and processing card data
-    2. Regenerate CSV files with updated card data
-    3. Perform all tagging processes on the color-sorted csv files
-    
-    The function handles errors gracefully and provides feedback through logging.
-    
-    Returns:
-        bool: True if setup completed successfully, False otherwise
-    """
-    try:
-        print('Which setup operation would you like to perform?\n'
-              'If this is your first time setting up, do the initial setup.\n'
-              'If you\'ve done the basic setup before, you can regenerate the CSV files\n')
+    Args:
+        row: DataFrame row with card data
        
-        choice = _display_setup_menu()
-        
-        if choice == SetupOption.INITIAL_SETUP:
-            logger.info('Starting initial setup')
-            initial_setup()
-            logger.info('Initial setup completed successfully')
-            return True
-            
-        elif choice == SetupOption.REGENERATE_CSV:
-            logger.info('Starting CSV regeneration')
-            regenerate_csvs_all()
-            logger.info('CSV regeneration completed successfully')
-            return True
-            
-        elif choice == SetupOption.BACK:
-            logger.info('Setup cancelled by user')
-            return False
-            
-    except Exception as e:
-        logger.error(f'Error during setup: {e}')
-        raise
+    Returns:
+        True if card can be a commander
+    """
+    type_line = str(row.get('type', ''))
+    text = str(row.get('text', '')).lower()
+    
+    # Legendary Creature
+    if 'Legendary' in type_line and 'Creature' in type_line:
+        return True
+    
+    # Special text (e.g., "can be your commander")
+    if 'can be your commander' in text:
+        return True
+    
+    # Backgrounds can be commanders (with Choose a Background)
+    if 'Background' in type_line:
+        return True
    
    return False
+
+
+def is_background(row: pd.Series) -> bool:
+    """Determine if a card is a Background.
+    
+    Args:
+        row: DataFrame row with card data
+        
+    Returns:
+        True if card has Background type
+    """
+    type_line = str(row.get('type', ''))
+    return 'Background' in type_line
+
+
+def extract_creature_types(row: pd.Series) -> str:
+    """Extract creature types from type line.
+    
+    Args:
+        row: DataFrame row with card data
+        
+    Returns:
+        Comma-separated creature types or empty string
+    """
+    type_line = str(row.get('type', ''))
+    
+    # Check if it's a creature
+    if 'Creature' not in type_line:
+        return ''
+    
+    # Split on — to get subtypes
+    if '—' in type_line:
+        parts = type_line.split('—')
+        if len(parts) >= 2:
+            # Get everything after the dash, strip whitespace
+            subtypes = parts[1].strip()
+            return subtypes
+    
+    return ''
+
+
+def process_raw_parquet(raw_path: str, output_path: str) -> pd.DataFrame:
+    """Process raw MTGJSON Parquet into processed all_cards.parquet.
+    
+    This function:
+    1. Loads raw Parquet (all ~82 columns)
+    2. Filters to essential columns (CSV_PROCESSING_COLUMNS)
+    3. Applies standard filtering (banned cards, illegal sets, special types)
+    4. Deduplicates by faceName (keep first printing only)
+    5. Adds custom columns: creatureTypes, themeTags, isCommander, isBackground
+    6. Validates schema
+    7. Writes to processed directory
+    
+    Args:
+        raw_path: Path to raw cards.parquet from MTGJSON
+        output_path: Path to save processed all_cards.parquet
+        
+    Returns:
+        Processed DataFrame
+        
+    Raises:
+        ValueError: If schema validation fails
+    """
+    logger.info(f"Processing {raw_path}")
+    
+    # Load raw Parquet with DataLoader
+    loader = DataLoader()
+    df = loader.read_cards(raw_path)
+    
+    logger.info(f"Loaded {len(df)} cards with {len(df.columns)} columns")
+    
+    # Step 1: Fill NA values
+    logger.info("Filling NA values")
+    for col, fill_value in settings.FILL_NA_COLUMNS.items():
+        if col in df.columns:
+            if col == 'faceName':
+                df[col] = df[col].fillna(df['name'])
+            else:
+                df[col] = df[col].fillna(fill_value)
+    
+    # Step 2: Apply configuration-based filters (FILTER_CONFIG)
+    logger.info("Applying configuration filters")
+    for field, rules in FILTER_CONFIG.items():
+        if field not in df.columns:
+            logger.warning(f"Skipping filter for missing field: {field}")
+            continue
+        
+        for rule_type, values in rules.items():
+            if not values:
+                continue
+            
+            if rule_type == 'exclude':
+                for value in values:
+                    mask = df[field].astype(str).str.contains(value, case=False, na=False, regex=False)
+                    before = len(df)
+                    df = df[~mask]
+                    logger.debug(f"Excluded {field} containing '{value}': {before - len(df)} removed")
+            elif rule_type == 'require':
+                for value in values:
+                    mask = df[field].astype(str).str.contains(value, case=False, na=False, regex=False)
+                    before = len(df)
+                    df = df[mask]
+                    logger.debug(f"Required {field} containing '{value}': {before - len(df)} removed")
+    
+    # Step 3: Remove illegal sets
+    if 'printings' in df.columns:
+        logger.info("Removing illegal sets")
+        for set_code in NON_LEGAL_SETS:
+            before = len(df)
+            df = df[~df['printings'].str.contains(set_code, na=False)]
+            if len(df) < before:
+                logger.debug(f"Removed set {set_code}: {before - len(df)} cards")
+    
+    # Step 4: Remove banned cards
+    logger.info("Removing banned cards")
+    banned_set = {b.casefold() for b in BANNED_CARDS}
+    name_lc = df['name'].astype(str).str.casefold()
+    face_lc = df['faceName'].astype(str).str.casefold() if 'faceName' in df.columns else name_lc
+    mask = ~(name_lc.isin(banned_set) | face_lc.isin(banned_set))
+    before = len(df)
+    df = df[mask]
+    logger.debug(f"Removed banned cards: {before - len(df)} filtered out")
+    
+    # Step 5: Remove special card types
+    logger.info("Removing special card types")
+    for card_type in CARD_TYPES_TO_EXCLUDE:
+        before = len(df)
+        df = df[~df['type'].str.contains(card_type, na=False)]
+        if len(df) < before:
+            logger.debug(f"Removed type {card_type}: {before - len(df)} cards")
+    
+    # Step 6: Filter to essential columns only (reduce from ~82 to 14)
+    logger.info(f"Filtering to {len(CSV_PROCESSING_COLUMNS)} essential columns")
+    df = df[CSV_PROCESSING_COLUMNS]
+    
+    # Step 7: Sort and deduplicate (CRITICAL: keeps only one printing per unique card)
+    logger.info("Sorting and deduplicating cards")
+    df = df.sort_values(
+        by=SORT_CONFIG['columns'],
+        key=lambda col: col.str.lower() if not SORT_CONFIG['case_sensitive'] else col
+    )
+    before = len(df)
+    df = df.drop_duplicates(subset='faceName', keep='first')
+    logger.info(f"Deduplicated: {before} → {len(df)} cards ({before - len(df)} duplicate printings removed)")
+    
+    # Step 8: Add custom columns
+    logger.info("Adding custom columns: creatureTypes, themeTags, isCommander, isBackground")
+    
+    # creatureTypes: extracted from type line
+    df['creatureTypes'] = df.apply(extract_creature_types, axis=1)
+    
+    # themeTags: empty placeholder (filled during tagging)
+    df['themeTags'] = ''
+    
+    # isCommander: boolean flag
+    df['isCommander'] = df.apply(is_valid_commander, axis=1)
+    
+    # isBackground: boolean flag
+    df['isBackground'] = df.apply(is_background, axis=1)
+    
+    # Reorder columns to match CARD_DATA_COLUMNS
+    # CARD_DATA_COLUMNS has: name, faceName, edhrecRank, colorIdentity, colors,
+    #                        manaCost, manaValue, type, creatureTypes, text,
+    #                        power, toughness, keywords, themeTags, layout, side
+    # We need to add isCommander and isBackground at the end
+    final_columns = settings.CARD_DATA_COLUMNS + ['isCommander', 'isBackground']
+    
+    # Ensure all columns exist
+    for col in final_columns:
+        if col not in df.columns:
+            logger.warning(f"Column {col} missing, adding empty column")
+            df[col] = ''
+    
+    df = df[final_columns]
+    
+    logger.info(f"Final dataset: {len(df)} cards, {len(df.columns)} columns")
+    logger.info(f"Commanders: {df['isCommander'].sum()}")
+    logger.info(f"Backgrounds: {df['isBackground'].sum()}")
+    
+    # Validate schema (check required columns present)
+    try:
+        validate_schema(df)
+        logger.info("✓ Schema validation passed")
+    except ValueError as e:
+        logger.error(f"Schema validation failed: {e}")
+        raise
+    
+    # Write to processed directory
+    logger.info(f"Writing processed Parquet to {output_path}")
+    os.makedirs(os.path.dirname(output_path), exist_ok=True)
+    loader.write_cards(df, output_path)
+    
+    logger.info(f"✓ Created {output_path}")
+    
+    return df
+
+
+def initial_setup() -> None:
+    """Download and process MTGJSON Parquet data.
+    
+    Modern Parquet-based setup workflow (replaces legacy CSV approach).
+    
+    Workflow:
+    1. Download cards.parquet from MTGJSON → card_files/raw/cards.parquet
+    2. Process and filter → card_files/processed/all_cards.parquet
+    3. No color-specific files (filter at query time instead)
+    
+    Raises:
+        Various exceptions from download/processing steps
+    """
+    logger.info("=" * 80)
+    logger.info("Starting Parquet-based initial setup")
+    logger.info("=" * 80)
+    
+    # Step 1: Download raw Parquet
+    raw_dir = card_files_raw_dir()
+    raw_path = os.path.join(raw_dir, "cards.parquet")
+    
+    if os.path.exists(raw_path):
+        logger.info(f"Raw Parquet already exists: {raw_path}")
+        logger.info("Skipping download (delete file to re-download)")
+    else:
+        download_parquet_from_mtgjson(raw_path)
+    
+    # Step 2: Process raw → processed
+    processed_path = get_processed_cards_path()
+    
+    logger.info(f"Processing raw Parquet → {processed_path}")
+    process_raw_parquet(raw_path, processed_path)
+    
+    logger.info("=" * 80)
+    logger.info("✓ Parquet setup complete")
+    logger.info(f"  Raw: {raw_path}")
+    logger.info(f"  Processed: {processed_path}")
+    logger.info("=" * 80)
+    
+    # Step 3: Optional image caching (if enabled)
+    try:
+        from code.file_setup.image_cache import ImageCache
+        cache = ImageCache()
+        
+        if cache.is_enabled():
+            logger.info("=" * 80)
+            logger.info("Card image caching enabled - starting download")
+            logger.info("=" * 80)
+            
+            # Download bulk data
+            logger.info("Downloading Scryfall bulk data...")
+            cache.download_bulk_data()
+            
+            # Download images
+            logger.info("Downloading card images (this may take 1-2 hours)...")
+            
+            def progress(current, total, card_name):
+                if current % 100 == 0:  # Log every 100 cards
+                    pct = (current / total) * 100
+                    logger.info(f"  Progress: {current}/{total} ({pct:.1f}%) - {card_name}")
+            
+            stats = cache.download_images(progress_callback=progress)
+            
+            logger.info("=" * 80)
+            logger.info("✓ Image cache complete")
+            logger.info(f"  Downloaded: {stats['downloaded']}")
+            logger.info(f"  Skipped: {stats['skipped']}")
+            logger.info(f"  Failed: {stats['failed']}")
+            logger.info("=" * 80)
+        else:
+            logger.info("Card image caching disabled (CACHE_CARD_IMAGES=0)")
+            logger.info("Images will be fetched from Scryfall API on demand")
+            
+    except Exception as e:
+        logger.error(f"Failed to cache images (continuing anyway): {e}")
+        logger.error("Images will be fetched from Scryfall API on demand")
+
+
+def regenerate_processed_parquet() -> None:
+    """Regenerate processed Parquet from existing raw file.
+    
+    Useful when:
+    - Column processing logic changes
+    - Adding new custom columns
+    - Testing without re-downloading
+    """
+    logger.info("Regenerating processed Parquet from raw file")
+    
+    raw_path = os.path.join(card_files_raw_dir(), "cards.parquet")
+    
+    if not os.path.exists(raw_path):
+        logger.error(f"Raw Parquet not found: {raw_path}")
+        logger.error("Run initial_setup_parquet() first to download")
+        raise FileNotFoundError(f"Raw Parquet not found: {raw_path}")
+    
+    processed_path = get_processed_cards_path()
+    process_raw_parquet(raw_path, processed_path)
+    
+    logger.info(f"✓ Regenerated {processed_path}")
--- a/code/file_setup/setup_constants.py
+++ b/code/file_setup/setup_constants.py
@ -16,8 +16,8 @@ __all__ = [
 # Banned cards consolidated here (remains specific to setup concerns)
 BANNED_CARDS: List[str] = [
    # Commander banned list
-    'Ancestral Recall', 'Balance', 'Biorhythm', 'Black Lotus',
-    'Chaos Orb', 'Channel', 'Dockside Extortionist',
+    '1996 World Champion', 'Ancestral Recall', 'Balance', 'Biorhythm',
+    'Black Lotus', 'Chaos Orb', 'Channel', 'Dockside Extortionist',
    'Emrakul, the Aeons Torn',
    'Erayo, Soratami Ascendant', 'Falling Star', 'Fastbond',
    'Flash', 'Golos, Tireless Pilgrim',
--- a/code/headless_runner.py
+++ b/code/headless_runner.py
@ -31,18 +31,22 @@ def _is_stale(file1: str, file2: str) -> bool:
    return os.path.getmtime(file2) < os.path.getmtime(file1)

 def _ensure_data_ready():
-    cards_csv = os.path.join("csv_files", "cards.csv")
+    # M4: Check for Parquet file instead of CSV
+    from path_util import get_processed_cards_path
+    
+    parquet_path = get_processed_cards_path()
    tagging_json = os.path.join("csv_files", ".tagging_complete.json")
-    # If cards.csv is missing, run full setup+tagging
-    if not os.path.isfile(cards_csv):
-        print("cards.csv not found, running full setup and tagging...")
+    
+    # If all_cards.parquet is missing, run full setup+tagging
+    if not os.path.isfile(parquet_path):
+        print("all_cards.parquet not found, running full setup and tagging...")
        initial_setup()
-        tagger.run_tagging()
+        tagger.run_tagging(parallel=True)  # Use parallel tagging for performance
        _write_tagging_flag(tagging_json)
    # If tagging_complete is missing or stale, run tagging
-    elif not os.path.isfile(tagging_json) or _is_stale(cards_csv, tagging_json):
+    elif not os.path.isfile(tagging_json) or _is_stale(parquet_path, tagging_json):
        print(".tagging_complete.json missing or stale, running tagging...")
-        tagger.run_tagging()
+        tagger.run_tagging(parallel=True)  # Use parallel tagging for performance
        _write_tagging_flag(tagging_json)

 def _write_tagging_flag(tagging_json):
@ -135,7 +139,7 @@ def _validate_commander_available(command_name: str) -> None:
            return

    try:
-        from commander_exclusions import lookup_commander_detail as _lookup_commander_detail  # type: ignore[import-not-found]
+        from commander_exclusions import lookup_commander_detail as _lookup_commander_detail
    except ImportError:  # pragma: no cover
        _lookup_commander_detail = None

@ -277,12 +281,12 @@ def run(
    # Optional deterministic seed for Random Modes (does not affect core when unset)
    try:
        if seed is not None:
-            builder.set_seed(seed)  # type: ignore[attr-defined]
+            builder.set_seed(seed)
    except Exception:
        pass
    # Mark this run as headless so builder can adjust exports and logging
    try:
-        builder.headless = True  # type: ignore[attr-defined]
+        builder.headless = True
    except Exception:
        pass

@ -290,9 +294,9 @@ def run(
    secondary_clean = (secondary_commander or "").strip()
    background_clean = (background or "").strip()
    try:
-        builder.partner_feature_enabled = partner_feature_enabled  # type: ignore[attr-defined]
-        builder.requested_secondary_commander = secondary_clean or None  # type: ignore[attr-defined]
-        builder.requested_background = background_clean or None  # type: ignore[attr-defined]
+        builder.partner_feature_enabled = partner_feature_enabled
+        builder.requested_secondary_commander = secondary_clean or None
+        builder.requested_background = background_clean or None
    except Exception:
        pass

@ -309,11 +313,11 @@ def run(
    
    # Configure include/exclude settings (M1: Config + Validation + Persistence)
    try:
-        builder.include_cards = list(include_cards or [])  # type: ignore[attr-defined]
-        builder.exclude_cards = list(exclude_cards or [])  # type: ignore[attr-defined] 
-        builder.enforcement_mode = enforcement_mode  # type: ignore[attr-defined]
-        builder.allow_illegal = allow_illegal  # type: ignore[attr-defined]
-        builder.fuzzy_matching = fuzzy_matching  # type: ignore[attr-defined]
+        builder.include_cards = list(include_cards or [])
+        builder.exclude_cards = list(exclude_cards or [])
+        builder.enforcement_mode = enforcement_mode
+        builder.allow_illegal = allow_illegal
+        builder.fuzzy_matching = fuzzy_matching
    except Exception:
        pass

@ -332,16 +336,16 @@ def run(
            )

    try:
-        builder.theme_match_mode = theme_resolution.mode  # type: ignore[attr-defined]
-        builder.theme_catalog_version = theme_resolution.catalog_version  # type: ignore[attr-defined]
-        builder.user_theme_requested = list(theme_resolution.requested)  # type: ignore[attr-defined]
-        builder.user_theme_resolved = list(theme_resolution.resolved)  # type: ignore[attr-defined]
-        builder.user_theme_matches = list(theme_resolution.matches)  # type: ignore[attr-defined]
-        builder.user_theme_unresolved = list(theme_resolution.unresolved)  # type: ignore[attr-defined]
-        builder.user_theme_fuzzy_corrections = dict(theme_resolution.fuzzy_corrections)  # type: ignore[attr-defined]
-        builder.user_theme_resolution = theme_resolution  # type: ignore[attr-defined]
+        builder.theme_match_mode = theme_resolution.mode
+        builder.theme_catalog_version = theme_resolution.catalog_version
+        builder.user_theme_requested = list(theme_resolution.requested)
+        builder.user_theme_resolved = list(theme_resolution.resolved)
+        builder.user_theme_matches = list(theme_resolution.matches)
+        builder.user_theme_unresolved = list(theme_resolution.unresolved)
+        builder.user_theme_fuzzy_corrections = dict(theme_resolution.fuzzy_corrections)
+        builder.user_theme_resolution = theme_resolution
        if user_theme_weight is not None:
-            builder.user_theme_weight = float(user_theme_weight)  # type: ignore[attr-defined]
+            builder.user_theme_weight = float(user_theme_weight)
    except Exception:
        pass
        
@ -352,7 +356,7 @@ def run(
            ic: Dict[str, int] = {}
            for k, v in ideal_counts.items():
                try:
-                    iv = int(v) if v is not None else None  # type: ignore
+                    iv = int(v) if v is not None else None
                except Exception:
                    continue
                if iv is None:
@ -361,7 +365,7 @@ def run(
                if k in {"ramp","lands","basic_lands","creatures","removal","wipes","card_advantage","protection"}:
                    ic[k] = iv
            if ic:
-                builder.ideal_counts.update(ic)  # type: ignore[attr-defined]
+                builder.ideal_counts.update(ic)
        except Exception:
            pass
    builder.run_initial_setup()
@ -514,24 +518,24 @@ def _apply_combined_commander_to_builder(builder: DeckBuilder, combined_commande
    """Attach combined commander metadata to the builder for downstream use."""

    try:
-        builder.combined_commander = combined_commander  # type: ignore[attr-defined]
+        builder.combined_commander = combined_commander
    except Exception:
        pass

    try:
-        builder.partner_mode = combined_commander.partner_mode  # type: ignore[attr-defined]
+        builder.partner_mode = combined_commander.partner_mode
    except Exception:
        pass

    try:
-        builder.secondary_commander = combined_commander.secondary_name  # type: ignore[attr-defined]
+        builder.secondary_commander = combined_commander.secondary_name
    except Exception:
        pass

    try:
-        builder.combined_color_identity = combined_commander.color_identity  # type: ignore[attr-defined]
-        builder.combined_theme_tags = combined_commander.theme_tags  # type: ignore[attr-defined]
-        builder.partner_warnings = combined_commander.warnings  # type: ignore[attr-defined]
+        builder.combined_color_identity = combined_commander.color_identity
+        builder.combined_theme_tags = combined_commander.theme_tags
+        builder.partner_warnings = combined_commander.warnings
    except Exception:
        pass

@ -553,7 +557,7 @@ def _export_outputs(builder: DeckBuilder) -> None:
        # Persist for downstream reuse (e.g., random_entrypoint / reroll flows) so they don't re-export
        if csv_path:
            try:
-                builder.last_csv_path = csv_path  # type: ignore[attr-defined]
+                builder.last_csv_path = csv_path
            except Exception:
                pass
    except Exception:
@ -568,7 +572,7 @@ def _export_outputs(builder: DeckBuilder) -> None:
                finally:
                    if txt_generated:
                        try:
-                            builder.last_txt_path = txt_generated  # type: ignore[attr-defined]
+                            builder.last_txt_path = txt_generated
                        except Exception:
                            pass
            else:
@ -578,7 +582,7 @@ def _export_outputs(builder: DeckBuilder) -> None:
                finally:
                    if txt_generated:
                        try:
-                            builder.last_txt_path = txt_generated  # type: ignore[attr-defined]
+                            builder.last_txt_path = txt_generated
                        except Exception:
                            pass
    except Exception:
@ -1192,7 +1196,7 @@ def _run_random_mode(config: RandomRunConfig) -> int:
            RandomConstraintsImpossibleError,
            RandomThemeNoMatchError,
            build_random_full_deck,
-        )  # type: ignore
+        )
    except Exception as exc:
        print(f"Random mode unavailable: {exc}")
        return 1
--- a/code/main.py
+++ b/code/main.py
@ -25,6 +25,7 @@ from file_setup.setup import initial_setup
 from tagging import tagger
 import logging_util
 from settings import CSV_DIRECTORY
+from path_util import get_processed_cards_path

 # Create logger for this module
 logger = logging_util.logging.getLogger(__name__)
@ -40,24 +41,24 @@ def _ensure_data_ready() -> None:
    Path('deck_files').mkdir(parents=True, exist_ok=True)
    Path('logs').mkdir(parents=True, exist_ok=True)

-    # Ensure required CSVs exist and are tagged before proceeding
+    # Ensure required Parquet file exists and is tagged before proceeding
    try:
        import time
        import json as _json
        from datetime import datetime as _dt
-        cards_path = os.path.join(CSV_DIRECTORY, 'cards.csv')
+        parquet_path = get_processed_cards_path()
        flag_path = os.path.join(CSV_DIRECTORY, '.tagging_complete.json')
        refresh_needed = False
-        # Missing CSV forces refresh
-        if not os.path.exists(cards_path):
-            logger.info("cards.csv not found. Running initial setup and tagging...")
+        # Missing Parquet file forces refresh
+        if not os.path.exists(parquet_path):
+            logger.info("all_cards.parquet not found. Running initial setup and tagging...")
            refresh_needed = True
        else:
-            # Stale CSV (>7 days) forces refresh
+            # Stale Parquet file (>7 days) forces refresh
            try:
-                age_seconds = time.time() - os.path.getmtime(cards_path)
+                age_seconds = time.time() - os.path.getmtime(parquet_path)
                if age_seconds > 7 * 24 * 60 * 60:
-                    logger.info("cards.csv is older than 7 days. Refreshing data (setup + tagging)...")
+                    logger.info("all_cards.parquet is older than 7 days. Refreshing data (setup + tagging)...")
                    refresh_needed = True
            except Exception:
                pass
@ -67,7 +68,7 @@ def _ensure_data_ready() -> None:
            refresh_needed = True
        if refresh_needed:
            initial_setup()
-            tagger.run_tagging()
+            tagger.run_tagging(parallel=True)  # Use parallel tagging for performance
            # Write tagging completion flag
            try:
                os.makedirs(CSV_DIRECTORY, exist_ok=True)
--- a/code/path_util.py
+++ b/code/path_util.py
@ -7,6 +7,8 @@ def csv_dir() -> str:
    """Return the base directory for CSV files.

    Defaults to 'csv_files'. Override with CSV_FILES_DIR for tests or advanced setups.
+    
+    NOTE: DEPRECATED in v3.0.0 - Use card_files_dir() instead.
    """
    try:
        base = os.getenv("CSV_FILES_DIR")
@ -14,3 +16,84 @@ def csv_dir() -> str:
        return base or "csv_files"
    except Exception:
        return "csv_files"
+
+
+# New Parquet-based directory utilities (v3.0.0+)
+
+def card_files_dir() -> str:
+    """Return the base directory for card files (Parquet and metadata).
+    
+    Defaults to 'card_files'. Override with CARD_FILES_DIR environment variable.
+    """
+    try:
+        base = os.getenv("CARD_FILES_DIR")
+        base = base.strip() if isinstance(base, str) else None
+        return base or "card_files"
+    except Exception:
+        return "card_files"
+
+
+def card_files_raw_dir() -> str:
+    """Return the directory for raw MTGJSON Parquet files.
+    
+    Defaults to 'card_files/raw'. Override with CARD_FILES_RAW_DIR environment variable.
+    """
+    try:
+        base = os.getenv("CARD_FILES_RAW_DIR")
+        base = base.strip() if isinstance(base, str) else None
+        return base or os.path.join(card_files_dir(), "raw")
+    except Exception:
+        return os.path.join(card_files_dir(), "raw")
+
+
+def card_files_processed_dir() -> str:
+    """Return the directory for processed/tagged Parquet files.
+    
+    Defaults to 'card_files/processed'. Override with CARD_FILES_PROCESSED_DIR environment variable.
+    """
+    try:
+        base = os.getenv("CARD_FILES_PROCESSED_DIR")
+        base = base.strip() if isinstance(base, str) else None
+        return base or os.path.join(card_files_dir(), "processed")
+    except Exception:
+        return os.path.join(card_files_dir(), "processed")
+
+
+def get_raw_cards_path() -> str:
+    """Get the path to the raw MTGJSON Parquet file.
+    
+    Returns:
+        Path to card_files/raw/cards.parquet
+    """
+    return os.path.join(card_files_raw_dir(), "cards.parquet")
+
+
+def get_processed_cards_path() -> str:
+    """Get the path to the processed/tagged Parquet file.
+    
+    Returns:
+        Path to card_files/processed/all_cards.parquet
+    """
+    return os.path.join(card_files_processed_dir(), "all_cards.parquet")
+
+
+def get_commander_cards_path() -> str:
+    """Get the path to the pre-filtered commander-only Parquet file.
+    
+    Returns:
+        Path to card_files/processed/commander_cards.parquet
+    """
+    return os.path.join(card_files_processed_dir(), "commander_cards.parquet")
+
+
+def get_batch_path(batch_id: int) -> str:
+    """Get the path to a batch Parquet file.
+    
+    Args:
+        batch_id: Batch number (e.g., 0, 1, 2, ...)
+    
+    Returns:
+        Path to card_files/processed/batch_NNNN.parquet
+    """
+    return os.path.join(card_files_processed_dir(), f"batch_{batch_id:04d}.parquet")
+
--- a/code/scripts/aggregate_cards.py
+++ b/code/scripts/aggregate_cards.py
@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+"""
+Aggregate Cards CLI Script
+
+Command-line interface for consolidating individual card CSV files into a single
+Parquet file. Useful for manual aggregation runs, testing, and recovery.
+
+Usage:
+    python code/scripts/aggregate_cards.py
+    python code/scripts/aggregate_cards.py --source csv_files --output card_files/all_cards.parquet
+    python code/scripts/aggregate_cards.py --validate-only
+    python code/scripts/aggregate_cards.py --incremental
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+from pathlib import Path
+
+# Add project root to path for imports
+project_root = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(project_root))
+
+from code.file_setup.card_aggregator import CardAggregator
+from code.logging_util import get_logger
+from code.settings import CSV_DIRECTORY, CARD_FILES_DIRECTORY
+
+# Initialize logger
+logger = get_logger(__name__)
+
+
+def main() -> int:
+    """Main entry point for aggregate_cards CLI."""
+    parser = argparse.ArgumentParser(
+        description="Aggregate individual card CSV files into consolidated Parquet file",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+
+    parser.add_argument(
+        "--source",
+        "-s",
+        default=CSV_DIRECTORY,
+        help=f"Source directory containing card CSV files (default: {CSV_DIRECTORY})",
+    )
+
+    parser.add_argument(
+        "--output",
+        "-o",
+        default=None,
+        help="Output Parquet file path (default: card_files/all_cards.parquet)",
+    )
+
+    parser.add_argument(
+        "--output-dir",
+        default=CARD_FILES_DIRECTORY,
+        help=f"Output directory for Parquet files (default: {CARD_FILES_DIRECTORY})",
+    )
+
+    parser.add_argument(
+        "--validate-only",
+        action="store_true",
+        help="Only validate existing output file, don't aggregate",
+    )
+
+    parser.add_argument(
+        "--incremental",
+        "-i",
+        action="store_true",
+        help="Perform incremental update (only changed files)",
+    )
+
+    parser.add_argument(
+        "--keep-versions",
+        type=int,
+        default=3,
+        help="Number of historical versions to keep (default: 3)",
+    )
+
+    args = parser.parse_args()
+
+    # Initialize aggregator
+    aggregator = CardAggregator(output_dir=args.output_dir)
+
+    # Determine output path
+    output_path = args.output or f"{args.output_dir}/all_cards.parquet"
+
+    try:
+        if args.validate_only:
+            # Validation only mode
+            logger.info(f"Validating {output_path}...")
+            is_valid, errors = aggregator.validate_output(output_path, args.source)
+
+            if is_valid:
+                logger.info("✓ Validation passed")
+                return 0
+            else:
+                logger.error("✗ Validation failed:")
+                for error in errors:
+                    logger.error(f"  - {error}")
+                return 1
+
+        elif args.incremental:
+            # Incremental update mode
+            logger.info("Starting incremental aggregation...")
+            metadata_path = f"{args.output_dir}/.aggregate_metadata.json"
+            changed_files = aggregator.detect_changes(args.source, metadata_path)
+
+            if not changed_files:
+                logger.info("No changes detected, skipping aggregation")
+                return 0
+
+            stats = aggregator.incremental_update(changed_files, output_path)
+
+        else:
+            # Full aggregation mode
+            logger.info("Starting full aggregation...")
+            stats = aggregator.aggregate_all(args.source, output_path)
+
+        # Print summary
+        print("\n" + "=" * 60)
+        print("AGGREGATION SUMMARY")
+        print("=" * 60)
+        print(f"Files processed:     {stats['files_processed']}")
+        print(f"Total cards:         {stats['total_cards']:,}")
+        print(f"Duplicates removed:  {stats['duplicates_removed']:,}")
+        print(f"File size:           {stats['file_size_mb']:.2f} MB")
+        print(f"Time elapsed:        {stats['elapsed_seconds']:.2f} seconds")
+        print(f"Output:              {output_path}")
+        print("=" * 60)
+
+        # Run validation
+        logger.info("\nValidating output...")
+        is_valid, errors = aggregator.validate_output(output_path, args.source)
+
+        if is_valid:
+            logger.info("✓ Validation passed")
+            return 0
+        else:
+            logger.error("✗ Validation failed:")
+            for error in errors:
+                logger.error(f"  - {error}")
+            return 1
+
+    except FileNotFoundError as e:
+        logger.error(f"Error: {e}")
+        return 1
+    except ValueError as e:
+        logger.error(f"Error: {e}")
+        return 1
+    except Exception as e:
+        logger.error(f"Unexpected error: {e}")
+        import traceback
+
+        traceback.print_exc()
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/code/scripts/audit_protection_full_v2.py
+++ b/code/scripts/audit_protection_full_v2.py
@ -1,203 +0,0 @@
-"""
-Full audit of Protection-tagged cards with kindred metadata support (M2 Phase 2).
-
-Created: October 8, 2025
-Purpose: Audit and validate Protection tag precision after implementing grant detection.
-         Can be re-run periodically to check tagging quality.
-
-This script audits ALL Protection-tagged cards and categorizes them:
- Grant: Gives broad protection to other permanents YOU control
- Kindred: Gives protection to specific creature types (metadata tags)
- Mixed: Both broad and kindred/inherent
- Inherent: Only has protection itself
- ConditionalSelf: Only conditionally grants to itself
- Opponent: Grants to opponent's permanents
- Neither: False positive
-
-Outputs:
- m2_audit_v2.json: Full analysis with summary
- m2_audit_v2_grant.csv: Cards for main Protection tag
- m2_audit_v2_kindred.csv: Cards for kindred metadata tags
- m2_audit_v2_mixed.csv: Cards with both broad and kindred grants
- m2_audit_v2_conditional.csv: Conditional self-grants (exclude)
- m2_audit_v2_inherent.csv: Inherent protection only (exclude)
- m2_audit_v2_opponent.csv: Opponent grants (exclude)
- m2_audit_v2_neither.csv: False positives (exclude)
- m2_audit_v2_all.csv: All cards combined
-"""
-
-import sys
-from pathlib import Path
-import pandas as pd
-import json
-
-# Add project root to path
-project_root = Path(__file__).parent.parent.parent
-sys.path.insert(0, str(project_root))
-
-from code.tagging.protection_grant_detection import (
-    categorize_protection_card,
-    get_kindred_protection_tags,
-    is_granting_protection,
-)
-
-def load_all_cards():
-    """Load all cards from color/identity CSV files."""
-    csv_dir = project_root / 'csv_files'
-    
-    # Get all color/identity CSVs (not the raw cards.csv)
-    csv_files = list(csv_dir.glob('*_cards.csv'))
-    csv_files = [f for f in csv_files if f.stem not in ['cards', 'testdata']]
-    
-    all_cards = []
-    for csv_file in csv_files:
-        try:
-            df = pd.read_csv(csv_file)
-            all_cards.append(df)
-        except Exception as e:
-            print(f"Warning: Could not load {csv_file.name}: {e}")
-    
-    # Combine all DataFrames
-    combined = pd.concat(all_cards, ignore_index=True)
-    
-    # Drop duplicates (cards appear in multiple color files)
-    combined = combined.drop_duplicates(subset=['name'], keep='first')
-    
-    return combined
-
-def audit_all_protection_cards():
-    """Audit all Protection-tagged cards."""
-    print("Loading all cards...")
-    df = load_all_cards()
-    
-    print(f"Total cards loaded: {len(df)}")
-    
-    # Filter to Protection-tagged cards (column is 'themeTags' in color CSVs)
-    df_prot = df[df['themeTags'].str.contains('Protection', case=False, na=False)].copy()
-    
-    print(f"Protection-tagged cards: {len(df_prot)}")
-    
-    # Categorize each card
-    categories = []
-    grants_list = []
-    kindred_tags_list = []
-    
-    for idx, row in df_prot.iterrows():
-        name = row['name']
-        text = str(row.get('text', '')).replace('\\n', '\n')  # Convert escaped newlines to real newlines
-        keywords = str(row.get('keywords', ''))
-        card_type = str(row.get('type', ''))
-        
-        # Categorize with kindred exclusion enabled
-        category = categorize_protection_card(name, text, keywords, card_type, exclude_kindred=True)
-        
-        # Check if it grants broadly
-        grants_broad = is_granting_protection(text, keywords, exclude_kindred=True)
-        
-        # Get kindred tags
-        kindred_tags = get_kindred_protection_tags(text)
-        
-        categories.append(category)
-        grants_list.append(grants_broad)
-        kindred_tags_list.append(', '.join(sorted(kindred_tags)) if kindred_tags else '')
-    
-    df_prot['category'] = categories
-    df_prot['grants_broad'] = grants_list
-    df_prot['kindred_tags'] = kindred_tags_list
-    
-    # Generate summary (convert numpy types to native Python for JSON serialization)
-    summary = {
-        'total': int(len(df_prot)),
-        'categories': {k: int(v) for k, v in df_prot['category'].value_counts().to_dict().items()},
-        'grants_broad_count': int(df_prot['grants_broad'].sum()),
-        'kindred_cards_count': int((df_prot['kindred_tags'] != '').sum()),
-    }
-    
-    # Calculate keep vs remove
-    keep_categories = {'Grant', 'Mixed'}
-    kindred_only = df_prot[df_prot['category'] == 'Kindred']
-    keep_count = len(df_prot[df_prot['category'].isin(keep_categories)])
-    remove_count = len(df_prot[~df_prot['category'].isin(keep_categories | {'Kindred'})])
-    
-    summary['keep_main_tag'] = keep_count
-    summary['kindred_metadata'] = len(kindred_only)
-    summary['remove'] = remove_count
-    summary['precision_estimate'] = round((keep_count / len(df_prot)) * 100, 1) if len(df_prot) > 0 else 0
-    
-    # Print summary
-    print(f"\n{'='*60}")
-    print("AUDIT SUMMARY")
-    print(f"{'='*60}")
-    print(f"Total Protection-tagged cards: {summary['total']}")
-    print(f"\nCategories:")
-    for cat, count in sorted(summary['categories'].items()):
-        pct = (count / summary['total']) * 100
-        print(f"  {cat:20s} {count:4d} ({pct:5.1f}%)")
-    
-    print(f"\n{'='*60}")
-    print(f"Main Protection tag:  {keep_count:4d} ({keep_count/len(df_prot)*100:5.1f}%)")
-    print(f"Kindred metadata only: {len(kindred_only):4d} ({len(kindred_only)/len(df_prot)*100:5.1f}%)")
-    print(f"Remove:               {remove_count:4d} ({remove_count/len(df_prot)*100:5.1f}%)")
-    print(f"{'='*60}")
-    print(f"Precision estimate:   {summary['precision_estimate']}%")
-    print(f"{'='*60}\n")
-    
-    # Export results
-    output_dir = project_root / 'logs' / 'roadmaps' / 'source' / 'tagging_refinement'
-    output_dir.mkdir(parents=True, exist_ok=True)
-    
-    # Export JSON summary
-    with open(output_dir / 'm2_audit_v2.json', 'w') as f:
-        json.dump({
-            'summary': summary,
-            'cards': df_prot[['name', 'type', 'category', 'grants_broad', 'kindred_tags', 'keywords', 'text']].to_dict(orient='records')
-        }, f, indent=2)
-    
-    # Export CSVs by category
-    export_cols = ['name', 'type', 'category', 'grants_broad', 'kindred_tags', 'keywords', 'text']
-    
-    # Grant category
-    df_grant = df_prot[df_prot['category'] == 'Grant']
-    df_grant[export_cols].to_csv(output_dir / 'm2_audit_v2_grant.csv', index=False)
-    print(f"Exported {len(df_grant)} Grant cards to m2_audit_v2_grant.csv")
-    
-    # Kindred category
-    df_kindred = df_prot[df_prot['category'] == 'Kindred']
-    df_kindred[export_cols].to_csv(output_dir / 'm2_audit_v2_kindred.csv', index=False)
-    print(f"Exported {len(df_kindred)} Kindred cards to m2_audit_v2_kindred.csv")
-    
-    # Mixed category
-    df_mixed = df_prot[df_prot['category'] == 'Mixed']
-    df_mixed[export_cols].to_csv(output_dir / 'm2_audit_v2_mixed.csv', index=False)
-    print(f"Exported {len(df_mixed)} Mixed cards to m2_audit_v2_mixed.csv")
-    
-    # ConditionalSelf category
-    df_conditional = df_prot[df_prot['category'] == 'ConditionalSelf']
-    df_conditional[export_cols].to_csv(output_dir / 'm2_audit_v2_conditional.csv', index=False)
-    print(f"Exported {len(df_conditional)} ConditionalSelf cards to m2_audit_v2_conditional.csv")
-    
-    # Inherent category
-    df_inherent = df_prot[df_prot['category'] == 'Inherent']
-    df_inherent[export_cols].to_csv(output_dir / 'm2_audit_v2_inherent.csv', index=False)
-    print(f"Exported {len(df_inherent)} Inherent cards to m2_audit_v2_inherent.csv")
-    
-    # Opponent category
-    df_opponent = df_prot[df_prot['category'] == 'Opponent']
-    df_opponent[export_cols].to_csv(output_dir / 'm2_audit_v2_opponent.csv', index=False)
-    print(f"Exported {len(df_opponent)} Opponent cards to m2_audit_v2_opponent.csv")
-    
-    # Neither category
-    df_neither = df_prot[df_prot['category'] == 'Neither']
-    df_neither[export_cols].to_csv(output_dir / 'm2_audit_v2_neither.csv', index=False)
-    print(f"Exported {len(df_neither)} Neither cards to m2_audit_v2_neither.csv")
-    
-    # All cards
-    df_prot[export_cols].to_csv(output_dir / 'm2_audit_v2_all.csv', index=False)
-    print(f"Exported {len(df_prot)} total cards to m2_audit_v2_all.csv")
-    
-    print(f"\nAll files saved to: {output_dir}")
-    
-    return df_prot, summary
-
-if __name__ == '__main__':
-    df_results, summary = audit_all_protection_cards()
--- a/code/scripts/benchmark_parquet.py
+++ b/code/scripts/benchmark_parquet.py
@ -0,0 +1,160 @@
+"""Benchmark Parquet vs CSV performance."""
+
+import pandas as pd
+import time
+import os
+
+def benchmark_full_load():
+    """Benchmark loading full dataset."""
+    csv_path = 'csv_files/cards.csv'
+    parquet_path = 'csv_files/cards_parquet_test.parquet'
+    
+    print("=== FULL LOAD BENCHMARK ===\n")
+    
+    # CSV load
+    print("Loading CSV...")
+    start = time.time()
+    df_csv = pd.read_csv(csv_path, low_memory=False)
+    csv_time = time.time() - start
+    csv_rows = len(df_csv)
+    csv_memory = df_csv.memory_usage(deep=True).sum() / 1024 / 1024
+    print(f"  Time: {csv_time:.3f}s")
+    print(f"  Rows: {csv_rows:,}")
+    print(f"  Memory: {csv_memory:.2f} MB")
+    
+    # Parquet load
+    print("\nLoading Parquet...")
+    start = time.time()
+    df_parquet = pd.read_parquet(parquet_path)
+    parquet_time = time.time() - start
+    parquet_rows = len(df_parquet)
+    parquet_memory = df_parquet.memory_usage(deep=True).sum() / 1024 / 1024
+    print(f"  Time: {parquet_time:.3f}s")
+    print(f"  Rows: {parquet_rows:,}")
+    print(f"  Memory: {parquet_memory:.2f} MB")
+    
+    # Comparison
+    speedup = csv_time / parquet_time
+    memory_reduction = (1 - parquet_memory / csv_memory) * 100
+    print(f"\n📊 Results:")
+    print(f"  Speedup: {speedup:.2f}x faster")
+    print(f"  Memory: {memory_reduction:.1f}% less")
+    
+    return df_csv, df_parquet
+
+def benchmark_column_selection():
+    """Benchmark loading with column selection (Parquet optimization)."""
+    parquet_path = 'csv_files/cards_parquet_test.parquet'
+    
+    print("\n\n=== COLUMN SELECTION BENCHMARK (Parquet only) ===\n")
+    
+    # Essential columns for deck building
+    essential_columns = ['name', 'colorIdentity', 'type', 'types', 'manaValue', 
+                         'manaCost', 'power', 'toughness', 'text', 'rarity']
+    
+    # Full load
+    print("Loading all columns...")
+    start = time.time()
+    df_full = pd.read_parquet(parquet_path)
+    full_time = time.time() - start
+    full_memory = df_full.memory_usage(deep=True).sum() / 1024 / 1024
+    print(f"  Time: {full_time:.3f}s")
+    print(f"  Columns: {len(df_full.columns)}")
+    print(f"  Memory: {full_memory:.2f} MB")
+    
+    # Selective load
+    print(f"\nLoading {len(essential_columns)} essential columns...")
+    start = time.time()
+    df_selective = pd.read_parquet(parquet_path, columns=essential_columns)
+    selective_time = time.time() - start
+    selective_memory = df_selective.memory_usage(deep=True).sum() / 1024 / 1024
+    print(f"  Time: {selective_time:.3f}s")
+    print(f"  Columns: {len(df_selective.columns)}")
+    print(f"  Memory: {selective_memory:.2f} MB")
+    
+    # Comparison
+    speedup = full_time / selective_time
+    memory_reduction = (1 - selective_memory / full_memory) * 100
+    print(f"\n📊 Results:")
+    print(f"  Speedup: {speedup:.2f}x faster")
+    print(f"  Memory: {memory_reduction:.1f}% less")
+
+def benchmark_filtering():
+    """Benchmark filtering by colorIdentity (single file approach)."""
+    parquet_path = 'csv_files/cards_parquet_test.parquet'
+    
+    print("\n\n=== COLOR IDENTITY FILTERING BENCHMARK ===\n")
+    
+    # Load data
+    print("Loading Parquet with essential columns...")
+    essential_columns = ['name', 'colorIdentity', 'type', 'manaValue']
+    start = time.time()
+    df = pd.read_parquet(parquet_path, columns=essential_columns)
+    load_time = time.time() - start
+    print(f"  Load time: {load_time:.3f}s")
+    print(f"  Total cards: {len(df):,}")
+    
+    # Test different color identities
+    test_cases = [
+        ("Colorless (C)", ["C", ""]),
+        ("Mono-White (W)", ["W", "C", ""]),
+        ("Bant (GUW)", ["C", "", "G", "U", "W", "G,U", "G,W", "U,W", "G,U,W"]),
+        ("5-Color (WUBRG)", ["C", "", "W", "U", "B", "R", "G", 
+                             "W,U", "W,B", "W,R", "W,G", "U,B", "U,R", "U,G", "B,R", "B,G", "R,G",
+                             "W,U,B", "W,U,R", "W,U,G", "W,B,R", "W,B,G", "W,R,G", "U,B,R", "U,B,G", "U,R,G", "B,R,G",
+                             "W,U,B,R", "W,U,B,G", "W,U,R,G", "W,B,R,G", "U,B,R,G",
+                             "W,U,B,R,G"]),
+    ]
+    
+    for test_name, valid_identities in test_cases:
+        print(f"\n{test_name}:")
+        start = time.time()
+        filtered = df[df['colorIdentity'].isin(valid_identities)]
+        filter_time = (time.time() - start) * 1000  # Convert to ms
+        print(f"  Filter time: {filter_time:.1f}ms")
+        print(f"  Cards found: {len(filtered):,}")
+        print(f"  % of total: {len(filtered) / len(df) * 100:.1f}%")
+
+def benchmark_data_types():
+    """Check data types and list handling."""
+    parquet_path = 'csv_files/cards_parquet_test.parquet'
+    
+    print("\n\n=== DATA TYPE ANALYSIS ===\n")
+    
+    df = pd.read_parquet(parquet_path)
+    
+    # Check list-type columns
+    list_cols = []
+    for col in df.columns:
+        sample = df[col].dropna().iloc[0] if df[col].notna().any() else None
+        if isinstance(sample, (list, tuple)):
+            list_cols.append(col)
+    
+    print(f"Columns stored as lists: {len(list_cols)}")
+    for col in list_cols:
+        sample = df[col].dropna().iloc[0]
+        print(f"  {col}: {sample}")
+    
+    # Check critical columns for deck building
+    critical_cols = ['name', 'colorIdentity', 'type', 'types', 'subtypes', 
+                     'manaValue', 'manaCost', 'text', 'keywords']
+    
+    print(f"\n✓ Critical columns for deck building:")
+    for col in critical_cols:
+        if col in df.columns:
+            dtype = str(df[col].dtype)
+            null_pct = (df[col].isna().sum() / len(df)) * 100
+            sample = df[col].dropna().iloc[0] if df[col].notna().any() else None
+            sample_type = type(sample).__name__
+            print(f"  {col:20s} dtype={dtype:10s} null={null_pct:5.1f}% sample_type={sample_type}")
+
+if __name__ == "__main__":
+    # Run benchmarks
+    df_csv, df_parquet = benchmark_full_load()
+    benchmark_column_selection()
+    benchmark_filtering()
+    benchmark_data_types()
+    
+    print("\n\n=== SUMMARY ===")
+    print("✅ All benchmarks complete!")
+    print("📁 File size: 77.2% smaller (88.94 MB → 20.27 MB)")
--- a/code/scripts/build_similarity_cache_parquet.py
+++ b/code/scripts/build_similarity_cache_parquet.py
@ -0,0 +1,446 @@
+"""
+Build similarity cache for all cards in the database using Parquet format.
+
+Pre-computes and stores similarity calculations for ~29k cards to improve
+card detail page performance from 2-6s down to <500ms.
+
+NOTE: This script assumes card data and tagging are already complete.
+Run setup and tagging separately before building the cache.
+
+Usage:
+    python -m code.scripts.build_similarity_cache_parquet [--parallel] [--checkpoint-interval 100]
+    
+Options:
+    --parallel              Enable parallel processing (faster but uses more CPU)
+    --checkpoint-interval   Save cache every N cards (default: 100)
+    --force                 Rebuild cache even if it exists
+    --dry-run               Calculate without saving (for testing)
+    --workers N             Number of parallel workers (default: auto-detect)
+"""
+
+import argparse
+import logging
+import sys
+import time
+import pandas as pd
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from datetime import datetime
+from pathlib import Path
+
+# Add project root to path
+project_root = Path(__file__).parents[2]
+sys.path.insert(0, str(project_root))
+
+from code.web.services.card_similarity import CardSimilarity
+from code.web.services.similarity_cache import SimilarityCache, get_cache
+
+# Setup logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+# Shared data for worker processes (passed during initialization, not reloaded per worker)
+_shared_cards_df = None
+_shared_theme_frequencies = None
+_shared_cleaned_tags = None
+_worker_similarity = None
+
+
+def _init_worker(cards_df_pickled: bytes, theme_frequencies: dict, cleaned_tags: dict):
+    """
+    Initialize worker process with shared data.
+    Called once when each worker process starts.
+
+    Args:
+        cards_df_pickled: Pickled DataFrame of all cards
+        theme_frequencies: Pre-computed theme frequency dict
+        cleaned_tags: Pre-computed cleaned tags cache
+    """
+    import pickle
+    import logging
+
+    global _shared_cards_df, _shared_theme_frequencies, _shared_cleaned_tags, _worker_similarity
+
+    # Unpickle shared data once per worker
+    _shared_cards_df = pickle.loads(cards_df_pickled)
+    _shared_theme_frequencies = theme_frequencies
+    _shared_cleaned_tags = cleaned_tags
+
+    # Create worker-level CardSimilarity instance with shared data
+    _worker_similarity = CardSimilarity(cards_df=_shared_cards_df)
+
+    # Override pre-computed data to avoid recomputation
+    _worker_similarity.theme_frequencies = _shared_theme_frequencies
+    _worker_similarity.cleaned_tags_cache = _shared_cleaned_tags
+
+    # Suppress verbose logging in workers
+    logging.getLogger("card_similarity").setLevel(logging.WARNING)
+
+
+def calculate_similarity_for_card(args: tuple) -> tuple[str, list[dict], bool]:
+    """
+    Calculate similarity for a single card (worker function for parallel processing).
+
+    Args:
+        args: Tuple of (card_name, threshold, min_results, limit)
+
+    Returns:
+        Tuple of (card_name, similar_cards, success)
+    """
+    card_name, threshold, min_results, limit = args
+
+    try:
+        # Use the global worker-level CardSimilarity instance
+        global _worker_similarity
+        if _worker_similarity is None:
+            # Fallback if initializer wasn't called (shouldn't happen)
+            _worker_similarity = CardSimilarity()
+
+        # Calculate without using cache (we're building it)
+        similar_cards = _worker_similarity.find_similar(
+            card_name=card_name,
+            threshold=threshold,
+            min_results=min_results,
+            limit=limit,
+            adaptive=True,
+            use_cache=False,
+        )
+
+        return card_name, similar_cards, True
+
+    except Exception as e:
+        logger.error(f"Failed to calculate similarity for '{card_name}': {e}")
+        return card_name, [], False
+
+
+def _add_results_to_cache(cache_df: pd.DataFrame, card_name: str, similar_cards: list[dict]) -> pd.DataFrame:
+    """
+    Add similarity results for a card to the cache DataFrame.
+
+    Args:
+        cache_df: Existing cache DataFrame
+        card_name: Name of the card
+        similar_cards: List of similar cards with scores
+
+    Returns:
+        Updated DataFrame
+    """
+    # Build new rows
+    new_rows = []
+    for rank, card in enumerate(similar_cards):
+        new_rows.append({
+            "card_name": card_name,
+            "similar_name": card["name"],
+            "similarity": card["similarity"],
+            "edhrecRank": card.get("edhrecRank", float("inf")),
+            "rank": rank,
+        })
+
+    if new_rows:
+        new_df = pd.DataFrame(new_rows)
+        cache_df = pd.concat([cache_df, new_df], ignore_index=True)
+
+    return cache_df
+
+
+def build_cache(
+    parallel: bool = False,
+    workers: int | None = None,
+    checkpoint_interval: int = 100,
+    force: bool = False,
+    dry_run: bool = False,
+) -> None:
+    """
+    Build similarity cache for all cards.
+    
+    NOTE: Assumes card data (card_files/processed/all_cards.parquet) and tagged data already exist.
+    Run setup and tagging separately before building cache.
+
+    Args:
+        parallel: Enable parallel processing
+        workers: Number of parallel workers (None = auto-detect)
+        checkpoint_interval: Save cache every N cards
+        force: Rebuild even if cache exists
+        dry_run: Calculate without saving
+    """
+    logger.info("=" * 80)
+    logger.info("Similarity Cache Builder (Parquet Edition)")
+    logger.info("=" * 80)
+    logger.info("")
+
+    # Initialize cache
+    cache = get_cache()
+
+    # Quick check for complete cache - if metadata says build is done, exit
+    if not force and cache.cache_path.exists() and not dry_run:
+        metadata = cache._metadata or {}
+        is_complete = metadata.get("build_complete", False)
+        
+        if is_complete:
+            stats = cache.get_stats()
+            logger.info(f"Cache already complete with {stats['total_cards']:,} cards")
+            logger.info("Use --force to rebuild")
+            return
+        else:
+            stats = cache.get_stats()
+            logger.info(f"Resuming incomplete cache with {stats['total_cards']:,} cards")
+
+    if dry_run:
+        logger.info("DRY RUN MODE - No changes will be saved")
+        logger.info("")
+
+    # Initialize similarity engine
+    logger.info("Initializing similarity engine...")
+    similarity = CardSimilarity()
+    total_cards = len(similarity.cards_df)
+    logger.info(f"Loaded {total_cards:,} cards")
+    logger.info("")
+
+    # Filter out low-value lands (single-sided with <3 tags)
+    df = similarity.cards_df
+    df["is_land"] = df["type"].str.contains("Land", case=False, na=False)
+    df["is_multifaced"] = df["layout"].str.lower().isin(["modal_dfc", "transform", "reversible_card", "double_faced_token"])
+    # M4: themeTags is now a list (Parquet format), not a pipe-delimited string
+    df["tag_count"] = df["themeTags"].apply(lambda x: len(x) if isinstance(x, list) else 0)
+
+    # Keep cards that are either:
+    # 1. Not lands, OR
+    # 2. Multi-faced lands, OR
+    # 3. Single-sided lands with >= 3 tags
+    keep_mask = (~df["is_land"]) | (df["is_multifaced"]) | (df["is_land"] & (df["tag_count"] >= 3))
+
+    card_names = df[keep_mask]["name"].tolist()
+    skipped_lands = (~keep_mask & df["is_land"]).sum()
+
+    logger.info(f"Filtered out {skipped_lands} low-value lands (single-sided with <3 tags)")
+    logger.info(f"Processing {len(card_names):,} cards ({len(card_names)/total_cards*100:.1f}% of total)")
+    logger.info("")
+
+    # Configuration for similarity calculation
+    threshold = 0.8
+    min_results = 3
+    limit = 20  # Cache up to 20 similar cards per card for variety
+
+    # Initialize cache data structure - try to load existing for resume
+    existing_cache_df = cache.load_cache()
+    already_processed = set()
+
+    if len(existing_cache_df) > 0 and not dry_run:
+        # Resume from checkpoint - keep existing data
+        cache_df = existing_cache_df
+        already_processed = set(existing_cache_df["card_name"].unique())
+        logger.info(f"Resuming from checkpoint with {len(already_processed):,} cards already processed")
+
+        # Setup metadata
+        metadata = cache._metadata or cache._empty_metadata()
+    else:
+        # Start fresh
+        cache_df = cache._empty_cache_df()
+        metadata = cache._empty_metadata()
+        metadata["build_date"] = datetime.now().isoformat()
+        metadata["threshold"] = threshold
+        metadata["min_results"] = min_results
+
+    # Track stats
+    start_time = time.time()
+    processed = len(already_processed)  # Start count from checkpoint
+    failed = 0
+    checkpoint_count = 0
+
+    try:
+        if parallel:
+            # Parallel processing - use available CPU cores
+            import os
+            import pickle
+
+            if workers is not None:
+                max_workers = max(1, workers)  # User-specified, minimum 1
+                logger.info(f"Using {max_workers} worker processes (user-specified)")
+            else:
+                cpu_count = os.cpu_count() or 4
+                # Use CPU count - 1 to leave one core for system, minimum 4
+                max_workers = max(4, cpu_count - 1)
+                logger.info(f"Detected {cpu_count} CPUs, using {max_workers} worker processes")
+
+            # Prepare shared data (pickle DataFrame once, share with all workers)
+            logger.info("Preparing shared data for workers...")
+            cards_df_pickled = pickle.dumps(similarity.cards_df)
+            theme_frequencies = similarity.theme_frequencies.copy()
+            cleaned_tags = similarity.cleaned_tags_cache.copy()
+            logger.info(f"Shared data prepared: {len(cards_df_pickled):,} bytes (DataFrame), "
+                       f"{len(theme_frequencies)} themes, {len(cleaned_tags)} cleaned tag sets")
+
+            # Prepare arguments for cards not yet processed
+            cards_to_process = [name for name in card_names if name not in already_processed]
+            logger.info(f"Cards to process: {len(cards_to_process):,} (skipping {len(already_processed):,} already done)")
+
+            card_args = [(name, threshold, min_results, limit) for name in cards_to_process]
+
+            with ProcessPoolExecutor(
+                max_workers=max_workers,
+                initializer=_init_worker,
+                initargs=(cards_df_pickled, theme_frequencies, cleaned_tags)
+            ) as executor:
+                # Submit all tasks
+                future_to_card = {
+                    executor.submit(calculate_similarity_for_card, args): args[0]
+                    for args in card_args
+                }
+
+                # Process results as they complete
+                for future in as_completed(future_to_card):
+                    card_name, similar_cards, success = future.result()
+
+                    if success:
+                        cache_df = _add_results_to_cache(cache_df, card_name, similar_cards)
+                        processed += 1
+                    else:
+                        failed += 1
+
+                    # Progress reporting
+                    total_to_process = len(card_names)
+                    if processed % 100 == 0:
+                        elapsed = time.time() - start_time
+                        # Calculate rate based on cards processed THIS session
+                        cards_this_session = processed - len(already_processed)
+                        rate = cards_this_session / elapsed if elapsed > 0 else 0
+                        cards_remaining = total_to_process - processed
+                        eta = cards_remaining / rate if rate > 0 else 0
+                        logger.info(
+                            f"Progress: {processed}/{total_to_process} "
+                            f"({processed/total_to_process*100:.1f}%) - "
+                            f"Rate: {rate:.1f} cards/sec - "
+                            f"ETA: {eta/60:.1f} min"
+                        )
+
+                    # Checkpoint save
+                    if not dry_run and processed % checkpoint_interval == 0:
+                        checkpoint_count += 1
+                        cache.save_cache(cache_df, metadata)
+                        logger.info(f"Checkpoint {checkpoint_count}: Saved cache with {processed:,} cards")
+
+        else:
+            # Serial processing - skip already processed cards
+            cards_to_process = [name for name in card_names if name not in already_processed]
+            logger.info(f"Cards to process: {len(cards_to_process):,} (skipping {len(already_processed):,} already done)")
+
+            for i, card_name in enumerate(cards_to_process, start=1):
+                try:
+                    similar_cards = similarity.find_similar(
+                        card_name=card_name,
+                        threshold=threshold,
+                        min_results=min_results,
+                        limit=limit,
+                        adaptive=True,
+                        use_cache=False,
+                    )
+
+                    cache_df = _add_results_to_cache(cache_df, card_name, similar_cards)
+                    processed += 1
+
+                except Exception as e:
+                    logger.error(f"Failed to process '{card_name}': {e}")
+                    failed += 1
+
+                # Progress reporting
+                if i % 100 == 0:
+                    elapsed = time.time() - start_time
+                    rate = i / elapsed if elapsed > 0 else 0
+                    cards_remaining = len(card_names) - i
+                    eta = cards_remaining / rate if rate > 0 else 0
+                    logger.info(
+                        f"Progress: {i}/{len(card_names)} "
+                        f"({i/len(card_names)*100:.1f}%) - "
+                        f"Rate: {rate:.1f} cards/sec - "
+                        f"ETA: {eta/60:.1f} min"
+                    )
+
+                # Checkpoint save
+                if not dry_run and i % checkpoint_interval == 0:
+                    checkpoint_count += 1
+                    cache.save_cache(cache_df, metadata)
+                    logger.info(f"Checkpoint {checkpoint_count}: Saved cache with {i:,} cards")
+
+        # Final save
+        if not dry_run:
+            metadata["last_updated"] = datetime.now().isoformat()
+            metadata["build_complete"] = True
+            cache.save_cache(cache_df, metadata)
+
+        # Summary
+        elapsed = time.time() - start_time
+        logger.info("")
+        logger.info("=" * 80)
+        logger.info("Build Complete")
+        logger.info("=" * 80)
+        logger.info(f"Total time: {elapsed/60:.2f} minutes")
+        logger.info(f"Cards processed: {processed:,}")
+        logger.info(f"Failed: {failed}")
+        logger.info(f"Checkpoints saved: {checkpoint_count}")
+
+        if processed > 0:
+            logger.info(f"Average rate: {processed/elapsed:.2f} cards/sec")
+
+        if not dry_run:
+            stats = cache.get_stats()
+            logger.info(f"Cache file size: {stats.get('file_size_mb', 0):.2f} MB")
+            logger.info(f"Cache location: {cache.cache_path}")
+
+    except KeyboardInterrupt:
+        logger.warning("\nBuild interrupted by user")
+
+        # Save partial cache
+        if not dry_run and len(cache_df) > 0:
+            metadata["last_updated"] = datetime.now().isoformat()
+            cache.save_cache(cache_df, metadata)
+            logger.info(f"Saved partial cache with {processed:,} cards")
+
+
+def main():
+    """CLI entry point."""
+    parser = argparse.ArgumentParser(
+        description="Build similarity cache for all cards (Parquet format)"
+    )
+    parser.add_argument(
+        "--parallel",
+        action="store_true",
+        help="Enable parallel processing",
+    )
+    parser.add_argument(
+        "--workers",
+        type=int,
+        default=None,
+        help="Number of parallel workers (default: auto-detect)",
+    )
+    parser.add_argument(
+        "--checkpoint-interval",
+        type=int,
+        default=100,
+        help="Save cache every N cards (default: 100)",
+    )
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        help="Rebuild cache even if it exists",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Calculate without saving (for testing)",
+    )
+
+    args = parser.parse_args()
+
+    build_cache(
+        parallel=args.parallel,
+        workers=args.workers,
+        checkpoint_interval=args.checkpoint_interval,
+        force=args.force,
+        dry_run=args.dry_run,
+    )
+
+
+if __name__ == "__main__":
+    main()
--- a/code/scripts/build_theme_catalog.py
+++ b/code/scripts/build_theme_catalog.py
@ -36,7 +36,7 @@ except Exception:  # pragma: no cover

 try:
    # Support running as `python code/scripts/build_theme_catalog.py` when 'code' already on path
-    from scripts.extract_themes import (  # type: ignore
+    from scripts.extract_themes import (
        BASE_COLORS,
        collect_theme_tags_from_constants,
        collect_theme_tags_from_tagger_source,
@ -51,7 +51,7 @@ try:
    )
 except ModuleNotFoundError:
    # Fallback: direct relative import when running within scripts package context
-    from extract_themes import (  # type: ignore
+    from extract_themes import (
    BASE_COLORS,
    collect_theme_tags_from_constants,
    collect_theme_tags_from_tagger_source,
@ -66,7 +66,7 @@ except ModuleNotFoundError:
    )

 try:
-    from scripts.export_themes_to_yaml import slugify as slugify_theme  # type: ignore
+    from scripts.export_themes_to_yaml import slugify as slugify_theme
 except Exception:
    _SLUG_RE = re.compile(r'[^a-z0-9-]')

@ -951,7 +951,7 @@ def main():  # pragma: no cover
    if args.schema:
        # Lazy import to avoid circular dependency: replicate minimal schema inline from models file if present
        try:
-            from type_definitions_theme_catalog import ThemeCatalog  # type: ignore
+            from type_definitions_theme_catalog import ThemeCatalog
            import json as _json
            print(_json.dumps(ThemeCatalog.model_json_schema(), indent=2))
            return
@ -990,8 +990,8 @@ def main():  # pragma: no cover
            # Safeguard: if catalog dir missing, attempt to auto-export Phase A YAML first
            if not CATALOG_DIR.exists():  # pragma: no cover (environmental)
                try:
-                    from scripts.export_themes_to_yaml import main as export_main  # type: ignore
-                    export_main(['--force'])  # type: ignore[arg-type]
+                    from scripts.export_themes_to_yaml import main as export_main
+                    export_main(['--force'])
                except Exception as _e:
                    print(f"[build_theme_catalog] WARNING: catalog dir missing and auto export failed: {_e}", file=sys.stderr)
            if yaml is None:
@ -1013,7 +1013,7 @@ def main():  # pragma: no cover
                    meta_block = raw.get('metadata_info') if isinstance(raw.get('metadata_info'), dict) else {}
                    # Legacy migration: if no metadata_info but legacy provenance present, adopt it
                    if not meta_block and isinstance(raw.get('provenance'), dict):
-                        meta_block = raw.get('provenance')  # type: ignore
+                        meta_block = raw.get('provenance')
                        changed = True
                    if force or not meta_block.get('last_backfill'):
                        meta_block['last_backfill'] = time.strftime('%Y-%m-%dT%H:%M:%S')
--- a/code/scripts/check_random_theme_perf.py
+++ b/code/scripts/check_random_theme_perf.py
@ -1,118 +0,0 @@
-"""Opt-in guard that compares multi-theme filter performance to a stored baseline.
-
-Run inside the project virtual environment:
-
-    python -m code.scripts.check_random_theme_perf --baseline config/random_theme_perf_baseline.json
-
-The script executes the same profiling loop as `profile_multi_theme_filter` and fails
-if the observed mean or p95 timings regress more than the allowed threshold.
-"""
-from __future__ import annotations
-
-import argparse
-import json
-import sys
-from pathlib import Path
-from typing import Any, Dict, Tuple
-
-PROJECT_ROOT = Path(__file__).resolve().parents[2]
-DEFAULT_BASELINE = PROJECT_ROOT / "config" / "random_theme_perf_baseline.json"
-
-if str(PROJECT_ROOT) not in sys.path:
-    sys.path.append(str(PROJECT_ROOT))
-
-from code.scripts.profile_multi_theme_filter import run_profile  # type: ignore  # noqa: E402
-
-
-def _load_baseline(path: Path) -> Dict[str, Any]:
-    if not path.exists():
-        raise FileNotFoundError(f"Baseline file not found: {path}")
-    data = json.loads(path.read_text(encoding="utf-8"))
-    return data
-
-
-def _extract(metric: Dict[str, Any], key: str) -> float:
-    try:
-        value = float(metric.get(key, 0.0))
-    except Exception:
-        value = 0.0
-    return value
-
-
-def _check_section(name: str, actual: Dict[str, Any], baseline: Dict[str, Any], threshold: float) -> Tuple[bool, str]:
-    a_mean = _extract(actual, "mean_ms")
-    b_mean = _extract(baseline, "mean_ms")
-    a_p95 = _extract(actual, "p95_ms")
-    b_p95 = _extract(baseline, "p95_ms")
-
-    allowed_mean = b_mean * (1.0 + threshold)
-    allowed_p95 = b_p95 * (1.0 + threshold)
-
-    mean_ok = a_mean <= allowed_mean or b_mean == 0.0
-    p95_ok = a_p95 <= allowed_p95 or b_p95 == 0.0
-
-    status = mean_ok and p95_ok
-
-    def _format_row(label: str, actual_val: float, baseline_val: float, allowed_val: float, ok: bool) -> str:
-        trend = ((actual_val - baseline_val) / baseline_val * 100.0) if baseline_val else 0.0
-        trend_str = f"{trend:+.1f}%" if baseline_val else "n/a"
-        limit_str = f"≤ {allowed_val:.3f}ms" if baseline_val else "n/a"
-        return f"    {label:<6} actual={actual_val:.3f}ms baseline={baseline_val:.3f}ms ({trend_str}), limit {limit_str} -> {'OK' if ok else 'FAIL'}"
-
-    rows = [f"Section: {name}"]
-    rows.append(_format_row("mean", a_mean, b_mean, allowed_mean, mean_ok))
-    rows.append(_format_row("p95", a_p95, b_p95, allowed_p95, p95_ok))
-    return status, "\n".join(rows)
-
-
-def main(argv: list[str] | None = None) -> int:
-    parser = argparse.ArgumentParser(description="Check multi-theme filtering performance against a baseline")
-    parser.add_argument("--baseline", type=Path, default=DEFAULT_BASELINE, help="Baseline JSON file (default: config/random_theme_perf_baseline.json)")
-    parser.add_argument("--iterations", type=int, default=400, help="Number of iterations to sample (default: 400)")
-    parser.add_argument("--seed", type=int, default=None, help="Optional RNG seed for reproducibility")
-    parser.add_argument("--threshold", type=float, default=0.15, help="Allowed regression threshold as a fraction (default: 0.15 = 15%)")
-    parser.add_argument("--update-baseline", action="store_true", help="Overwrite the baseline file with the newly collected metrics")
-    args = parser.parse_args(argv)
-
-    baseline_path = args.baseline if args.baseline else DEFAULT_BASELINE
-    if args.update_baseline and not baseline_path.parent.exists():
-        baseline_path.parent.mkdir(parents=True, exist_ok=True)
-
-    if not args.update_baseline:
-        baseline = _load_baseline(baseline_path)
-    else:
-        baseline = {}
-
-    results = run_profile(args.iterations, args.seed)
-
-    cascade_status, cascade_report = _check_section("cascade", results.get("cascade", {}), baseline.get("cascade", {}), args.threshold)
-    synergy_status, synergy_report = _check_section("synergy", results.get("synergy", {}), baseline.get("synergy", {}), args.threshold)
-
-    print("Iterations:", results.get("iterations"))
-    print("Seed:", results.get("seed"))
-    print(cascade_report)
-    print(synergy_report)
-
-    overall_ok = cascade_status and synergy_status
-
-    if args.update_baseline:
-        payload = {
-            "iterations": results.get("iterations"),
-            "seed": results.get("seed"),
-            "cascade": results.get("cascade"),
-            "synergy": results.get("synergy"),
-        }
-        baseline_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
-        print(f"Baseline updated → {baseline_path}")
-        return 0
-
-    if not overall_ok:
-        print(f"FAIL: performance regressions exceeded {args.threshold * 100:.1f}% threshold", file=sys.stderr)
-        return 1
-
-    print("PASS: performance within allowed threshold")
-    return 0
-
-
-if __name__ == "__main__":  # pragma: no cover
-    raise SystemExit(main())
--- a/code/scripts/enrich_themes.py
+++ b/code/scripts/enrich_themes.py
@ -0,0 +1,135 @@
+"""CLI wrapper for theme enrichment pipeline.
+
+Runs the consolidated theme enrichment pipeline with command-line options.
+For backward compatibility, individual scripts can still be run separately,
+but this provides a faster single-pass alternative.
+
+Usage:
+    python code/scripts/enrich_themes.py --write
+    python code/scripts/enrich_themes.py --dry-run --enforce-min
+"""
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+from pathlib import Path
+
+# Add project root to path
+ROOT = Path(__file__).resolve().parents[2]
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
+
+# Import after adding to path
+from code.tagging.theme_enrichment import run_enrichment_pipeline  # noqa: E402
+
+
+def main() -> int:
+    """Run theme enrichment pipeline from CLI."""
+    parser = argparse.ArgumentParser(
+        description='Consolidated theme metadata enrichment pipeline',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Dry run (no changes written):
+  python code/scripts/enrich_themes.py --dry-run
+  
+  # Write changes:
+  python code/scripts/enrich_themes.py --write
+  
+  # Enforce minimum examples (errors if insufficient):
+  python code/scripts/enrich_themes.py --write --enforce-min
+  
+  # Strict validation for cornerstone themes:
+  python code/scripts/enrich_themes.py --write --strict
+
+Note: This replaces running 7 separate scripts (autofill, pad, cleanup, purge,
+augment, suggestions, lint) with a single 5-10x faster operation.
+        """
+    )
+    
+    parser.add_argument(
+        '--write',
+        action='store_true',
+        help='Write changes to disk (default: dry run)'
+    )
+    parser.add_argument(
+        '--dry-run',
+        action='store_true',
+        help='Dry run mode: show what would be changed without writing'
+    )
+    parser.add_argument(
+        '--min',
+        '--min-examples',
+        type=int,
+        default=None,
+        metavar='N',
+        help='Minimum number of example commanders (default: $EDITORIAL_MIN_EXAMPLES or 5)'
+    )
+    parser.add_argument(
+        '--enforce-min',
+        action='store_true',
+        help='Treat minimum examples violations as errors'
+    )
+    parser.add_argument(
+        '--strict',
+        action='store_true',
+        help='Enable strict validation (cornerstone themes must have examples)'
+    )
+    
+    args = parser.parse_args()
+    
+    # Determine write mode
+    if args.dry_run:
+        write = False
+    elif args.write:
+        write = True
+    else:
+        # Default to dry run if neither specified
+        write = False
+        print("Note: Running in dry-run mode (use --write to save changes)\n")
+    
+    # Get minimum examples threshold
+    if args.min is not None:
+        min_examples = args.min
+    else:
+        min_examples = int(os.environ.get('EDITORIAL_MIN_EXAMPLES', '5'))
+    
+    print("Theme Enrichment Pipeline")
+    print("========================")
+    print(f"Mode: {'WRITE' if write else 'DRY RUN'}")
+    print(f"Min examples: {min_examples}")
+    print(f"Enforce min: {args.enforce_min}")
+    print(f"Strict: {args.strict}")
+    print()
+    
+    try:
+        stats = run_enrichment_pipeline(
+            root=ROOT,
+            min_examples=min_examples,
+            write=write,
+            enforce_min=args.enforce_min,
+            strict=args.strict,
+            progress_callback=None,  # Use default print
+        )
+        
+        # Return non-zero if there are lint errors
+        if stats.lint_errors > 0:
+            print(f"\n❌ Enrichment completed with {stats.lint_errors} error(s)")
+            return 1
+        
+        print("\n✅ Enrichment completed successfully")
+        return 0
+        
+    except KeyboardInterrupt:
+        print("\n\nInterrupted by user")
+        return 130
+    except Exception as e:
+        print(f"\n❌ Error: {e}", file=sys.stderr)
+        if '--debug' in sys.argv:
+            raise
+        return 1
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())
--- a/code/scripts/export_themes_to_yaml.py
+++ b/code/scripts/export_themes_to_yaml.py
@ -41,7 +41,7 @@ SCRIPT_ROOT = Path(__file__).resolve().parent
 CODE_ROOT = SCRIPT_ROOT.parent
 if str(CODE_ROOT) not in sys.path:
    sys.path.insert(0, str(CODE_ROOT))
-from scripts.extract_themes import derive_synergies_for_tags  # type: ignore
+from scripts.extract_themes import derive_synergies_for_tags

 ROOT = Path(__file__).resolve().parents[2]
 THEME_JSON = ROOT / 'config' / 'themes' / 'theme_list.json'
@ -123,6 +123,9 @@ def main():
        enforced_set = set(enforced_synergies)
        inferred_synergies = [s for s in synergy_list if s not in curated_set and s not in enforced_set]

+        example_cards_value = entry.get('example_cards', [])
+        example_commanders_value = entry.get('example_commanders', [])
+        
        doc = {
            'id': slug,
            'display_name': theme_name,
@ -132,13 +135,40 @@ def main():
            'inferred_synergies': inferred_synergies,
            'primary_color': entry.get('primary_color'),
            'secondary_color': entry.get('secondary_color'),
+            'example_cards': example_cards_value,
+            'example_commanders': example_commanders_value,
+            'synergy_example_cards': entry.get('synergy_example_cards', []),
+            'synergy_commanders': entry.get('synergy_commanders', []),
+            'deck_archetype': entry.get('deck_archetype'),
+            'popularity_hint': entry.get('popularity_hint'),
+            'popularity_bucket': entry.get('popularity_bucket'),
+            'editorial_quality': entry.get('editorial_quality'),
+            'description': entry.get('description'),
            'notes': ''
        }
-        # Drop None color keys for cleanliness
+        # Drop None/empty keys for cleanliness
        if doc['primary_color'] is None:
            doc.pop('primary_color')
        if doc.get('secondary_color') is None:
            doc.pop('secondary_color')
+        if not doc.get('example_cards'):
+            doc.pop('example_cards')
+        if not doc.get('example_commanders'):
+            doc.pop('example_commanders')
+        if not doc.get('synergy_example_cards'):
+            doc.pop('synergy_example_cards')
+        if not doc.get('synergy_commanders'):
+            doc.pop('synergy_commanders')
+        if doc.get('deck_archetype') is None:
+            doc.pop('deck_archetype')
+        if doc.get('popularity_hint') is None:
+            doc.pop('popularity_hint')
+        if doc.get('popularity_bucket') is None:
+            doc.pop('popularity_bucket')
+        if doc.get('editorial_quality') is None:
+            doc.pop('editorial_quality')
+        if doc.get('description') is None:
+            doc.pop('description')
        with path.open('w', encoding='utf-8') as f:
            yaml.safe_dump(doc, f, sort_keys=False, allow_unicode=True)
        exported += 1
--- a/code/scripts/extract_themes.py
+++ b/code/scripts/extract_themes.py
@ -18,8 +18,8 @@ ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
 if ROOT not in sys.path:
    sys.path.insert(0, ROOT)

-from code.settings import CSV_DIRECTORY  # type: ignore
-from code.tagging import tag_constants  # type: ignore
+from code.settings import CSV_DIRECTORY
+from code.tagging import tag_constants

 BASE_COLORS = {
    'white': 'W',
@ -126,7 +126,7 @@ def tally_tag_frequencies_by_base_color() -> Dict[str, Dict[str, int]]:
            return derived
        # Iterate rows
        for _, row in df.iterrows():
-            tags = row['themeTags'] if isinstance(row['themeTags'], list) else []
+            tags = list(row['themeTags']) if hasattr(row.get('themeTags'), '__len__') and not isinstance(row.get('themeTags'), str) else []
            # Compute base colors contribution
            ci = row['colorIdentity'] if 'colorIdentity' in row else None
            letters = set(ci) if isinstance(ci, list) else set()
@ -162,7 +162,7 @@ def gather_theme_tag_rows() -> List[List[str]]:
        if 'themeTags' not in df.columns:
            continue
        for _, row in df.iterrows():
-            tags = row['themeTags'] if isinstance(row['themeTags'], list) else []
+            tags = list(row['themeTags']) if hasattr(row.get('themeTags'), '__len__') and not isinstance(row.get('themeTags'), str) else []
            if tags:
                rows.append(tags)
    return rows
@ -523,3 +523,4 @@ def main() -> None:

 if __name__ == "__main__":
    main()
+
--- a/code/scripts/generate_theme_catalog.py
+++ b/code/scripts/generate_theme_catalog.py
@ -19,16 +19,26 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Dict, Iterable, List, Optional, Sequence

+try:
+    import pandas as pd
+    HAS_PANDAS = True
+except ImportError:
+    HAS_PANDAS = False
+    pd = None  # type: ignore
+
 ROOT = Path(__file__).resolve().parents[2]
 CODE_ROOT = ROOT / "code"
 if str(CODE_ROOT) not in sys.path:
    sys.path.insert(0, str(CODE_ROOT))

 try:
-    from code.settings import CSV_DIRECTORY as DEFAULT_CSV_DIRECTORY  # type: ignore
+    from code.settings import CSV_DIRECTORY as DEFAULT_CSV_DIRECTORY
 except Exception:  # pragma: no cover - fallback for adhoc execution
    DEFAULT_CSV_DIRECTORY = "csv_files"

+# Parquet support requires pandas (imported at top of file, uses pyarrow under the hood)
+HAS_PARQUET_SUPPORT = HAS_PANDAS
+
 DEFAULT_OUTPUT_PATH = ROOT / "config" / "themes" / "theme_catalog.csv"
 HEADER_COMMENT_PREFIX = "# theme_catalog"

@ -63,6 +73,12 @@ def canonical_key(raw: str) -> str:
 def parse_theme_tags(value: object) -> List[str]:
    if value is None:
        return []
+    # Handle numpy arrays (from Parquet files)
+    if hasattr(value, '__array__') or hasattr(value, 'tolist'):
+        try:
+            value = value.tolist() if hasattr(value, 'tolist') else list(value)
+        except Exception:
+            pass
    if isinstance(value, list):
        return [str(v) for v in value if isinstance(v, str) and v.strip()]
    if isinstance(value, str):
@ -87,33 +103,77 @@ def parse_theme_tags(value: object) -> List[str]:
    return []


-def _load_theme_counts(csv_path: Path, theme_variants: Dict[str, set[str]]) -> Counter[str]:
+def _load_theme_counts_from_parquet(
+    parquet_path: Path,
+    theme_variants: Dict[str, set[str]]
+) -> Counter[str]:
+    """Load theme counts from a parquet file using pandas (which uses pyarrow).
+    
+    Args:
+        parquet_path: Path to the parquet file (commander_cards.parquet or all_cards.parquet)
+        theme_variants: Dict to accumulate theme name variants
+        
+    Returns:
+        Counter of theme occurrences
+    """
+    if pd is None:
+        print("  pandas not available, skipping parquet load")
+        return Counter()
+    
    counts: Counter[str] = Counter()
-    if not csv_path.exists():
+    
+    if not parquet_path.exists():
+        print(f"  Parquet file does not exist: {parquet_path}")
        return counts
-    with csv_path.open("r", encoding="utf-8-sig", newline="") as handle:
-        reader = csv.DictReader(handle)
-        if not reader.fieldnames or "themeTags" not in reader.fieldnames:
-            return counts
-        for row in reader:
-            raw_value = row.get("themeTags")
-            tags = parse_theme_tags(raw_value)
-            if not tags:
+    
+    # Read only themeTags column for efficiency
+    try:
+        df = pd.read_parquet(parquet_path, columns=["themeTags"])
+        print(f"  Loaded {len(df)} rows from parquet")
+    except Exception as e:
+        # If themeTags column doesn't exist, return empty
+        print(f"  Failed to read themeTags column: {e}")
+        return counts
+    
+    # Convert to list for fast iteration (faster than iterrows)
+    theme_tags_list = df["themeTags"].tolist()
+    
+    # Debug: check first few entries
+    non_empty_count = 0
+    for i, raw_value in enumerate(theme_tags_list[:10]):
+        if raw_value is not None and not (isinstance(raw_value, float) and pd.isna(raw_value)):
+            non_empty_count += 1
+            if i < 3:  # Show first 3 non-empty
+                print(f"    Sample tag {i}: {raw_value!r} (type: {type(raw_value).__name__})")
+    
+    if non_empty_count == 0:
+        print("  WARNING: No non-empty themeTags found in first 10 rows")
+    
+    for raw_value in theme_tags_list:
+        if raw_value is None or (isinstance(raw_value, float) and pd.isna(raw_value)):
+            continue
+        tags = parse_theme_tags(raw_value)
+        if not tags:
+            continue
+        seen_in_row: set[str] = set()
+        for tag in tags:
+            display = normalize_theme_display(tag)
+            if not display:
                continue
-            seen_in_row: set[str] = set()
-            for tag in tags:
-                display = normalize_theme_display(tag)
-                if not display:
-                    continue
-                key = canonical_key(display)
-                if key in seen_in_row:
-                    continue
-                seen_in_row.add(key)
-                counts[key] += 1
-                theme_variants[key].add(display)
+            key = canonical_key(display)
+            if key in seen_in_row:
+                continue
+            seen_in_row.add(key)
+            counts[key] += 1
+            theme_variants[key].add(display)
+    
+    print(f"  Found {len(counts)} unique themes from parquet")
    return counts


+# CSV fallback removed in M4 migration - Parquet is now required
+
+
 def _select_display_name(options: Sequence[str]) -> str:
    if not options:
        return ""
@ -143,27 +203,95 @@ def build_theme_catalog(
    output_path: Path,
    *,
    generated_at: Optional[datetime] = None,
-    commander_filename: str = "commander_cards.csv",
-    cards_filename: str = "cards.csv",
    logs_directory: Optional[Path] = None,
+    min_card_count: int = 3,
 ) -> CatalogBuildResult:
+    """Build theme catalog from Parquet card data.
+    
+    Args:
+        csv_directory: Base directory (used to locate card_files/processed/all_cards.parquet)
+        output_path: Where to write the catalog CSV
+        generated_at: Optional timestamp for generation
+        logs_directory: Optional directory to copy output to
+        min_card_count: Minimum number of cards required to include theme (default: 3)
+        
+    Returns:
+        CatalogBuildResult with generated rows and metadata
+        
+    Raises:
+        RuntimeError: If pandas/pyarrow not available
+        FileNotFoundError: If all_cards.parquet doesn't exist
+        RuntimeError: If no theme tags found in Parquet file
+    """
    csv_directory = csv_directory.resolve()
    output_path = output_path.resolve()

    theme_variants: Dict[str, set[str]] = defaultdict(set)

-    commander_counts = _load_theme_counts(csv_directory / commander_filename, theme_variants)
-
-    card_counts: Counter[str] = Counter()
-    cards_path = csv_directory / cards_filename
-    if cards_path.exists():
-        card_counts = _load_theme_counts(cards_path, theme_variants)
+    # Parquet-only mode (M4 migration: CSV files removed)
+    if not HAS_PARQUET_SUPPORT:
+        raise RuntimeError(
+            "Pandas is required for theme catalog generation. "
+            "Install with: pip install pandas pyarrow"
+        )
+    
+    # Use processed parquet files (M4 migration)
+    parquet_dir = csv_directory.parent / "card_files" / "processed"
+    all_cards_parquet = parquet_dir / "all_cards.parquet"
+    
+    print(f"Loading theme data from parquet: {all_cards_parquet}")
+    print(f"  File exists: {all_cards_parquet.exists()}")
+    
+    if not all_cards_parquet.exists():
+        raise FileNotFoundError(
+            f"Required Parquet file not found: {all_cards_parquet}\n"
+            f"Run tagging first: python -c \"from code.tagging.tagger import run_tagging; run_tagging()\""
+        )
+    
+    # Load all card counts from all_cards.parquet (includes commanders)
+    card_counts = _load_theme_counts_from_parquet(
+        all_cards_parquet, theme_variants=theme_variants
+    )
+    
+    # For commander counts, filter all_cards by isCommander column
+    df_commanders = pd.read_parquet(all_cards_parquet)
+    if 'isCommander' in df_commanders.columns:
+        df_commanders = df_commanders[df_commanders['isCommander']]
    else:
-        # Fallback: scan all *_cards.csv except commander
-        for candidate in csv_directory.glob("*_cards.csv"):
-            if candidate.name == commander_filename:
+        # Fallback: assume all cards could be commanders if column missing
+        pass
+    commander_counts = Counter()
+    for tags in df_commanders['themeTags'].tolist():
+        if tags is None or (isinstance(tags, float) and pd.isna(tags)):
+            continue
+        # Functions are defined at top of this file, no import needed
+        parsed = parse_theme_tags(tags)
+        if not parsed:
+            continue
+        seen = set()
+        for tag in parsed:
+            display = normalize_theme_display(tag)
+            if not display:
                continue
-            card_counts += _load_theme_counts(candidate, theme_variants)
+            key = canonical_key(display)
+            if key not in seen:
+                seen.add(key)
+                commander_counts[key] += 1
+                theme_variants[key].add(display)
+    
+    # Verify we found theme tags
+    total_themes_found = len(card_counts) + len(commander_counts)
+    if total_themes_found == 0:
+        raise RuntimeError(
+            f"No theme tags found in {all_cards_parquet}\n"
+            f"The Parquet file exists but contains no themeTags data. "
+            f"This usually means tagging hasn't completed or failed.\n"
+            f"Check that 'themeTags' column exists and is populated."
+        )
+    
+    print("✓ Loaded theme data from parquet files")
+    print(f"  - Commanders: {len(commander_counts)} themes")
+    print(f"  - All cards: {len(card_counts)} themes")

    keys = sorted(set(card_counts.keys()) | set(commander_counts.keys()))
    generated_at_iso = _derive_generated_at(generated_at)
@ -171,12 +299,19 @@ def build_theme_catalog(
    version_hash = _compute_version_hash(display_names)

    rows: List[CatalogRow] = []
+    filtered_count = 0
    for key, display in zip(keys, display_names):
        if not display:
            continue
        card_count = int(card_counts.get(key, 0))
        commander_count = int(commander_counts.get(key, 0))
        source_count = card_count + commander_count
+        
+        # Filter out themes below minimum threshold
+        if source_count < min_card_count:
+            filtered_count += 1
+            continue
+        
        rows.append(
            CatalogRow(
                theme=display,
@ -216,6 +351,9 @@ def build_theme_catalog(
                row.version,
            ])

+    if filtered_count > 0:
+        print(f"  Filtered {filtered_count} themes with <{min_card_count} cards")
+
    if logs_directory is not None:
        logs_directory = logs_directory.resolve()
        logs_directory.mkdir(parents=True, exist_ok=True)
@ -262,6 +400,13 @@ def main(argv: Optional[Sequence[str]] = None) -> CatalogBuildResult:
        default=None,
        help="Optional directory to mirror the generated catalog for diffing (e.g., logs/generated)",
    )
+    parser.add_argument(
+        "--min-cards",
+        dest="min_cards",
+        type=int,
+        default=3,
+        help="Minimum number of cards required to include theme (default: 3)",
+    )
    args = parser.parse_args(argv)

    csv_dir = _resolve_csv_directory(str(args.csv_dir) if args.csv_dir else None)
@ -269,6 +414,7 @@ def main(argv: Optional[Sequence[str]] = None) -> CatalogBuildResult:
        csv_directory=csv_dir,
        output_path=args.output,
        logs_directory=args.logs_dir,
+        min_card_count=args.min_cards,
    )
    print(
        f"Generated {len(result.rows)} themes -> {result.output_path} (version={result.version})",
--- a/code/scripts/inspect_parquet.py
+++ b/code/scripts/inspect_parquet.py
@ -0,0 +1,104 @@
+"""Inspect MTGJSON Parquet file schema and compare to CSV."""
+
+import pandas as pd
+import os
+import sys
+
+def inspect_parquet():
+    """Load and inspect Parquet file."""
+    parquet_path = 'csv_files/cards_parquet_test.parquet'
+    
+    if not os.path.exists(parquet_path):
+        print(f"Error: {parquet_path} not found")
+        return
+    
+    print("Loading Parquet file...")
+    df = pd.read_parquet(parquet_path)
+    
+    print("\n=== PARQUET FILE INFO ===")
+    print(f"Rows: {len(df):,}")
+    print(f"Columns: {len(df.columns)}")
+    print(f"File size: {os.path.getsize(parquet_path) / 1024 / 1024:.2f} MB")
+    
+    print("\n=== PARQUET COLUMNS AND TYPES ===")
+    for col in sorted(df.columns):
+        dtype = str(df[col].dtype)
+        non_null = df[col].notna().sum()
+        null_pct = (1 - non_null / len(df)) * 100
+        print(f"  {col:30s} {dtype:15s} ({null_pct:5.1f}% null)")
+    
+    print("\n=== SAMPLE DATA (first card) ===")
+    first_card = df.iloc[0].to_dict()
+    for key, value in sorted(first_card.items()):
+        if isinstance(value, (list, dict)):
+            print(f"  {key}: {type(value).__name__} with {len(value)} items")
+        else:
+            value_str = str(value)[:80]
+            print(f"  {key}: {value_str}")
+    
+    return df
+
+def compare_to_csv():
+    """Compare Parquet columns to CSV columns."""
+    csv_path = 'csv_files/cards.csv'
+    parquet_path = 'csv_files/cards_parquet_test.parquet'
+    
+    if not os.path.exists(csv_path):
+        print(f"\nNote: {csv_path} not found, skipping comparison")
+        return
+    
+    print("\n\n=== CSV FILE INFO ===")
+    print("Loading CSV file...")
+    df_csv = pd.read_csv(csv_path, low_memory=False, nrows=1)
+    
+    csv_size = os.path.getsize(csv_path) / 1024 / 1024
+    print(f"File size: {csv_size:.2f} MB")
+    print(f"Columns: {len(df_csv.columns)}")
+    
+    print("\n=== CSV COLUMNS ===")
+    csv_cols = set(df_csv.columns)
+    for col in sorted(df_csv.columns):
+        print(f"  {col}")
+    
+    # Load parquet columns
+    df_parquet = pd.read_parquet(parquet_path)
+    parquet_cols = set(df_parquet.columns)
+    
+    print("\n\n=== SCHEMA COMPARISON ===")
+    
+    # Columns in both
+    common = csv_cols & parquet_cols
+    print(f"\n✓ Columns in both (n={len(common)}):")
+    for col in sorted(common):
+        csv_type = str(df_csv[col].dtype)
+        parquet_type = str(df_parquet[col].dtype)
+        if csv_type != parquet_type:
+            print(f"  {col:30s} CSV: {csv_type:15s} Parquet: {parquet_type}")
+        else:
+            print(f"  {col:30s} {csv_type}")
+    
+    # CSV only
+    csv_only = csv_cols - parquet_cols
+    if csv_only:
+        print(f"\n⚠ Columns only in CSV (n={len(csv_only)}):")
+        for col in sorted(csv_only):
+            print(f"  {col}")
+    
+    # Parquet only
+    parquet_only = parquet_cols - csv_cols
+    if parquet_only:
+        print(f"\n✓ Columns only in Parquet (n={len(parquet_only)}):")
+        for col in sorted(parquet_only):
+            print(f"  {col}")
+    
+    # File size comparison
+    parquet_size = os.path.getsize(parquet_path) / 1024 / 1024
+    size_reduction = (1 - parquet_size / csv_size) * 100
+    print(f"\n=== FILE SIZE COMPARISON ===")
+    print(f"CSV:     {csv_size:.2f} MB")
+    print(f"Parquet: {parquet_size:.2f} MB")
+    print(f"Savings: {size_reduction:.1f}%")
+
+if __name__ == "__main__":
+    df = inspect_parquet()
+    compare_to_csv()
--- a/code/scripts/preview_dfc_catalog_diff.py
+++ b/code/scripts/preview_dfc_catalog_diff.py
@ -1,305 +0,0 @@
-"""Catalog diff helper for verifying multi-face merge output.
-
-This utility regenerates the card CSV catalog (optionally writing compatibility
-snapshots) and then compares the merged outputs against the baseline snapshots.
-It is intended to support the MDFC rollout checklist by providing a concise summary
-of how many rows were merged, which cards collapsed into a single record, and
-whether any tag unions diverge from expectations.
-
-Example usage (from repo root, inside virtualenv):
-
-    python -m code.scripts.preview_dfc_catalog_diff --compat-snapshot --output logs/dfc_catalog_diff.json
-
-The script prints a human readable summary to stdout and optionally writes a JSON
-artifact for release/staging review.
-"""
-from __future__ import annotations
-
-import argparse
-import ast
-import importlib
-import json
-import os
-import sys
-import time
-from collections import Counter
-from pathlib import Path
-from typing import Any, Dict, Iterable, List, Sequence
-
-import pandas as pd
-
-from settings import COLORS, CSV_DIRECTORY
-
-DEFAULT_COMPAT_DIR = Path(os.getenv("DFC_COMPAT_DIR", "csv_files/compat_faces"))
-CSV_ROOT = Path(CSV_DIRECTORY)
-
-
-def _parse_list_cell(value: Any) -> List[str]:
-    """Convert serialized list cells ("['A', 'B']") into Python lists."""
-    if isinstance(value, list):
-        return [str(item) for item in value]
-    if value is None:
-        return []
-    if isinstance(value, float) and pd.isna(value):  # type: ignore[arg-type]
-        return []
-    text = str(value).strip()
-    if not text:
-        return []
-    try:
-        parsed = ast.literal_eval(text)
-    except (SyntaxError, ValueError):
-        return [text]
-    if isinstance(parsed, list):
-        return [str(item) for item in parsed]
-    return [str(parsed)]
-
-
-def _load_catalog(path: Path) -> pd.DataFrame:
-    if not path.exists():
-        raise FileNotFoundError(f"Catalog file missing: {path}")
-    df = pd.read_csv(path)
-    for column in ("themeTags", "keywords", "creatureTypes"):
-        if column in df.columns:
-            df[column] = df[column].apply(_parse_list_cell)
-    return df
-
-
-def _multi_face_names(df: pd.DataFrame) -> List[str]:
-    counts = Counter(df.get("name", []))
-    return [name for name, count in counts.items() if isinstance(name, str) and count > 1]
-
-
-def _collect_tags(series: Iterable[List[str]]) -> List[str]:
-    tags: List[str] = []
-    for value in series:
-        if isinstance(value, list):
-            tags.extend(str(item) for item in value)
-    return sorted(set(tags))
-
-
-def _summarize_color(
-    color: str,
-    merged: pd.DataFrame,
-    baseline: pd.DataFrame,
-    sample_size: int,
-) -> Dict[str, Any]:
-    merged_names = set(merged.get("name", []))
-    baseline_names = list(baseline.get("name", []))
-    baseline_name_set = set(name for name in baseline_names if isinstance(name, str))
-
-    multi_face = _multi_face_names(baseline)
-    collapsed = []
-    tag_mismatches: List[str] = []
-    missing_after_merge: List[str] = []
-
-    for name in multi_face:
-        group = baseline[baseline["name"] == name]
-        merged_row = merged[merged["name"] == name]
-        if merged_row.empty:
-            missing_after_merge.append(name)
-            continue
-        expected_tags = _collect_tags(group["themeTags"]) if "themeTags" in group else []
-        merged_tags = _collect_tags(merged_row.iloc[[0]]["themeTags"]) if "themeTags" in merged_row else []
-        if expected_tags != merged_tags:
-            tag_mismatches.append(name)
-        collapsed.append(name)
-
-    removed_names = sorted(baseline_name_set - merged_names)
-    added_names = sorted(merged_names - baseline_name_set)
-
-    return {
-        "rows_merged": len(merged),
-        "rows_baseline": len(baseline),
-        "row_delta": len(merged) - len(baseline),
-        "multi_face_groups": len(multi_face),
-        "collapsed_sample": collapsed[:sample_size],
-        "tag_union_mismatches": tag_mismatches[:sample_size],
-        "missing_after_merge": missing_after_merge[:sample_size],
-        "removed_names": removed_names[:sample_size],
-        "added_names": added_names[:sample_size],
-    }
-
-
-def _refresh_catalog(colors: Sequence[str], compat_snapshot: bool) -> None:
-    os.environ.pop("ENABLE_DFC_MERGE", None)
-    os.environ["DFC_COMPAT_SNAPSHOT"] = "1" if compat_snapshot else "0"
-    importlib.invalidate_caches()
-    # Reload tagger to pick up the new env var
-    tagger = importlib.import_module("code.tagging.tagger")
-    tagger = importlib.reload(tagger)  # type: ignore[assignment]
-
-    for color in colors:
-        tagger.load_dataframe(color)
-
-
-def generate_diff(
-    colors: Sequence[str],
-    compat_dir: Path,
-    sample_size: int,
-) -> Dict[str, Any]:
-    per_color: Dict[str, Any] = {}
-    overall = {
-        "total_rows_merged": 0,
-        "total_rows_baseline": 0,
-        "total_multi_face_groups": 0,
-        "colors": len(colors),
-        "tag_union_mismatches": 0,
-        "missing_after_merge": 0,
-    }
-
-    for color in colors:
-        merged_path = CSV_ROOT / f"{color}_cards.csv"
-        baseline_path = compat_dir / f"{color}_cards_unmerged.csv"
-        merged_df = _load_catalog(merged_path)
-        baseline_df = _load_catalog(baseline_path)
-        summary = _summarize_color(color, merged_df, baseline_df, sample_size)
-        per_color[color] = summary
-        overall["total_rows_merged"] += summary["rows_merged"]
-        overall["total_rows_baseline"] += summary["rows_baseline"]
-        overall["total_multi_face_groups"] += summary["multi_face_groups"]
-        overall["tag_union_mismatches"] += len(summary["tag_union_mismatches"])
-        overall["missing_after_merge"] += len(summary["missing_after_merge"])
-
-    overall["row_delta_total"] = overall["total_rows_merged"] - overall["total_rows_baseline"]
-    return {"overall": overall, "per_color": per_color}
-
-
-def main(argv: List[str]) -> int:
-    parser = argparse.ArgumentParser(description="Preview merged vs baseline DFC catalog diff")
-    parser.add_argument(
-        "--skip-refresh",
-        action="store_true",
-        help="Skip rebuilding the catalog in compatibility mode (requires existing compat snapshots)",
-    )
-    parser.add_argument(
-        "--mode",
-        default="",
-        help="[Deprecated] Legacy ENABLE_DFC_MERGE value (compat|1|0 etc.)",
-    )
-    parser.add_argument(
-        "--compat-snapshot",
-        dest="compat_snapshot",
-        action="store_true",
-        help="Write compatibility snapshots before diffing (default: off unless legacy --mode compat)",
-    )
-    parser.add_argument(
-        "--no-compat-snapshot",
-        dest="compat_snapshot",
-        action="store_false",
-        help="Skip compatibility snapshots even if legacy --mode compat is supplied",
-    )
-    parser.set_defaults(compat_snapshot=None)
-    parser.add_argument(
-        "--colors",
-        nargs="*",
-        help="Optional subset of colors to diff (defaults to full COLORS list)",
-    )
-    parser.add_argument(
-        "--compat-dir",
-        type=Path,
-        default=DEFAULT_COMPAT_DIR,
-        help="Directory containing unmerged compatibility snapshots (default: %(default)s)",
-    )
-    parser.add_argument(
-        "--output",
-        type=Path,
-        help="Optional JSON file to write with the diff summary",
-    )
-    parser.add_argument(
-        "--sample-size",
-        type=int,
-        default=10,
-        help="Number of sample entries to include per section (default: %(default)s)",
-    )
-    args = parser.parse_args(argv)
-
-    colors = tuple(args.colors) if args.colors else tuple(COLORS)
-    compat_dir = args.compat_dir
-
-    mode = str(args.mode or "").strip().lower()
-    if mode and mode not in {"compat", "dual", "both", "1", "on", "true", "0", "off", "false", "disabled"}:
-        print(
-            f"ℹ Legacy --mode value '{mode}' detected; merge remains enabled. Use --compat-snapshot as needed.",
-            flush=True,
-        )
-
-    if args.compat_snapshot is None:
-        compat_snapshot = mode in {"compat", "dual", "both"}
-    else:
-        compat_snapshot = args.compat_snapshot
-        if mode:
-            print(
-                "ℹ Ignoring deprecated --mode value because --compat-snapshot/--no-compat-snapshot was supplied.",
-                flush=True,
-            )
-
-    if mode in {"0", "off", "false", "disabled"}:
-        print(
-            "⚠ ENABLE_DFC_MERGE=off is deprecated; the merge remains enabled regardless of the value.",
-            flush=True,
-        )
-
-    if not args.skip_refresh:
-        start = time.perf_counter()
-        _refresh_catalog(colors, compat_snapshot)
-        duration = time.perf_counter() - start
-        snapshot_msg = "with compat snapshot" if compat_snapshot else "merged-only"
-        print(f"✔ Refreshed catalog in {duration:.1f}s ({snapshot_msg})")
-    else:
-        print("ℹ Using existing catalog outputs (refresh skipped)")
-
-    try:
-        diff = generate_diff(colors, compat_dir, args.sample_size)
-    except FileNotFoundError as exc:
-        print(f"ERROR: {exc}")
-        print("Run without --skip-refresh (or ensure compat snapshots exist).", file=sys.stderr)
-        return 2
-
-    overall = diff["overall"]
-    print("\n=== DFC Catalog Diff Summary ===")
-    print(
-        f"Merged rows: {overall['total_rows_merged']:,} | Baseline rows: {overall['total_rows_baseline']:,} | "
-        f"Δ rows: {overall['row_delta_total']:,}"
-    )
-    print(
-        f"Multi-face groups: {overall['total_multi_face_groups']:,} | "
-        f"Tag union mismatches: {overall['tag_union_mismatches']} | Missing after merge: {overall['missing_after_merge']}"
-    )
-
-    for color, summary in diff["per_color"].items():
-        print(f"\n[{color}] baseline={summary['rows_baseline']} merged={summary['rows_merged']} Δ={summary['row_delta']}")
-        if summary["multi_face_groups"]:
-            print(f"  multi-face groups: {summary['multi_face_groups']}")
-        if summary["collapsed_sample"]:
-            sample = ", ".join(summary["collapsed_sample"][:3])
-            print(f"  collapsed sample: {sample}")
-        if summary["tag_union_mismatches"]:
-            print(f"  TAG MISMATCH sample: {', '.join(summary['tag_union_mismatches'])}")
-        if summary["missing_after_merge"]:
-            print(f"  MISSING sample: {', '.join(summary['missing_after_merge'])}")
-        if summary["removed_names"]:
-            print(f"  removed sample: {', '.join(summary['removed_names'])}")
-        if summary["added_names"]:
-            print(f"  added sample: {', '.join(summary['added_names'])}")
-
-    if args.output:
-        payload = {
-            "captured_at": int(time.time()),
-            "mode": args.mode,
-            "colors": colors,
-            "compat_dir": str(compat_dir),
-            "summary": diff,
-        }
-        try:
-            args.output.parent.mkdir(parents=True, exist_ok=True)
-            args.output.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8")
-            print(f"\n📄 Wrote JSON summary to {args.output}")
-        except Exception as exc:  # pragma: no cover
-            print(f"Failed to write output file {args.output}: {exc}", file=sys.stderr)
-            return 3
-
-    return 0
-
-
-if __name__ == "__main__":  # pragma: no cover
-    raise SystemExit(main(sys.argv[1:]))
--- a/code/scripts/preview_metrics_snapshot.py
+++ b/code/scripts/preview_metrics_snapshot.py
@ -1,105 +0,0 @@
-"""CLI utility: snapshot preview metrics and emit summary/top slow themes.
-
-Usage (from repo root virtualenv):
-  python -m code.scripts.preview_metrics_snapshot --limit 10 --output logs/preview_metrics_snapshot.json
-
-Fetches /themes/metrics (requires WEB_THEME_PICKER_DIAGNOSTICS=1) and writes a compact JSON plus
-human-readable summary to stdout.
-"""
-from __future__ import annotations
-
-import argparse
-import json
-import sys
-import time
-from pathlib import Path
-from typing import Any, Dict
-
-import urllib.request
-import urllib.error
-
-DEFAULT_URL = "http://localhost:8000/themes/metrics"
-
-
-def fetch_metrics(url: str) -> Dict[str, Any]:
-    req = urllib.request.Request(url, headers={"Accept": "application/json"})
-    with urllib.request.urlopen(req, timeout=10) as resp:  # nosec B310 (local trusted)
-        data = resp.read().decode("utf-8", "replace")
-    try:
-        return json.loads(data)  # type: ignore[return-value]
-    except json.JSONDecodeError as e:  # pragma: no cover - unlikely if server OK
-        raise SystemExit(f"Invalid JSON from metrics endpoint: {e}\nRaw: {data[:400]}")
-
-
-def summarize(metrics: Dict[str, Any], top_n: int) -> Dict[str, Any]:
-    preview = (metrics.get("preview") or {}) if isinstance(metrics, dict) else {}
-    per_theme = preview.get("per_theme") or {}
-    # Compute top slow themes by avg_ms
-    items = []
-    for slug, info in per_theme.items():
-        if not isinstance(info, dict):
-            continue
-        avg = info.get("avg_ms")
-        if isinstance(avg, (int, float)):
-            items.append((slug, float(avg), info))
-    items.sort(key=lambda x: x[1], reverse=True)
-    top = items[:top_n]
-    return {
-        "preview_requests": preview.get("preview_requests"),
-        "preview_cache_hits": preview.get("preview_cache_hits"),
-        "preview_avg_build_ms": preview.get("preview_avg_build_ms"),
-        "preview_p95_build_ms": preview.get("preview_p95_build_ms"),
-        "preview_ttl_seconds": preview.get("preview_ttl_seconds"),
-        "editorial_curated_vs_sampled_pct": preview.get("editorial_curated_vs_sampled_pct"),
-        "top_slowest": [
-            {
-                "slug": slug,
-                "avg_ms": avg,
-                "p95_ms": info.get("p95_ms"),
-                "builds": info.get("builds"),
-                "requests": info.get("requests"),
-                "avg_curated_pct": info.get("avg_curated_pct"),
-            }
-            for slug, avg, info in top
-        ],
-    }
-
-
-def main(argv: list[str]) -> int:
-    ap = argparse.ArgumentParser(description="Snapshot preview metrics")
-    ap.add_argument("--url", default=DEFAULT_URL, help="Metrics endpoint URL (default: %(default)s)")
-    ap.add_argument("--limit", type=int, default=10, help="Top N slow themes to include (default: %(default)s)")
-    ap.add_argument("--output", type=Path, help="Optional output JSON file for snapshot")
-    ap.add_argument("--quiet", action="store_true", help="Suppress stdout summary (still writes file if --output)")
-    args = ap.parse_args(argv)
-
-    try:
-        raw = fetch_metrics(args.url)
-    except urllib.error.URLError as e:
-        print(f"ERROR: Failed fetching metrics endpoint: {e}", file=sys.stderr)
-        return 2
-
-    summary = summarize(raw, args.limit)
-    snapshot = {
-        "captured_at": int(time.time()),
-        "source": args.url,
-        "summary": summary,
-    }
-
-    if args.output:
-        try:
-            args.output.parent.mkdir(parents=True, exist_ok=True)
-            args.output.write_text(json.dumps(snapshot, indent=2, sort_keys=True), encoding="utf-8")
-        except Exception as e:  # pragma: no cover
-            print(f"ERROR: writing snapshot file failed: {e}", file=sys.stderr)
-            return 3
-
-    if not args.quiet:
-        print("Preview Metrics Snapshot:")
-        print(json.dumps(summary, indent=2))
-
-    return 0
-
-
-if __name__ == "__main__":  # pragma: no cover
-    raise SystemExit(main(sys.argv[1:]))
--- a/code/scripts/preview_perf_benchmark.py
+++ b/code/scripts/preview_perf_benchmark.py
@ -1,349 +0,0 @@
-"""Ad-hoc performance benchmark for theme preview build latency (Phase A validation).
-
-Runs warm-up plus measured request loops against several theme slugs and prints
-aggregate latency stats (p50/p90/p95, cache hit ratio evolution). Intended to
-establish or validate that refactor did not introduce >5% p95 regression.
-
-Usage (ensure server running locally – commonly :8080 in docker compose):
-    python -m code.scripts.preview_perf_benchmark --themes 8 --loops 40 \
-            --url http://localhost:8080 --warm 1 --limit 12
-
-Theme slug discovery hierarchy (when --theme not provided):
-    1. Try /themes/index.json (legacy / planned static index)
-    2. Fallback to /themes/api/themes (current API) and take the first N ids
-The discovered slugs are sorted deterministically then truncated to N.
-
-NOTE: This is intentionally minimal (no external deps). For stable comparisons
-run with identical parameters pre/post-change and commit the JSON output under
-logs/perf/.
-"""
-from __future__ import annotations
-
-import argparse
-import json
-import statistics
-import time
-from typing import Any, Dict, List
-import urllib.request
-import urllib.error
-import sys
-from pathlib import Path
-
-
-def _fetch_json(url: str) -> Dict[str, Any]:
-    req = urllib.request.Request(url, headers={"Accept": "application/json"})
-    with urllib.request.urlopen(req, timeout=15) as resp:  # nosec B310 local dev
-        data = resp.read().decode("utf-8", "replace")
-    return json.loads(data)  # type: ignore[return-value]
-
-
-def _fetch_json_with_retry(url: str, attempts: int = 3, delay: float = 0.6) -> Dict[str, Any]:
-    last_error: Exception | None = None
-    for attempt in range(1, attempts + 1):
-        try:
-            return _fetch_json(url)
-        except Exception as exc:  # pragma: no cover - network variability
-            last_error = exc
-            if attempt < attempts:
-                print(json.dumps({  # noqa: T201
-                    "event": "preview_perf_fetch_retry",
-                    "url": url,
-                    "attempt": attempt,
-                    "max_attempts": attempts,
-                    "error": str(exc),
-                }))
-                time.sleep(delay * attempt)
-            else:
-                raise
-    raise last_error  # pragma: no cover - defensive; should be unreachable
-
-
-def select_theme_slugs(base_url: str, count: int) -> List[str]:
-    """Discover theme slugs for benchmarking.
-
-    Attempts legacy static index first, then falls back to live API listing.
-    """
-    errors: List[str] = []
-    slugs: List[str] = []
-    # Attempt 1: legacy /themes/index.json
-    try:
-        idx = _fetch_json(f"{base_url.rstrip('/')}/themes/index.json")
-        entries = idx.get("themes") or []
-        for it in entries:
-            if not isinstance(it, dict):
-                continue
-            slug = it.get("slug") or it.get("id") or it.get("theme_id")
-            if isinstance(slug, str):
-                slugs.append(slug)
-    except Exception as e:  # pragma: no cover - network variability
-        errors.append(f"index.json failed: {e}")
-
-    if not slugs:
-        # Attempt 2: live API listing
-        try:
-            listing = _fetch_json(f"{base_url.rstrip('/')}/themes/api/themes")
-            items = listing.get("items") or []
-            for it in items:
-                if not isinstance(it, dict):
-                    continue
-                tid = it.get("id") or it.get("slug") or it.get("theme_id")
-                if isinstance(tid, str):
-                    slugs.append(tid)
-        except Exception as e:  # pragma: no cover - network variability
-            errors.append(f"api/themes failed: {e}")
-
-    slugs = sorted(set(slugs))[:count]
-    if not slugs:
-        raise SystemExit("No theme slugs discovered; cannot benchmark (" + "; ".join(errors) + ")")
-    return slugs
-
-
-def fetch_all_theme_slugs(base_url: str, page_limit: int = 200) -> List[str]:
-    """Fetch all theme slugs via paginated /themes/api/themes endpoint.
-
-    Uses maximum page size (200) and iterates using offset until no next page.
-    Returns deterministic sorted unique list of slugs.
-    """
-    slugs: List[str] = []
-    offset = 0
-    seen: set[str] = set()
-    page_attempts = 5
-    page_delay = 1.2
-    while True:
-        url = f"{base_url.rstrip('/')}/themes/api/themes?limit={page_limit}&offset={offset}"
-        data: Dict[str, Any] | None = None
-        last_error: Exception | None = None
-        for attempt in range(1, page_attempts + 1):
-            try:
-                data = _fetch_json_with_retry(url, attempts=4, delay=0.75)
-                break
-            except Exception as exc:  # pragma: no cover - network variability
-                last_error = exc
-                if attempt < page_attempts:
-                    print(json.dumps({  # noqa: T201
-                        "event": "preview_perf_page_retry",
-                        "offset": offset,
-                        "attempt": attempt,
-                        "max_attempts": page_attempts,
-                        "error": str(exc),
-                    }))
-                    time.sleep(page_delay * attempt)
-                else:
-                    raise SystemExit(f"Failed fetching themes page offset={offset}: {exc}")
-        if data is None:  # pragma: no cover - defensive
-            raise SystemExit(f"Failed fetching themes page offset={offset}: {last_error}")
-        items = data.get("items") or []
-        for it in items:
-            if not isinstance(it, dict):
-                continue
-            tid = it.get("id") or it.get("slug") or it.get("theme_id")
-            if isinstance(tid, str) and tid not in seen:
-                seen.add(tid)
-                slugs.append(tid)
-        next_offset = data.get("next_offset")
-        if not next_offset or next_offset == offset:
-            break
-        offset = int(next_offset)
-    return sorted(slugs)
-
-
-def percentile(values: List[float], pct: float) -> float:
-    if not values:
-        return 0.0
-    sv = sorted(values)
-    k = (len(sv) - 1) * pct
-    f = int(k)
-    c = min(f + 1, len(sv) - 1)
-    if f == c:
-        return sv[f]
-    d0 = sv[f] * (c - k)
-    d1 = sv[c] * (k - f)
-    return d0 + d1
-
-
-def run_loop(base_url: str, slugs: List[str], loops: int, limit: int, warm: bool, path_template: str) -> Dict[str, Any]:
-    latencies: List[float] = []
-    per_slug_counts = {s: 0 for s in slugs}
-    t_start = time.time()
-    for i in range(loops):
-        slug = slugs[i % len(slugs)]
-        # path_template may contain {slug} and {limit}
-        try:
-            rel = path_template.format(slug=slug, limit=limit)
-        except Exception:
-            rel = f"/themes/api/theme/{slug}/preview?limit={limit}"
-        if not rel.startswith('/'):
-            rel = '/' + rel
-        url = f"{base_url.rstrip('/')}{rel}"
-        t0 = time.time()
-        try:
-            _fetch_json(url)
-        except Exception as e:
-            print(json.dumps({"event": "perf_benchmark_error", "slug": slug, "error": str(e)}))  # noqa: T201
-            continue
-        ms = (time.time() - t0) * 1000.0
-        latencies.append(ms)
-        per_slug_counts[slug] += 1
-    elapsed = time.time() - t_start
-    return {
-        "warm": warm,
-        "loops": loops,
-        "slugs": slugs,
-        "per_slug_requests": per_slug_counts,
-        "elapsed_s": round(elapsed, 3),
-        "p50_ms": round(percentile(latencies, 0.50), 2),
-        "p90_ms": round(percentile(latencies, 0.90), 2),
-        "p95_ms": round(percentile(latencies, 0.95), 2),
-        "avg_ms": round(statistics.mean(latencies), 2) if latencies else 0.0,
-        "count": len(latencies),
-        "_latencies": latencies,  # internal (removed in final result unless explicitly retained)
-    }
-
-
-def _stats_from_latencies(latencies: List[float]) -> Dict[str, Any]:
-    if not latencies:
-        return {"count": 0, "p50_ms": 0.0, "p90_ms": 0.0, "p95_ms": 0.0, "avg_ms": 0.0}
-    return {
-        "count": len(latencies),
-        "p50_ms": round(percentile(latencies, 0.50), 2),
-        "p90_ms": round(percentile(latencies, 0.90), 2),
-        "p95_ms": round(percentile(latencies, 0.95), 2),
-        "avg_ms": round(statistics.mean(latencies), 2),
-    }
-
-
-def main(argv: List[str]) -> int:
-    ap = argparse.ArgumentParser(description="Theme preview performance benchmark")
-    ap.add_argument("--url", default="http://localhost:8000", help="Base server URL (default: %(default)s)")
-    ap.add_argument("--themes", type=int, default=6, help="Number of theme slugs to exercise (default: %(default)s)")
-    ap.add_argument("--loops", type=int, default=60, help="Total request iterations (default: %(default)s)")
-    ap.add_argument("--limit", type=int, default=12, help="Preview size (default: %(default)s)")
-    ap.add_argument("--path-template", default="/themes/api/theme/{slug}/preview?limit={limit}", help="Format string for preview request path (default: %(default)s)")
-    ap.add_argument("--theme", action="append", dest="explicit_theme", help="Explicit theme slug(s); overrides automatic selection")
-    ap.add_argument("--warm", type=int, default=1, help="Number of warm-up loops (full cycles over selected slugs) (default: %(default)s)")
-    ap.add_argument("--output", type=Path, help="Optional JSON output path (committed under logs/perf)")
-    ap.add_argument("--all", action="store_true", help="Exercise ALL themes (ignores --themes; loops auto-set to passes*total_slugs unless --loops-explicit)")
-    ap.add_argument("--passes", type=int, default=1, help="When using --all, number of passes over the full theme set (default: %(default)s)")
-    # Hidden flag to detect if user explicitly set --loops (argparse has no direct support, so use sentinel technique)
-    # We keep original --loops for backwards compatibility; when --all we recompute unless user passed --loops-explicit
-    ap.add_argument("--loops-explicit", action="store_true", help=argparse.SUPPRESS)
-    ap.add_argument("--extract-warm-baseline", type=Path, help="If multi-pass (--all --passes >1), write a warm-only baseline JSON (final pass stats) to this path")
-    args = ap.parse_args(argv)
-
-    try:
-        if args.explicit_theme:
-            slugs = args.explicit_theme
-        elif args.all:
-            slugs = fetch_all_theme_slugs(args.url)
-        else:
-            slugs = select_theme_slugs(args.url, args.themes)
-    except SystemExit as e:  # pragma: no cover - dependency on live server
-        print(str(e), file=sys.stderr)
-        return 2
-
-    mode = "all" if args.all else "subset"
-    total_slugs = len(slugs)
-    if args.all and not args.loops_explicit:
-        # Derive loops = passes * total_slugs
-        args.loops = max(1, args.passes) * total_slugs
-
-    print(json.dumps({  # noqa: T201
-        "event": "preview_perf_start",
-        "mode": mode,
-        "total_slugs": total_slugs,
-        "planned_loops": args.loops,
-        "passes": args.passes if args.all else None,
-    }))
-
-    # Execution paths:
-    # 1. Standard subset or single-pass all: warm cycles -> single measured run
-    # 2. Multi-pass all mode (--all --passes >1): iterate passes capturing per-pass stats (no separate warm loops)
-    if args.all and args.passes > 1:
-        pass_results: List[Dict[str, Any]] = []
-        combined_latencies: List[float] = []
-        t0_all = time.time()
-        for p in range(1, args.passes + 1):
-            r = run_loop(args.url, slugs, len(slugs), args.limit, warm=(p == 1), path_template=args.path_template)
-            lat = r.pop("_latencies", [])
-            combined_latencies.extend(lat)
-            pass_result = {
-                "pass": p,
-                "warm": r["warm"],
-                "elapsed_s": r["elapsed_s"],
-                "p50_ms": r["p50_ms"],
-                "p90_ms": r["p90_ms"],
-                "p95_ms": r["p95_ms"],
-                "avg_ms": r["avg_ms"],
-                "count": r["count"],
-            }
-            pass_results.append(pass_result)
-        total_elapsed = round(time.time() - t0_all, 3)
-        aggregate = _stats_from_latencies(combined_latencies)
-        result = {
-            "mode": mode,
-            "total_slugs": total_slugs,
-            "passes": args.passes,
-            "slugs": slugs,
-            "combined": {
-                **aggregate,
-                "elapsed_s": total_elapsed,
-            },
-            "passes_results": pass_results,
-            "cold_pass_p95_ms": pass_results[0]["p95_ms"],
-            "warm_pass_p95_ms": pass_results[-1]["p95_ms"],
-            "cold_pass_p50_ms": pass_results[0]["p50_ms"],
-            "warm_pass_p50_ms": pass_results[-1]["p50_ms"],
-        }
-        print(json.dumps({"event": "preview_perf_result", **result}, indent=2))  # noqa: T201
-        # Optional warm baseline extraction (final pass only; represents warmed steady-state)
-        if args.extract_warm_baseline:
-            try:
-                wb = pass_results[-1]
-                warm_obj = {
-                    "event": "preview_perf_warm_baseline",
-                    "mode": mode,
-                    "total_slugs": total_slugs,
-                    "warm_baseline": True,
-                    "source_pass": wb["pass"],
-                    "p50_ms": wb["p50_ms"],
-                    "p90_ms": wb["p90_ms"],
-                    "p95_ms": wb["p95_ms"],
-                    "avg_ms": wb["avg_ms"],
-                    "count": wb["count"],
-                    "slugs": slugs,
-                }
-                args.extract_warm_baseline.parent.mkdir(parents=True, exist_ok=True)
-                args.extract_warm_baseline.write_text(json.dumps(warm_obj, indent=2, sort_keys=True), encoding="utf-8")
-                print(json.dumps({  # noqa: T201
-                    "event": "preview_perf_warm_baseline_written",
-                    "path": str(args.extract_warm_baseline),
-                    "p95_ms": wb["p95_ms"],
-                }))
-            except Exception as e:  # pragma: no cover
-                print(json.dumps({"event": "preview_perf_warm_baseline_error", "error": str(e)}))  # noqa: T201
-    else:
-        # Warm-up loops first (if requested)
-        for w in range(args.warm):
-            run_loop(args.url, slugs, len(slugs), args.limit, warm=True, path_template=args.path_template)
-        result = run_loop(args.url, slugs, args.loops, args.limit, warm=False, path_template=args.path_template)
-        result.pop("_latencies", None)
-        result["slugs"] = slugs
-        result["mode"] = mode
-        result["total_slugs"] = total_slugs
-        if args.all:
-            result["passes"] = args.passes
-        print(json.dumps({"event": "preview_perf_result", **result}, indent=2))  # noqa: T201
-
-    if args.output:
-        try:
-            args.output.parent.mkdir(parents=True, exist_ok=True)
-            # Ensure we write the final result object (multi-pass already prepared above)
-            args.output.write_text(json.dumps(result, indent=2, sort_keys=True), encoding="utf-8")
-        except Exception as e:  # pragma: no cover
-            print(f"ERROR: failed writing output file: {e}", file=sys.stderr)
-            return 3
-    return 0
-
-
-if __name__ == "__main__":  # pragma: no cover
-    raise SystemExit(main(sys.argv[1:]))
--- a/code/scripts/preview_perf_ci_check.py
+++ b/code/scripts/preview_perf_ci_check.py
@ -1,106 +0,0 @@
-"""CI helper: run a warm-pass benchmark candidate (single pass over all themes)
-then compare against the committed warm baseline with threshold enforcement.
-
-Intended usage (example):
-  python -m code.scripts.preview_perf_ci_check --url http://localhost:8080 \
-      --baseline logs/perf/theme_preview_warm_baseline.json --p95-threshold 5
-
-Exit codes:
-  0 success (within threshold)
-  2 regression (p95 delta > threshold)
-  3 setup / usage error
-
-Notes:
- Uses --all --passes 1 to create a fresh candidate snapshot that approximates
-  a warmed steady-state (server should have background refresh / typical load).
- If you prefer multi-pass then warm-only selection, adjust logic accordingly.
-"""
-from __future__ import annotations
-
-import argparse
-import json
-import subprocess
-import sys
-import time
-import urllib.error
-import urllib.request
-from pathlib import Path
-def _wait_for_service(base_url: str, attempts: int = 12, delay: float = 1.5) -> bool:
-    health_url = base_url.rstrip("/") + "/healthz"
-    last_error: Exception | None = None
-    for attempt in range(1, attempts + 1):
-        try:
-            with urllib.request.urlopen(health_url, timeout=5) as resp:  # nosec B310 local CI
-                if 200 <= resp.status < 300:
-                    return True
-        except urllib.error.HTTPError as exc:
-            last_error = exc
-            if 400 <= exc.code < 500 and exc.code != 429:
-                # Treat permanent client errors (other than rate limit) as fatal
-                break
-        except Exception as exc:  # pragma: no cover - network variability
-            last_error = exc
-    time.sleep(delay * attempt)
-    print(json.dumps({
-        "event": "ci_perf_error",
-        "stage": "startup",
-        "message": "Service health check failed",
-        "url": health_url,
-        "attempts": attempts,
-        "error": str(last_error) if last_error else None,
-    }))
-    return False
-
-def run(cmd: list[str]) -> subprocess.CompletedProcess:
-    return subprocess.run(cmd, capture_output=True, text=True, check=False)
-
-def main(argv: list[str]) -> int:
-    ap = argparse.ArgumentParser(description="Preview performance CI regression gate")
-    ap.add_argument("--url", default="http://localhost:8080", help="Base URL of running web service")
-    ap.add_argument("--baseline", type=Path, required=True, help="Path to committed warm baseline JSON")
-    ap.add_argument("--p95-threshold", type=float, default=5.0, help="Max allowed p95 regression percent (default: %(default)s)")
-    ap.add_argument("--candidate-output", type=Path, default=Path("logs/perf/theme_preview_ci_candidate.json"), help="Where to write candidate benchmark JSON")
-    ap.add_argument("--multi-pass", action="store_true", help="Run a 2-pass all-themes benchmark and compare warm pass only (optional enhancement)")
-    args = ap.parse_args(argv)
-
-    if not args.baseline.exists():
-        print(json.dumps({"event":"ci_perf_error","message":"Baseline not found","path":str(args.baseline)}))
-        return 3
-
-    if not _wait_for_service(args.url):
-        return 3
-
-    # Run candidate single-pass all-themes benchmark (no extra warm cycles to keep CI fast)
-    # If multi-pass requested, run two passes over all themes so second pass represents warmed steady-state.
-    passes = "2" if args.multi_pass else "1"
-    bench_cmd = [sys.executable, "-m", "code.scripts.preview_perf_benchmark", "--url", args.url, "--all", "--passes", passes, "--output", str(args.candidate_output)]
-    bench_proc = run(bench_cmd)
-    if bench_proc.returncode != 0:
-        print(json.dumps({"event":"ci_perf_error","stage":"benchmark","code":bench_proc.returncode,"stderr":bench_proc.stderr}))
-        return 3
-    print(bench_proc.stdout)
-
-    if not args.candidate_output.exists():
-        print(json.dumps({"event":"ci_perf_error","message":"Candidate output missing"}))
-        return 3
-
-    compare_cmd = [
-        sys.executable,
-        "-m","code.scripts.preview_perf_compare",
-        "--baseline", str(args.baseline),
-        "--candidate", str(args.candidate_output),
-        "--warm-only",
-        "--p95-threshold", str(args.p95_threshold),
-    ]
-    cmp_proc = run(compare_cmd)
-    print(cmp_proc.stdout)
-    if cmp_proc.returncode == 2:
-        # Already printed JSON with failure status
-        return 2
-    if cmp_proc.returncode != 0:
-        print(json.dumps({"event":"ci_perf_error","stage":"compare","code":cmp_proc.returncode,"stderr":cmp_proc.stderr}))
-        return 3
-    return 0
-
-if __name__ == "__main__":  # pragma: no cover
-    raise SystemExit(main(sys.argv[1:]))
--- a/code/scripts/preview_perf_compare.py
+++ b/code/scripts/preview_perf_compare.py
@ -1,115 +0,0 @@
-"""Compare two preview benchmark JSON result files and emit delta stats.
-
-Usage:
-  python -m code.scripts.preview_perf_compare --baseline logs/perf/theme_preview_baseline_all_pass1_20250923.json --candidate logs/perf/new_run.json
-
-Outputs JSON with percentage deltas for p50/p90/p95/avg (positive = regression/slower).
-If multi-pass structures are present (combined & passes_results) those are included.
-"""
-from __future__ import annotations
-
-import argparse
-import json
-from pathlib import Path
-from typing import Any, Dict
-
-
-def load(path: Path) -> Dict[str, Any]:
-    data = json.loads(path.read_text(encoding="utf-8"))
-    # Multi-pass result may store stats under combined
-    if "combined" in data:
-        core = data["combined"].copy()
-        # Inject representative fields for uniform comparison
-        core["p50_ms"] = core.get("p50_ms") or data.get("p50_ms")
-        core["p90_ms"] = core.get("p90_ms") or data.get("p90_ms")
-        core["p95_ms"] = core.get("p95_ms") or data.get("p95_ms")
-        core["avg_ms"] = core.get("avg_ms") or data.get("avg_ms")
-        data["_core_stats"] = core
-    else:
-        data["_core_stats"] = {
-            k: data.get(k) for k in ("p50_ms", "p90_ms", "p95_ms", "avg_ms", "count")
-        }
-    return data
-
-
-def pct_delta(new: float, old: float) -> float:
-    if old == 0:
-        return 0.0
-    return round(((new - old) / old) * 100.0, 2)
-
-
-def compare(baseline: Dict[str, Any], candidate: Dict[str, Any]) -> Dict[str, Any]:
-    b = baseline["_core_stats"]
-    c = candidate["_core_stats"]
-    result = {"baseline_count": b.get("count"), "candidate_count": c.get("count")}
-    for k in ("p50_ms", "p90_ms", "p95_ms", "avg_ms"):
-        if b.get(k) is not None and c.get(k) is not None:
-            result[k] = {
-                "baseline": b[k],
-                "candidate": c[k],
-                "delta_pct": pct_delta(c[k], b[k]),
-            }
-    # If both have per-pass details include first and last pass p95/p50
-    if "passes_results" in baseline and "passes_results" in candidate:
-        result["passes"] = {
-            "baseline": {
-                "cold_p95": baseline.get("cold_pass_p95_ms"),
-                "warm_p95": baseline.get("warm_pass_p95_ms"),
-                "cold_p50": baseline.get("cold_pass_p50_ms"),
-                "warm_p50": baseline.get("warm_pass_p50_ms"),
-            },
-            "candidate": {
-                "cold_p95": candidate.get("cold_pass_p95_ms"),
-                "warm_p95": candidate.get("warm_pass_p95_ms"),
-                "cold_p50": candidate.get("cold_pass_p50_ms"),
-                "warm_p50": candidate.get("warm_pass_p50_ms"),
-            },
-        }
-    return result
-
-
-def main(argv: list[str]) -> int:
-    ap = argparse.ArgumentParser(description="Compare two preview benchmark JSON result files")
-    ap.add_argument("--baseline", required=True, type=Path, help="Baseline JSON path")
-    ap.add_argument("--candidate", required=True, type=Path, help="Candidate JSON path")
-    ap.add_argument("--p95-threshold", type=float, default=None, help="Fail (exit 2) if p95 regression exceeds this percent (positive delta)")
-    ap.add_argument("--warm-only", action="store_true", help="When both results have passes, compare warm pass p95/p50 instead of combined/core")
-    args = ap.parse_args(argv)
-    if not args.baseline.exists():
-        raise SystemExit(f"Baseline not found: {args.baseline}")
-    if not args.candidate.exists():
-        raise SystemExit(f"Candidate not found: {args.candidate}")
-    baseline = load(args.baseline)
-    candidate = load(args.candidate)
-    # If warm-only requested and both have warm pass stats, override _core_stats before compare
-    if args.warm_only and "warm_pass_p95_ms" in baseline and "warm_pass_p95_ms" in candidate:
-        baseline["_core_stats"] = {
-            "p50_ms": baseline.get("warm_pass_p50_ms"),
-            "p90_ms": baseline.get("_core_stats", {}).get("p90_ms"),  # p90 not tracked per-pass; retain combined
-            "p95_ms": baseline.get("warm_pass_p95_ms"),
-            "avg_ms": baseline.get("_core_stats", {}).get("avg_ms"),
-            "count": baseline.get("_core_stats", {}).get("count"),
-        }
-        candidate["_core_stats"] = {
-            "p50_ms": candidate.get("warm_pass_p50_ms"),
-            "p90_ms": candidate.get("_core_stats", {}).get("p90_ms"),
-            "p95_ms": candidate.get("warm_pass_p95_ms"),
-            "avg_ms": candidate.get("_core_stats", {}).get("avg_ms"),
-            "count": candidate.get("_core_stats", {}).get("count"),
-        }
-    cmp = compare(baseline, candidate)
-    payload = {"event": "preview_perf_compare", **cmp}
-    if args.p95_threshold is not None and "p95_ms" in cmp:
-        delta = cmp["p95_ms"]["delta_pct"]
-        payload["threshold"] = {"p95_threshold": args.p95_threshold, "p95_delta_pct": delta}
-        if delta is not None and delta > args.p95_threshold:
-            payload["result"] = "fail"
-            print(json.dumps(payload, indent=2))  # noqa: T201
-            return 2
-        payload["result"] = "pass"
-    print(json.dumps(payload, indent=2))  # noqa: T201
-    return 0
-
-
-if __name__ == "__main__":  # pragma: no cover
-    raise SystemExit(main(__import__('sys').argv[1:]))
--- a/code/scripts/profile_multi_theme_filter.py
+++ b/code/scripts/profile_multi_theme_filter.py
@ -42,7 +42,7 @@ def _sample_combinations(tags: List[str], iterations: int) -> List[Tuple[str | N

 def _collect_tag_pool(df: pd.DataFrame) -> List[str]:
    tag_pool: set[str] = set()
-    for tags in df.get("_ltags", []):  # type: ignore[assignment]
+    for tags in df.get("_ltags", []):
        if not tags:
            continue
        for token in tags:
--- a/code/scripts/refresh_commander_catalog.py
+++ b/code/scripts/refresh_commander_catalog.py
@ -37,7 +37,7 @@ def _refresh_setup() -> None:

 def _refresh_tags() -> None:
    tagger = importlib.import_module("code.tagging.tagger")
-    tagger = importlib.reload(tagger)  # type: ignore[assignment]
+    tagger = importlib.reload(tagger)
    for color in SUPPORTED_COLORS:
        tagger.load_dataframe(color)

--- a/code/scripts/report_random_theme_pool.py
+++ b/code/scripts/report_random_theme_pool.py
@ -21,7 +21,7 @@ PROJECT_ROOT = Path(__file__).resolve().parents[1]
 if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

-from deck_builder.random_entrypoint import (  # type: ignore  # noqa: E402
+from deck_builder.random_entrypoint import (  # noqa: E402
    _build_random_theme_pool,
    _ensure_theme_tag_cache,
    _load_commanders_df,
--- a/code/scripts/synergy_promote_fill.py
+++ b/code/scripts/synergy_promote_fill.py
@ -731,7 +731,7 @@ def main():  # pragma: no cover (script orchestration)
                if cand:
                    theme_card_hits[display] = cand
            # Build global duplicate frequency map ONCE (baseline prior to this run) if threshold active
-            if args.common_card_threshold > 0 and 'GLOBAL_CARD_FREQ' not in globals():  # type: ignore
+            if args.common_card_threshold > 0 and 'GLOBAL_CARD_FREQ' not in globals():
                freq: Dict[str, int] = {}
                total_themes = 0
                for fp0 in CATALOG_DIR.glob('*.yml'):
@ -748,10 +748,10 @@ def main():  # pragma: no cover (script orchestration)
                            continue
                        seen_local.add(c)
                        freq[c] = freq.get(c, 0) + 1
-                globals()['GLOBAL_CARD_FREQ'] = (freq, total_themes)  # type: ignore
+                globals()['GLOBAL_CARD_FREQ'] = (freq, total_themes)
            # Apply duplicate filtering to candidate lists (do NOT mutate existing example_cards)
-            if args.common_card_threshold > 0 and 'GLOBAL_CARD_FREQ' in globals():  # type: ignore
-                freq_map, total_prev = globals()['GLOBAL_CARD_FREQ']  # type: ignore
+            if args.common_card_threshold > 0 and 'GLOBAL_CARD_FREQ' in globals():
+                freq_map, total_prev = globals()['GLOBAL_CARD_FREQ']
                if total_prev > 0:  # avoid div-by-zero
                    cutoff = args.common_card_threshold
                    def _filter(lst: List[Tuple[float, str, Set[str]]]) -> List[Tuple[float, str, Set[str]]]:
@ -803,8 +803,8 @@ def main():  # pragma: no cover (script orchestration)
    print(f"[promote] modified {changed_count} themes")
    if args.fill_example_cards:
        print(f"[cards] modified {cards_changed} themes (target {args.cards_target})")
-        if args.print_dup_metrics and 'GLOBAL_CARD_FREQ' in globals():  # type: ignore
-            freq_map, total_prev = globals()['GLOBAL_CARD_FREQ']  # type: ignore
+        if args.print_dup_metrics and 'GLOBAL_CARD_FREQ' in globals():
+            freq_map, total_prev = globals()['GLOBAL_CARD_FREQ']
            if total_prev:
                items = sorted(freq_map.items(), key=lambda x: (-x[1], x[0]))[:30]
                print('[dup-metrics] Top shared example_cards (baseline before this run):')
--- a/code/scripts/validate_theme_catalog.py
+++ b/code/scripts/validate_theme_catalog.py
@ -31,9 +31,9 @@ CODE_ROOT = ROOT / 'code'
 if str(CODE_ROOT) not in sys.path:
    sys.path.insert(0, str(CODE_ROOT))

-from type_definitions_theme_catalog import ThemeCatalog, ThemeYAMLFile  # type: ignore
-from scripts.extract_themes import load_whitelist_config  # type: ignore
-from scripts.build_theme_catalog import build_catalog  # type: ignore
+from type_definitions_theme_catalog import ThemeCatalog, ThemeYAMLFile
+from scripts.extract_themes import load_whitelist_config
+from scripts.build_theme_catalog import build_catalog

 CATALOG_JSON = ROOT / 'config' / 'themes' / 'theme_list.json'

--- a/code/scripts/warm_preview_traffic.py
+++ b/code/scripts/warm_preview_traffic.py
@ -1,91 +0,0 @@
-"""Generate warm preview traffic to populate theme preview cache & metrics.
-
-Usage:
-  python -m code.scripts.warm_preview_traffic --count 25 --repeats 2 \
-      --base-url http://localhost:8000 --delay 0.05
-
-Requirements:
-  - FastAPI server running locally exposing /themes endpoints
-  - WEB_THEME_PICKER_DIAGNOSTICS=1 so /themes/metrics is accessible
-
-Strategy:
-  1. Fetch /themes/fragment/list?limit=COUNT to obtain HTML table.
-  2. Extract theme slugs via regex on data-theme-id attributes.
-  3. Issue REPEATS preview fragment requests per slug in order.
-  4. Print simple timing / status summary.
-
-This script intentionally uses stdlib only (urllib, re, time) to avoid extra deps.
-"""
-from __future__ import annotations
-
-import argparse
-import re
-import time
-import urllib.request
-import urllib.error
-from typing import List
-
-LIST_PATH = "/themes/fragment/list"
-PREVIEW_PATH = "/themes/fragment/preview/{slug}"
-
-
-def fetch(url: str) -> str:
-    req = urllib.request.Request(url, headers={"User-Agent": "warm-preview/1"})
-    with urllib.request.urlopen(req, timeout=15) as resp:  # nosec B310 (local trusted)
-        return resp.read().decode("utf-8", "replace")
-
-
-def extract_slugs(html: str, limit: int) -> List[str]:
-    slugs = []
-    for m in re.finditer(r'data-theme-id="([^"]+)"', html):
-        s = m.group(1).strip()
-        if s and s not in slugs:
-            slugs.append(s)
-        if len(slugs) >= limit:
-            break
-    return slugs
-
-
-def warm(base_url: str, count: int, repeats: int, delay: float) -> None:
-    list_url = f"{base_url}{LIST_PATH}?limit={count}&offset=0"
-    print(f"[warm] Fetching list: {list_url}")
-    try:
-        html = fetch(list_url)
-    except urllib.error.URLError as e:  # pragma: no cover
-        raise SystemExit(f"Failed fetching list: {e}")
-    slugs = extract_slugs(html, count)
-    if not slugs:
-        raise SystemExit("No theme slugs extracted – cannot warm.")
-    print(f"[warm] Extracted {len(slugs)} slugs: {', '.join(slugs[:8])}{'...' if len(slugs)>8 else ''}")
-    total_requests = 0
-    start = time.time()
-    for r in range(repeats):
-        print(f"[warm] Pass {r+1}/{repeats}")
-        for slug in slugs:
-            url = f"{base_url}{PREVIEW_PATH.format(slug=slug)}"
-            try:
-                fetch(url)
-            except Exception as e:  # pragma: no cover
-                print(f"  [warn] Failed {slug}: {e}")
-            else:
-                total_requests += 1
-            if delay:
-                time.sleep(delay)
-    dur = time.time() - start
-    print(f"[warm] Completed {total_requests} preview requests in {dur:.2f}s ({total_requests/dur if dur>0 else 0:.1f} rps)")
-    print("[warm] Done. Now run metrics snapshot to capture warm p95.")
-
-
-def main(argv: list[str]) -> int:
-    ap = argparse.ArgumentParser(description="Generate warm preview traffic")
-    ap.add_argument("--base-url", default="http://localhost:8000", help="Base URL (default: %(default)s)")
-    ap.add_argument("--count", type=int, default=25, help="Number of distinct theme slugs to warm (default: %(default)s)")
-    ap.add_argument("--repeats", type=int, default=2, help="Repeat passes over slugs (default: %(default)s)")
-    ap.add_argument("--delay", type=float, default=0.05, help="Delay between requests in seconds (default: %(default)s)")
-    args = ap.parse_args(argv)
-    warm(args.base_url.rstrip("/"), args.count, args.repeats, args.delay)
-    return 0
-
-if __name__ == "__main__":  # pragma: no cover
-    import sys
-    raise SystemExit(main(sys.argv[1:]))
--- a/code/services/init.py
+++ b/code/services/init.py
@ -0,0 +1,6 @@
+"""Services package for MTG Python Deckbuilder."""
+
+from code.services.all_cards_loader import AllCardsLoader
+from code.services.card_query_builder import CardQueryBuilder
+
+__all__ = ["AllCardsLoader", "CardQueryBuilder"]
--- a/code/services/all_cards_loader.py
+++ b/code/services/all_cards_loader.py
@ -0,0 +1,292 @@
+"""
+All Cards Loader
+
+Provides efficient loading and querying of the consolidated all_cards.parquet file.
+Features in-memory caching with TTL and automatic reload on file changes.
+
+Usage:
+    loader = AllCardsLoader()
+    
+    # Single card lookup
+    card = loader.get_by_name("Sol Ring")
+    
+    # Batch lookup
+    cards = loader.get_by_names(["Sol Ring", "Lightning Bolt", "Counterspell"])
+    
+    # Filter by color identity
+    blue_cards = loader.filter_by_color_identity(["U"])
+    
+    # Filter by themes
+    token_cards = loader.filter_by_themes(["tokens"], mode="any")
+    
+    # Simple text search
+    results = loader.search("create token", limit=100)
+"""
+
+from __future__ import annotations
+
+import os
+import time
+from typing import Optional
+
+import pandas as pd
+
+from code.logging_util import get_logger
+
+# Initialize logger
+logger = get_logger(__name__)
+
+
+class AllCardsLoader:
+    """Loads and caches the consolidated all_cards.parquet file with query methods."""
+
+    def __init__(self, file_path: Optional[str] = None, cache_ttl: int = 300) -> None:
+        """
+        Initialize AllCardsLoader.
+
+        Args:
+            file_path: Path to all_cards.parquet (defaults to card_files/processed/all_cards.parquet)
+            cache_ttl: Time-to-live for cache in seconds (default: 300 = 5 minutes)
+        """
+        if file_path is None:
+            from code.path_util import get_processed_cards_path
+            file_path = get_processed_cards_path()
+        
+        self.file_path = file_path
+        self.cache_ttl = cache_ttl
+        self._df: Optional[pd.DataFrame] = None
+        self._last_load_time: float = 0
+        self._file_mtime: float = 0
+
+    def load(self, force_reload: bool = False) -> pd.DataFrame:
+        """
+        Load all_cards.parquet with caching.
+
+        Returns cached DataFrame if:
+        - Cache exists
+        - Cache is not expired (within TTL)
+        - File hasn't been modified since last load
+        - force_reload is False
+
+        Args:
+            force_reload: Force reload from disk even if cached
+
+        Returns:
+            DataFrame containing all cards
+
+        Raises:
+            FileNotFoundError: If all_cards.parquet doesn't exist
+        """
+        if not os.path.exists(self.file_path):
+            raise FileNotFoundError(f"All cards file not found: {self.file_path}")
+
+        # Check if we need to reload
+        current_time = time.time()
+        file_mtime = os.path.getmtime(self.file_path)
+
+        cache_valid = (
+            self._df is not None
+            and not force_reload
+            and (current_time - self._last_load_time) < self.cache_ttl
+            and file_mtime == self._file_mtime
+        )
+
+        if cache_valid:
+            return self._df  # type: ignore
+
+        # Load from disk
+        logger.info(f"Loading all_cards from {self.file_path}...")
+        start_time = time.time()
+        self._df = pd.read_parquet(self.file_path, engine="pyarrow")
+        elapsed = time.time() - start_time
+
+        self._last_load_time = current_time
+        self._file_mtime = file_mtime
+
+        logger.info(
+            f"Loaded {len(self._df)} cards with {len(self._df.columns)} columns in {elapsed:.3f}s"
+        )
+
+        return self._df
+
+    def get_by_name(self, name: str) -> Optional[pd.Series]:
+        """
+        Get a single card by exact name match.
+
+        Args:
+            name: Card name to search for
+
+        Returns:
+            Series containing card data, or None if not found
+        """
+        df = self.load()
+        if "name" not in df.columns:
+            logger.warning("'name' column not found in all_cards")
+            return None
+
+        # Use .loc[] for faster exact match lookup
+        try:
+            matches = df.loc[df["name"] == name]
+            if matches.empty:
+                return None
+            return matches.iloc[0]
+        except (KeyError, IndexError):
+            return None
+
+    def get_by_names(self, names: list[str]) -> pd.DataFrame:
+        """
+        Get multiple cards by exact name matches (batch lookup).
+
+        Args:
+            names: List of card names to search for
+
+        Returns:
+            DataFrame containing matching cards (may be empty)
+        """
+        df = self.load()
+        if "name" not in df.columns:
+            logger.warning("'name' column not found in all_cards")
+            return pd.DataFrame()
+
+        return df[df["name"].isin(names)]
+
+    def filter_by_color_identity(self, colors: list[str]) -> pd.DataFrame:
+        """
+        Filter cards by color identity.
+
+        Args:
+            colors: List of color codes (e.g., ["W", "U"], ["Colorless"], ["G", "R", "U"])
+
+        Returns:
+            DataFrame containing cards matching the color identity
+        """
+        df = self.load()
+        if "colorIdentity" not in df.columns:
+            logger.warning("'colorIdentity' column not found in all_cards")
+            return pd.DataFrame()
+
+        # Convert colors list to a set for comparison
+        color_set = set(colors)
+
+        # Handle special case for colorless
+        if "Colorless" in color_set or "colorless" in color_set:
+            return df[df["colorIdentity"].isin(["Colorless", "colorless"])]
+
+        # For multi-color searches, match any card that contains those colors
+        # This is a simple exact match - could be enhanced for subset/superset matching
+        if len(colors) == 1:
+            # Single color - exact match
+            return df[df["colorIdentity"] == colors[0]]
+        else:
+            # Multi-color - match any of the provided colors (could be refined)
+            return df[df["colorIdentity"].isin(colors)]
+
+    def filter_by_themes(self, themes: list[str], mode: str = "any") -> pd.DataFrame:
+        """
+        Filter cards by theme tags.
+
+        Args:
+            themes: List of theme tags to search for
+            mode: "any" (at least one theme) or "all" (must have all themes)
+
+        Returns:
+            DataFrame containing cards matching the theme criteria
+        """
+        df = self.load()
+        if "themeTags" not in df.columns:
+            logger.warning("'themeTags' column not found in all_cards")
+            return pd.DataFrame()
+
+        if mode == "all":
+            # Card must have all specified themes
+            mask = pd.Series([True] * len(df), index=df.index)
+            for theme in themes:
+                mask &= df["themeTags"].str.contains(theme, case=False, na=False)
+            return df[mask]
+        else:
+            # Card must have at least one of the specified themes (default)
+            mask = pd.Series([False] * len(df), index=df.index)
+            for theme in themes:
+                mask |= df["themeTags"].str.contains(theme, case=False, na=False)
+            return df[mask]
+
+    def search(self, query: str, limit: int = 100) -> pd.DataFrame:
+        """
+        Simple text search across card name, type, and oracle text.
+
+        Args:
+            query: Search query string
+            limit: Maximum number of results to return
+
+        Returns:
+            DataFrame containing matching cards (up to limit)
+        """
+        df = self.load()
+
+        # Search across multiple columns
+        mask = pd.Series([False] * len(df), index=df.index)
+
+        if "name" in df.columns:
+            mask |= df["name"].str.contains(query, case=False, na=False)
+
+        if "type" in df.columns:
+            mask |= df["type"].str.contains(query, case=False, na=False)
+
+        if "text" in df.columns:
+            mask |= df["text"].str.contains(query, case=False, na=False)
+
+        results = df[mask]
+
+        if len(results) > limit:
+            return results.head(limit)
+
+        return results
+
+    def filter_by_type(self, type_query: str) -> pd.DataFrame:
+        """
+        Filter cards by type line (supports partial matching).
+
+        Args:
+            type_query: Type string to search for (e.g., "Creature", "Instant", "Artifact")
+
+        Returns:
+            DataFrame containing cards matching the type
+        """
+        df = self.load()
+        if "type" not in df.columns:
+            logger.warning("'type' column not found in all_cards")
+            return pd.DataFrame()
+
+        return df[df["type"].str.contains(type_query, case=False, na=False)]
+
+    def get_stats(self) -> dict:
+        """
+        Get statistics about the loaded card data.
+
+        Returns:
+            Dictionary with card count, column count, file size, and load time
+        """
+        df = self.load()
+
+        stats = {
+            "total_cards": len(df),
+            "columns": len(df.columns),
+            "file_path": self.file_path,
+            "file_size_mb": (
+                round(os.path.getsize(self.file_path) / (1024 * 1024), 2)
+                if os.path.exists(self.file_path)
+                else 0
+            ),
+            "cached": self._df is not None,
+            "cache_age_seconds": int(time.time() - self._last_load_time)
+            if self._last_load_time > 0
+            else None,
+        }
+
+        return stats
+
+    def clear_cache(self) -> None:
+        """Clear the cached DataFrame, forcing next load to read from disk."""
+        self._df = None
+        self._last_load_time = 0
+        logger.info("Cache cleared")
--- a/code/services/card_query_builder.py
+++ b/code/services/card_query_builder.py
@ -0,0 +1,207 @@
+"""
+Card Query Builder
+
+Provides a fluent API for building complex card queries against the consolidated all_cards.parquet.
+
+Usage:
+    from code.services.card_query_builder import CardQueryBuilder
+    
+    # Simple query
+    builder = CardQueryBuilder()
+    cards = builder.colors(["W", "U"]).execute()
+    
+    # Complex query
+    cards = (CardQueryBuilder()
+        .colors(["G"])
+        .themes(["tokens"], mode="any")
+        .types("Creature")
+        .limit(20)
+        .execute())
+    
+    # Get specific cards
+    cards = CardQueryBuilder().names(["Sol Ring", "Lightning Bolt"]).execute()
+"""
+
+from __future__ import annotations
+
+from typing import Optional
+
+import pandas as pd
+
+from code.services.all_cards_loader import AllCardsLoader
+
+
+class CardQueryBuilder:
+    """Fluent API for building card queries."""
+
+    def __init__(self, loader: Optional[AllCardsLoader] = None) -> None:
+        """
+        Initialize CardQueryBuilder.
+
+        Args:
+            loader: AllCardsLoader instance (creates default if None)
+        """
+        self._loader = loader or AllCardsLoader()
+        self._color_filter: Optional[list[str]] = None
+        self._theme_filter: Optional[list[str]] = None
+        self._theme_mode: str = "any"
+        self._type_filter: Optional[str] = None
+        self._name_filter: Optional[list[str]] = None
+        self._search_query: Optional[str] = None
+        self._limit: Optional[int] = None
+
+    def colors(self, colors: list[str]) -> CardQueryBuilder:
+        """
+        Filter by color identity.
+
+        Args:
+            colors: List of color codes (e.g., ["W", "U"])
+
+        Returns:
+            Self for chaining
+        """
+        self._color_filter = colors
+        return self
+
+    def themes(self, themes: list[str], mode: str = "any") -> CardQueryBuilder:
+        """
+        Filter by theme tags.
+
+        Args:
+            themes: List of theme tags
+            mode: "any" (at least one) or "all" (must have all)
+
+        Returns:
+            Self for chaining
+        """
+        self._theme_filter = themes
+        self._theme_mode = mode
+        return self
+
+    def types(self, type_query: str) -> CardQueryBuilder:
+        """
+        Filter by type line (partial match).
+
+        Args:
+            type_query: Type string to search for
+
+        Returns:
+            Self for chaining
+        """
+        self._type_filter = type_query
+        return self
+
+    def names(self, names: list[str]) -> CardQueryBuilder:
+        """
+        Filter by specific card names (batch lookup).
+
+        Args:
+            names: List of card names
+
+        Returns:
+            Self for chaining
+        """
+        self._name_filter = names
+        return self
+
+    def search(self, query: str) -> CardQueryBuilder:
+        """
+        Add text search across name, type, and oracle text.
+
+        Args:
+            query: Search query string
+
+        Returns:
+            Self for chaining
+        """
+        self._search_query = query
+        return self
+
+    def limit(self, limit: int) -> CardQueryBuilder:
+        """
+        Limit number of results.
+
+        Args:
+            limit: Maximum number of results
+
+        Returns:
+            Self for chaining
+        """
+        self._limit = limit
+        return self
+
+    def execute(self) -> pd.DataFrame:
+        """
+        Execute the query and return results.
+
+        Returns:
+            DataFrame containing matching cards
+        """
+        # Start with all cards or specific names
+        if self._name_filter:
+            df = self._loader.get_by_names(self._name_filter)
+        else:
+            df = self._loader.load()
+
+        # Apply color filter
+        if self._color_filter:
+            color_results = self._loader.filter_by_color_identity(self._color_filter)
+            df = df[df.index.isin(color_results.index)]
+
+        # Apply theme filter
+        if self._theme_filter:
+            theme_results = self._loader.filter_by_themes(self._theme_filter, mode=self._theme_mode)
+            df = df[df.index.isin(theme_results.index)]
+
+        # Apply type filter
+        if self._type_filter:
+            type_results = self._loader.filter_by_type(self._type_filter)
+            df = df[df.index.isin(type_results.index)]
+
+        # Apply text search
+        if self._search_query:
+            search_results = self._loader.search(self._search_query, limit=999999)
+            df = df[df.index.isin(search_results.index)]
+
+        # Apply limit
+        if self._limit and len(df) > self._limit:
+            df = df.head(self._limit)
+
+        return df
+
+    def count(self) -> int:
+        """
+        Count results without returning full DataFrame.
+
+        Returns:
+            Number of matching cards
+        """
+        return len(self.execute())
+
+    def first(self) -> Optional[pd.Series]:
+        """
+        Get first result only.
+
+        Returns:
+            First matching card as Series, or None if no results
+        """
+        results = self.execute()
+        if results.empty:
+            return None
+        return results.iloc[0]
+
+    def reset(self) -> CardQueryBuilder:
+        """
+        Reset all filters.
+
+        Returns:
+            Self for chaining
+        """
+        self._color_filter = None
+        self._theme_filter = None
+        self._theme_mode = "any"
+        self._type_filter = None
+        self._name_filter = None
+        self._search_query = None
+        self._limit = None
+        return self
--- a/code/services/legacy_loader_adapter.py
+++ b/code/services/legacy_loader_adapter.py
@ -0,0 +1,281 @@
+"""
+Legacy Loader Adapter
+
+Provides backward-compatible wrapper functions around AllCardsLoader for smooth migration.
+Existing code can continue using old file-loading patterns while benefiting from
+the new consolidated Parquet backend.
+
+This adapter will be maintained through v3.0.x and deprecated in v3.1+.
+
+Usage:
+    # Old code (still works):
+    from code.services.legacy_loader_adapter import load_cards_by_type
+    creatures = load_cards_by_type("Creature")
+    
+    # New code (preferred):
+    from code.services.all_cards_loader import AllCardsLoader
+    loader = AllCardsLoader()
+    creatures = loader.filter_by_type("Creature")
+"""
+
+from __future__ import annotations
+
+import warnings
+from typing import Optional
+
+import pandas as pd
+
+from code.logging_util import get_logger
+from code.services.all_cards_loader import AllCardsLoader
+from code.settings import USE_ALL_CARDS_FILE
+
+# Initialize logger
+logger = get_logger(__name__)
+
+# Shared loader instance for performance
+_shared_loader: Optional[AllCardsLoader] = None
+
+
+def _get_loader() -> AllCardsLoader:
+    """Get or create shared AllCardsLoader instance."""
+    global _shared_loader
+    if _shared_loader is None:
+        _shared_loader = AllCardsLoader()
+    return _shared_loader
+
+
+def _deprecation_warning(func_name: str, replacement: str) -> None:
+    """Log deprecation warning for legacy functions."""
+    warnings.warn(
+        f"{func_name} is deprecated and will be removed in v3.1+. "
+        f"Use {replacement} instead.",
+        DeprecationWarning,
+        stacklevel=3,
+    )
+    logger.warning(
+        f"DEPRECATION: {func_name} called. Migrate to {replacement} before v3.1+"
+    )
+
+
+def load_all_cards(use_cache: bool = True) -> pd.DataFrame:
+    """
+    Load all cards from consolidated Parquet file.
+    
+    Legacy function for backward compatibility.
+    
+    Args:
+        use_cache: Whether to use cached data (default: True)
+    
+    Returns:
+        DataFrame containing all cards
+    
+    Deprecated:
+        Use AllCardsLoader().load() instead.
+    """
+    _deprecation_warning("load_all_cards()", "AllCardsLoader().load()")
+    
+    if not USE_ALL_CARDS_FILE:
+        logger.warning("USE_ALL_CARDS_FILE is disabled, returning empty DataFrame")
+        return pd.DataFrame()
+    
+    loader = _get_loader()
+    return loader.load(force_reload=not use_cache)
+
+
+def load_cards_by_name(name: str) -> Optional[pd.Series]:
+    """
+    Load a single card by exact name match.
+    
+    Legacy function for backward compatibility.
+    
+    Args:
+        name: Card name to search for
+    
+    Returns:
+        Series containing card data, or None if not found
+    
+    Deprecated:
+        Use AllCardsLoader().get_by_name() instead.
+    """
+    _deprecation_warning("load_cards_by_name()", "AllCardsLoader().get_by_name()")
+    
+    if not USE_ALL_CARDS_FILE:
+        logger.warning("USE_ALL_CARDS_FILE is disabled, returning None")
+        return None
+    
+    loader = _get_loader()
+    return loader.get_by_name(name)
+
+
+def load_cards_by_names(names: list[str]) -> pd.DataFrame:
+    """
+    Load multiple cards by exact name matches.
+    
+    Legacy function for backward compatibility.
+    
+    Args:
+        names: List of card names to search for
+    
+    Returns:
+        DataFrame containing matching cards
+    
+    Deprecated:
+        Use AllCardsLoader().get_by_names() instead.
+    """
+    _deprecation_warning("load_cards_by_names()", "AllCardsLoader().get_by_names()")
+    
+    if not USE_ALL_CARDS_FILE:
+        logger.warning("USE_ALL_CARDS_FILE is disabled, returning empty DataFrame")
+        return pd.DataFrame()
+    
+    loader = _get_loader()
+    return loader.get_by_names(names)
+
+
+def load_cards_by_type(type_str: str) -> pd.DataFrame:
+    """
+    Load cards by type line (partial match).
+    
+    Legacy function for backward compatibility.
+    
+    Args:
+        type_str: Type string to search for (e.g., "Creature", "Instant")
+    
+    Returns:
+        DataFrame containing cards matching the type
+    
+    Deprecated:
+        Use AllCardsLoader().filter_by_type() instead.
+    """
+    _deprecation_warning("load_cards_by_type()", "AllCardsLoader().filter_by_type()")
+    
+    if not USE_ALL_CARDS_FILE:
+        logger.warning("USE_ALL_CARDS_FILE is disabled, returning empty DataFrame")
+        return pd.DataFrame()
+    
+    loader = _get_loader()
+    return loader.filter_by_type(type_str)
+
+
+def load_cards_with_tag(tag: str) -> pd.DataFrame:
+    """
+    Load cards containing a specific theme tag.
+    
+    Legacy function for backward compatibility.
+    
+    Args:
+        tag: Theme tag to search for
+    
+    Returns:
+        DataFrame containing cards with the tag
+    
+    Deprecated:
+        Use AllCardsLoader().filter_by_themes() instead.
+    """
+    _deprecation_warning("load_cards_with_tag()", "AllCardsLoader().filter_by_themes()")
+    
+    if not USE_ALL_CARDS_FILE:
+        logger.warning("USE_ALL_CARDS_FILE is disabled, returning empty DataFrame")
+        return pd.DataFrame()
+    
+    loader = _get_loader()
+    return loader.filter_by_themes([tag], mode="any")
+
+
+def load_cards_with_tags(tags: list[str], require_all: bool = False) -> pd.DataFrame:
+    """
+    Load cards containing theme tags.
+    
+    Legacy function for backward compatibility.
+    
+    Args:
+        tags: List of theme tags to search for
+        require_all: If True, card must have all tags; if False, at least one tag
+    
+    Returns:
+        DataFrame containing cards matching the tag criteria
+    
+    Deprecated:
+        Use AllCardsLoader().filter_by_themes() instead.
+    """
+    _deprecation_warning(
+        "load_cards_with_tags()", "AllCardsLoader().filter_by_themes()"
+    )
+    
+    if not USE_ALL_CARDS_FILE:
+        logger.warning("USE_ALL_CARDS_FILE is disabled, returning empty DataFrame")
+        return pd.DataFrame()
+    
+    loader = _get_loader()
+    mode = "all" if require_all else "any"
+    return loader.filter_by_themes(tags, mode=mode)
+
+
+def load_cards_by_color_identity(colors: list[str]) -> pd.DataFrame:
+    """
+    Load cards by color identity.
+    
+    Legacy function for backward compatibility.
+    
+    Args:
+        colors: List of color codes (e.g., ["W", "U"])
+    
+    Returns:
+        DataFrame containing cards matching the color identity
+    
+    Deprecated:
+        Use AllCardsLoader().filter_by_color_identity() instead.
+    """
+    _deprecation_warning(
+        "load_cards_by_color_identity()", "AllCardsLoader().filter_by_color_identity()"
+    )
+    
+    if not USE_ALL_CARDS_FILE:
+        logger.warning("USE_ALL_CARDS_FILE is disabled, returning empty DataFrame")
+        return pd.DataFrame()
+    
+    loader = _get_loader()
+    return loader.filter_by_color_identity(colors)
+
+
+def search_cards(query: str, limit: int = 100) -> pd.DataFrame:
+    """
+    Search cards by text query.
+    
+    Legacy function for backward compatibility.
+    
+    Args:
+        query: Search query string
+        limit: Maximum number of results
+    
+    Returns:
+        DataFrame containing matching cards
+    
+    Deprecated:
+        Use AllCardsLoader().search() instead.
+    """
+    _deprecation_warning("search_cards()", "AllCardsLoader().search()")
+    
+    if not USE_ALL_CARDS_FILE:
+        logger.warning("USE_ALL_CARDS_FILE is disabled, returning empty DataFrame")
+        return pd.DataFrame()
+    
+    loader = _get_loader()
+    return loader.search(query, limit=limit)
+
+
+def clear_card_cache() -> None:
+    """
+    Clear the cached card data, forcing next load to read from disk.
+    
+    Legacy function for backward compatibility.
+    
+    Deprecated:
+        Use AllCardsLoader().clear_cache() instead.
+    """
+    _deprecation_warning("clear_card_cache()", "AllCardsLoader().clear_cache()")
+    
+    global _shared_loader
+    if _shared_loader is not None:
+        _shared_loader.clear_cache()
+        _shared_loader = None
--- a/code/settings.py
+++ b/code/settings.py
@ -89,17 +89,34 @@ COLUMN_ORDER = CARD_COLUMN_ORDER
 TAGGED_COLUMN_ORDER = CARD_COLUMN_ORDER
 REQUIRED_COLUMNS = REQUIRED_CARD_COLUMNS

-MAIN_MENU_ITEMS: List[str] = ['Build A Deck', 'Setup CSV Files', 'Tag CSV Files', 'Quit']
+# MAIN_MENU_ITEMS, SETUP_MENU_ITEMS, CSV_DIRECTORY already defined above (lines 67-70)

-SETUP_MENU_ITEMS: List[str] = ['Initial Setup', 'Regenerate CSV', 'Main Menu']
+CARD_FILES_DIRECTORY: str = 'card_files'  # Parquet files for consolidated card data

-CSV_DIRECTORY: str = 'csv_files'
+# ----------------------------------------------------------------------------------
+# PARQUET MIGRATION SETTINGS (v3.0.0+)
+# ----------------------------------------------------------------------------------

-# Configuration for handling null/NA values in DataFrame columns
-FILL_NA_COLUMNS: Dict[str, Optional[str]] = {
-    'colorIdentity': 'Colorless',  # Default color identity for cards without one
-    'faceName': None  # Use card's name column value when face name is not available
-}
+# Card files directory structure (Parquet-based)
+# Override with environment variables for custom paths
+CARD_FILES_DIR = os.getenv('CARD_FILES_DIR', 'card_files')
+CARD_FILES_RAW_DIR = os.getenv('CARD_FILES_RAW_DIR', os.path.join(CARD_FILES_DIR, 'raw'))
+CARD_FILES_PROCESSED_DIR = os.getenv('CARD_FILES_PROCESSED_DIR', os.path.join(CARD_FILES_DIR, 'processed'))
+
+# Legacy CSV compatibility mode (v3.0.0 only, removed in v3.1.0)
+# Enable CSV fallback for testing or migration troubleshooting
+# Set to '1' or 'true' to enable CSV fallback when Parquet loading fails
+LEGACY_CSV_COMPAT = os.getenv('LEGACY_CSV_COMPAT', '0').lower() in ('1', 'true', 'on', 'enabled')
+
+# FILL_NA_COLUMNS already defined above (lines 75-78)
+
+# ----------------------------------------------------------------------------------
+# ALL CARDS CONSOLIDATION FEATURE FLAG
+# ----------------------------------------------------------------------------------
+
+# Enable use of consolidated all_cards.parquet file (default: True)
+# Set to False to disable and fall back to individual CSV file loading
+USE_ALL_CARDS_FILE = os.getenv('USE_ALL_CARDS_FILE', '1').lower() not in ('0', 'false', 'off', 'disabled')

 # ----------------------------------------------------------------------------------
 # TAGGING REFINEMENT FEATURE FLAGS (M1-M5)
@ -115,4 +132,28 @@ TAG_PROTECTION_GRANTS = os.getenv('TAG_PROTECTION_GRANTS', '1').lower() not in (
 TAG_METADATA_SPLIT = os.getenv('TAG_METADATA_SPLIT', '1').lower() not in ('0', 'false', 'off', 'disabled')

 # M5: Enable protection scope filtering in deck builder (completed - Phase 1-3, in progress Phase 4+)
-TAG_PROTECTION_SCOPE = os.getenv('TAG_PROTECTION_SCOPE', '1').lower() not in ('0', 'false', 'off', 'disabled')
+TAG_PROTECTION_SCOPE = os.getenv('TAG_PROTECTION_SCOPE', '1').lower() not in ('0', 'false', 'off', 'disabled')
+
+# ----------------------------------------------------------------------------------
+# CARD BROWSER FEATURE FLAGS
+# ----------------------------------------------------------------------------------
+
+# Enable card detail pages (default: OFF)
+# Set to '1' or 'true' to enable card detail pages in card browser
+ENABLE_CARD_DETAILS = os.getenv('ENABLE_CARD_DETAILS', '0').lower() not in ('0', 'false', 'off', 'disabled')
+
+# Enable similarity/synergy features (default: OFF)
+# Requires ENABLE_CARD_DETAILS=1 and manual cache build via Setup/Tag page
+# Shows similar cards based on theme tag overlap using containment scoring
+ENABLE_CARD_SIMILARITIES = os.getenv('ENABLE_CARD_SIMILARITIES', '0').lower() not in ('0', 'false', 'off', 'disabled')
+
+# Similarity cache configuration
+SIMILARITY_CACHE_PATH = os.getenv('SIMILARITY_CACHE_PATH', 'card_files/similarity_cache.json')
+SIMILARITY_CACHE_MAX_AGE_DAYS = int(os.getenv('SIMILARITY_CACHE_MAX_AGE_DAYS', '7'))
+
+# Allow downloading pre-built cache from GitHub (saves 15-20 min build time)
+# Set to '0' to always build locally (useful for custom seeds or offline environments)
+SIMILARITY_CACHE_DOWNLOAD = os.getenv('SIMILARITY_CACHE_DOWNLOAD', '1').lower() not in ('0', 'false', 'off', 'disabled')
+
+# Batch build feature flag (Build X and Compare)
+ENABLE_BATCH_BUILD = os.getenv('ENABLE_BATCH_BUILD', '1').lower() not in ('0', 'false', 'off', 'disabled')
--- a/code/tagging/benchmark_tagging.py
+++ b/code/tagging/benchmark_tagging.py
@ -0,0 +1,264 @@
+"""Benchmark tagging approaches: tag-centric vs card-centric.
+
+Compares performance of:
+1. Tag-centric (current): Multiple passes, one per tag type
+2. Card-centric (new): Single pass, all tags per card
+
+Usage:
+    python code/tagging/benchmark_tagging.py
+    
+Or in Python:
+    from code.tagging.benchmark_tagging import run_benchmark
+    run_benchmark()
+"""
+
+from __future__ import annotations
+
+import time
+
+import pandas as pd
+
+from file_setup.data_loader import DataLoader
+from logging_util import get_logger
+from path_util import get_processed_cards_path
+
+logger = get_logger(__name__)
+
+
+def load_sample_data(sample_size: int = 1000) -> pd.DataFrame:
+    """Load a sample of cards for benchmarking.
+    
+    Args:
+        sample_size: Number of cards to sample (default: 1000)
+        
+    Returns:
+        DataFrame with sampled cards
+    """
+    logger.info(f"Loading {sample_size} cards for benchmark")
+    
+    all_cards_path = get_processed_cards_path()
+    loader = DataLoader()
+    
+    df = loader.read_cards(all_cards_path, format="parquet")
+    
+    # Sample random cards (reproducible)
+    if len(df) > sample_size:
+        df = df.sample(n=sample_size, random_state=42)
+    
+    # Reset themeTags for fair comparison
+    df['themeTags'] = pd.Series([[] for _ in range(len(df))], index=df.index)
+    
+    logger.info(f"Loaded {len(df)} cards for benchmarking")
+    return df
+
+
+def benchmark_tag_centric(df: pd.DataFrame, iterations: int = 3) -> dict:
+    """Benchmark the traditional tag-centric approach.
+    
+    Simulates the multi-pass approach where each tag function
+    iterates through all cards.
+    
+    Args:
+        df: DataFrame to tag
+        iterations: Number of times to run (for averaging)
+        
+    Returns:
+        Dict with timing stats
+    """
+    import re
+    
+    times = []
+    
+    for i in range(iterations):
+        test_df = df.copy()
+        
+        # Initialize themeTags
+        if 'themeTags' not in test_df.columns:
+            test_df['themeTags'] = pd.Series([[] for _ in range(len(test_df))], index=test_df.index)
+        
+        start = time.perf_counter()
+        
+        # PASS 1: Ramp tags
+        for idx in test_df.index:
+            text = str(test_df.at[idx, 'text']).lower()
+            if re.search(r'add.*mana|search.*land|ramp', text):
+                tags = test_df.at[idx, 'themeTags']
+                if not isinstance(tags, list):
+                    tags = []
+                if 'Ramp' not in tags:
+                    tags.append('Ramp')
+                test_df.at[idx, 'themeTags'] = tags
+        
+        # PASS 2: Card draw tags
+        for idx in test_df.index:
+            text = str(test_df.at[idx, 'text']).lower()
+            if re.search(r'draw.*card|card draw', text):
+                tags = test_df.at[idx, 'themeTags']
+                if not isinstance(tags, list):
+                    tags = []
+                if 'Card Draw' not in tags:
+                    tags.append('Card Draw')
+                test_df.at[idx, 'themeTags'] = tags
+        
+        # PASS 3: Removal tags
+        for idx in test_df.index:
+            text = str(test_df.at[idx, 'text']).lower()
+            if re.search(r'destroy|exile|counter|return.*hand', text):
+                tags = test_df.at[idx, 'themeTags']
+                if not isinstance(tags, list):
+                    tags = []
+                for tag in ['Removal', 'Interaction']:
+                    if tag not in tags:
+                        tags.append(tag)
+                test_df.at[idx, 'themeTags'] = tags
+        
+        # PASS 4: Token tags
+        for idx in test_df.index:
+            text = str(test_df.at[idx, 'text']).lower()
+            if re.search(r'create.*token|token.*creature', text):
+                tags = test_df.at[idx, 'themeTags']
+                if not isinstance(tags, list):
+                    tags = []
+                if 'Tokens' not in tags:
+                    tags.append('Tokens')
+                test_df.at[idx, 'themeTags'] = tags
+        
+        # PASS 5: Card type tags
+        for idx in test_df.index:
+            type_line = str(test_df.at[idx, 'type']).lower()
+            tags = test_df.at[idx, 'themeTags']
+            if not isinstance(tags, list):
+                tags = []
+            if 'creature' in type_line and 'Creature' not in tags:
+                tags.append('Creature')
+            if 'artifact' in type_line and 'Artifact' not in tags:
+                tags.append('Artifact')
+            test_df.at[idx, 'themeTags'] = tags
+        
+        elapsed = time.perf_counter() - start
+        times.append(elapsed)
+        
+        logger.info(f"Tag-centric iteration {i+1}/{iterations}: {elapsed:.3f}s")
+    
+    return {
+        'approach': 'tag-centric',
+        'iterations': iterations,
+        'times': times,
+        'mean': sum(times) / len(times),
+        'min': min(times),
+        'max': max(times),
+    }
+
+
+def benchmark_card_centric(df: pd.DataFrame, iterations: int = 3) -> dict:
+    """Benchmark the new card-centric approach.
+    
+    Args:
+        df: DataFrame to tag
+        iterations: Number of times to run (for averaging)
+        
+    Returns:
+        Dict with timing stats
+    """
+    from tagging.tagger_card_centric import tag_all_cards_single_pass
+    
+    times = []
+    
+    for i in range(iterations):
+        test_df = df.copy()
+        
+        start = time.perf_counter()
+        
+        tag_all_cards_single_pass(test_df)
+        
+        elapsed = time.perf_counter() - start
+        times.append(elapsed)
+        
+        logger.info(f"Card-centric iteration {i+1}/{iterations}: {elapsed:.3f}s")
+    
+    return {
+        'approach': 'card-centric',
+        'iterations': iterations,
+        'times': times,
+        'mean': sum(times) / len(times),
+        'min': min(times),
+        'max': max(times),
+    }
+
+
+def run_benchmark(sample_sizes: list[int] = [100, 500, 1000, 5000]) -> None:
+    """Run comprehensive benchmark comparing both approaches.
+    
+    Args:
+        sample_sizes: List of dataset sizes to test
+    """
+    print("\n" + "="*80)
+    print("TAGGING APPROACH BENCHMARK")
+    print("="*80)
+    print("\nComparing:")
+    print("  1. Tag-centric (current): Multiple passes, one per tag type")
+    print("  2. Card-centric (new):    Single pass, all tags per card")
+    print()
+    
+    results = []
+    
+    for size in sample_sizes:
+        print(f"\n{'─'*80}")
+        print(f"Testing with {size:,} cards...")
+        print(f"{'─'*80}")
+        
+        df = load_sample_data(sample_size=size)
+        
+        # Benchmark tag-centric
+        print("\n▶ Tag-centric approach:")
+        tag_centric_result = benchmark_tag_centric(df, iterations=3)
+        print(f"  Mean: {tag_centric_result['mean']:.3f}s")
+        print(f"  Range: {tag_centric_result['min']:.3f}s - {tag_centric_result['max']:.3f}s")
+        
+        # Benchmark card-centric
+        print("\n▶ Card-centric approach:")
+        card_centric_result = benchmark_card_centric(df, iterations=3)
+        print(f"  Mean: {card_centric_result['mean']:.3f}s")
+        print(f"  Range: {card_centric_result['min']:.3f}s - {card_centric_result['max']:.3f}s")
+        
+        # Compare
+        speedup = tag_centric_result['mean'] / card_centric_result['mean']
+        winner = "Card-centric" if speedup > 1 else "Tag-centric"
+        
+        print(f"\n{'─'*40}")
+        if speedup > 1:
+            print(f"✓ {winner} is {speedup:.2f}x FASTER")
+        else:
+            print(f"✓ {winner} is {1/speedup:.2f}x FASTER")
+        print(f"{'─'*40}")
+        
+        results.append({
+            'size': size,
+            'tag_centric_mean': tag_centric_result['mean'],
+            'card_centric_mean': card_centric_result['mean'],
+            'speedup': speedup,
+            'winner': winner,
+        })
+    
+    # Summary
+    print("\n" + "="*80)
+    print("SUMMARY")
+    print("="*80)
+    print(f"\n{'Size':<10} {'Tag-Centric':<15} {'Card-Centric':<15} {'Speedup':<10} {'Winner':<15}")
+    print("─" * 80)
+    
+    for r in results:
+        print(f"{r['size']:<10,} {r['tag_centric_mean']:<15.3f} {r['card_centric_mean']:<15.3f} {r['speedup']:<10.2f}x {r['winner']:<15}")
+    
+    # Overall recommendation
+    avg_speedup = sum(r['speedup'] for r in results) / len(results)
+    print("\n" + "="*80)
+    if avg_speedup > 1:
+        print(f"RECOMMENDATION: Use CARD-CENTRIC (avg {avg_speedup:.2f}x faster)")
+    else:
+        print(f"RECOMMENDATION: Use TAG-CENTRIC (avg {1/avg_speedup:.2f}x faster)")
+    print("="*80 + "\n")
+
+
+if __name__ == "__main__":
+    run_benchmark()
--- a/code/tagging/bracket_policy_applier.py
+++ b/code/tagging/bracket_policy_applier.py
@ -30,14 +30,14 @@ try:
    import logging_util
 except Exception:
    # Fallback for direct module loading
-    import importlib.util  # type: ignore
+    import importlib.util
    root = Path(__file__).resolve().parents[1]
    lu_path = root / 'logging_util.py'
    spec = importlib.util.spec_from_file_location('logging_util', str(lu_path))
    mod = importlib.util.module_from_spec(spec)  # type: ignore[arg-type]
    assert spec and spec.loader
-    spec.loader.exec_module(mod)  # type: ignore[assignment]
-    logging_util = mod  # type: ignore
+    spec.loader.exec_module(mod)
+    logging_util = mod

 logger = logging_util.logging.getLogger(__name__)
 logger.setLevel(logging_util.LOG_LEVEL)
--- a/code/tagging/colorless_filter_applier.py
+++ b/code/tagging/colorless_filter_applier.py
@ -0,0 +1,121 @@
+"""Apply 'Useless in Colorless' metadata tags to cards that don't work in colorless identity decks.
+
+This module identifies and tags cards using regex patterns to match oracle text:
+1. Cards referencing "your commander's color identity"
+2. Cards that reduce costs of colored spells
+3. Cards that trigger on casting colored spells
+
+Examples include:
+- Arcane Signet, Command Tower (commander color identity)
+- Pearl/Sapphire/Jet/Ruby/Emerald Medallion (colored cost reduction)
+- Oketra's/Kefnet's/Bontu's/Hazoret's/Rhonas's Monument (colored creature cost reduction)
+- Shrine of Loyal Legions, etc. (colored spell triggers)
+"""
+from __future__ import annotations
+import logging
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+# Regex patterns for cards that don't work in colorless identity decks
+COLORLESS_FILTER_PATTERNS = [
+    # Cards referencing "your commander's color identity"
+    # BUT exclude Commander's Plate (protection from colors NOT in identity = amazing in colorless!)
+    # and Study Hall (still draws/scrys in colorless)
+    r"commander'?s?\s+color\s+identity",
+    
+    # Colored cost reduction - medallions and monuments
+    # Matches: "white spells you cast cost", "blue creature spells you cast cost", etc.
+    # Use non-capturing groups to avoid pandas UserWarning
+    r"(?:white|blue|black|red|green)\s+(?:creature\s+)?spells?\s+you\s+cast\s+cost.*less",
+    
+    # Colored spell triggers - shrines and similar
+    # Matches: "whenever you cast a white spell", etc.
+    # Use non-capturing groups to avoid pandas UserWarning
+    r"whenever\s+you\s+cast\s+a\s+(?:white|blue|black|red|green)\s+spell",
+]
+
+# Cards that should NOT be filtered despite matching patterns
+# These cards actually work great in colorless decks
+COLORLESS_FILTER_EXCEPTIONS = [
+    "Commander's Plate",  # Protection from colors NOT in identity = protection from all colors in colorless!
+    "Study Hall",         # Still provides colorless mana and scrys when casting commander
+]
+
+USELESS_IN_COLORLESS_TAG = "Useless in Colorless"
+
+
+def apply_colorless_filter_tags(df: pd.DataFrame) -> None:
+    """Apply 'Useless in Colorless' metadata tag to cards that don't work in colorless decks.
+    
+    Uses regex patterns to identify cards in oracle text that:
+    - Reference "your commander's color identity"
+    - Reduce costs of colored spells
+    - Trigger on casting colored spells
+    
+    Modifies the DataFrame in-place by adding tags to the 'themeTags' column.
+    These tags will later be moved to 'metadataTags' during the partition phase.
+    
+    Args:
+        df: DataFrame with 'name', 'text', and 'themeTags' columns
+        
+    Returns:
+        None (modifies DataFrame in-place)
+    """
+    if 'name' not in df.columns:
+        logger.warning("No 'name' column found, skipping colorless filter tagging")
+        return
+        
+    if 'text' not in df.columns:
+        logger.warning("No 'text' column found, skipping colorless filter tagging")
+        return
+        
+    if 'themeTags' not in df.columns:
+        logger.warning("No 'themeTags' column found, skipping colorless filter tagging")
+        return
+    
+    # Combine all patterns with OR (use non-capturing groups to avoid pandas warning)
+    combined_pattern = "|".join(f"(?:{pattern})" for pattern in COLORLESS_FILTER_PATTERNS)
+    
+    # Find cards matching any pattern
+    df['text'] = df['text'].fillna('')
+    matches_pattern = df['text'].str.contains(
+        combined_pattern,
+        case=False,
+        regex=True,
+        na=False
+    )
+    
+    # Exclude cards that work well in colorless despite matching patterns
+    is_exception = df['name'].isin(COLORLESS_FILTER_EXCEPTIONS)
+    matches_pattern = matches_pattern & ~is_exception
+    
+    tagged_count = 0
+    
+    for idx in df[matches_pattern].index:
+        card_name = df.at[idx, 'name']
+        tags = df.at[idx, 'themeTags']
+        
+        # Ensure themeTags is a list
+        if not isinstance(tags, list):
+            tags = []
+        
+        # Add tag if not already present
+        if USELESS_IN_COLORLESS_TAG not in tags:
+            tags.append(USELESS_IN_COLORLESS_TAG)
+            df.at[idx, 'themeTags'] = tags
+            tagged_count += 1
+            logger.debug(f"Tagged '{card_name}' with '{USELESS_IN_COLORLESS_TAG}'")
+    
+    if tagged_count > 0:
+        logger.info(f"Applied '{USELESS_IN_COLORLESS_TAG}' tag to {tagged_count} cards")
+    else:
+        logger.info(f"No '{USELESS_IN_COLORLESS_TAG}' tags applied (no matches or already tagged)")
+
+
+__all__ = [
+    "apply_colorless_filter_tags",
+    "COLORLESS_FILTER_PATTERNS",
+    "COLORLESS_FILTER_EXCEPTIONS",
+    "USELESS_IN_COLORLESS_TAG",
+]
--- a/code/tagging/combo_tag_applier.py
+++ b/code/tagging/combo_tag_applier.py
@ -11,9 +11,6 @@ from typing import DefaultDict, Dict, List, Set
 # Third-party imports
 import pandas as pd

-# Local application imports
-from settings import CSV_DIRECTORY, SETUP_COLORS
-

@dataclass(frozen=True)
 class ComboPair:
@ -95,57 +92,73 @@ def _safe_list_parse(s: object) -> List[str]:
    return []


-def apply_combo_tags(colors: List[str] | None = None, combos_path: str | Path = "config/card_lists/combos.json", csv_dir: str | Path | None = None) -> Dict[str, int]:
-    """Apply bidirectional comboTags to per-color CSVs based on combos.json.
+def apply_combo_tags(
+    df: pd.DataFrame | None = None,
+    combos_path: str | Path = "config/card_lists/combos.json"
+) -> Dict[str, int]:
+    """Apply bidirectional comboTags to DataFrame based on combos.json.
+    
+    This function modifies the DataFrame in-place when called from the tagging pipeline.
+    It can also be called standalone without a DataFrame for legacy/CLI usage.

-    Returns a dict of color->updated_row_count for quick reporting.
+    Args:
+        df: DataFrame to modify in-place (from tagging pipeline), or None for standalone usage
+        combos_path: Path to combos.json file
+
+    Returns:
+        Dict with 'total' key showing count of cards with combo tags
    """
-    colors = colors or list(SETUP_COLORS)
    combos_file = Path(combos_path)
    pairs = _load_pairs(combos_file)
-
+    
+    # If no DataFrame provided, load from Parquet (standalone mode)
+    standalone_mode = df is None
+    if standalone_mode:
+        parquet_path = "card_files/processed/all_cards.parquet"
+        parquet_file = Path(parquet_path)
+        if not parquet_file.exists():
+            raise FileNotFoundError(f"Parquet file not found: {parquet_file}")
+        df = pd.read_parquet(parquet_file)
+    
+    _ensure_combo_cols(df)
+    before_hash = pd.util.hash_pandas_object(df[["name", "comboTags"]].astype(str)).sum()
+    
+    # Build an index of canonicalized keys -> actual DF row names to update
+    name_index: DefaultDict[str, Set[str]] = defaultdict(set)
+    for nm in df["name"].astype(str).tolist():
+        canon = _canonicalize(nm)
+        cf = canon.casefold()
+        name_index[cf].add(nm)
+        # If split/fused faces exist, map each face to the combined row name as well
+        if " // " in canon:
+            for part in canon.split(" // "):
+                p = part.strip().casefold()
+                if p:
+                    name_index[p].add(nm)
+    
+    # Apply all combo pairs
+    for p in pairs:
+        a = _canonicalize(p.a)
+        b = _canonicalize(p.b)
+        a_key = a.casefold()
+        b_key = b.casefold()
+        # Apply A<->B bidirectionally to any matching DF rows
+        _apply_partner_to_names(df, name_index.get(a_key, set()), b)
+        _apply_partner_to_names(df, name_index.get(b_key, set()), a)
+    
+    after_hash = pd.util.hash_pandas_object(df[["name", "comboTags"]].astype(str)).sum()
+    
+    # Calculate updated counts
    updated_counts: Dict[str, int] = {}
-    base_dir = Path(csv_dir) if csv_dir is not None else Path(CSV_DIRECTORY)
-    for color in colors:
-        csv_path = base_dir / f"{color}_cards.csv"
-        if not csv_path.exists():
-            continue
-        df = pd.read_csv(csv_path, converters={
-            "themeTags": _safe_list_parse,
-            "creatureTypes": _safe_list_parse,
-            "comboTags": _safe_list_parse,
-        })
-
-        _ensure_combo_cols(df)
-        before_hash = pd.util.hash_pandas_object(df[["name", "comboTags"]].astype(str)).sum()
-
-        # Build an index of canonicalized keys -> actual DF row names to update.
-        name_index: DefaultDict[str, Set[str]] = defaultdict(set)
-        for nm in df["name"].astype(str).tolist():
-            canon = _canonicalize(nm)
-            cf = canon.casefold()
-            name_index[cf].add(nm)
-            # If split/fused faces exist, map each face to the combined row name as well
-            if " // " in canon:
-                for part in canon.split(" // "):
-                    p = part.strip().casefold()
-                    if p:
-                        name_index[p].add(nm)
-
-        for p in pairs:
-            a = _canonicalize(p.a)
-            b = _canonicalize(p.b)
-            a_key = a.casefold()
-            b_key = b.casefold()
-            # Apply A<->B bidirectionally to any matching DF rows
-            _apply_partner_to_names(df, name_index.get(a_key, set()), b)
-            _apply_partner_to_names(df, name_index.get(b_key, set()), a)
-
-        after_hash = pd.util.hash_pandas_object(df[["name", "comboTags"]].astype(str)).sum()
-        if before_hash != after_hash:
-            df.to_csv(csv_path, index=False)
-            updated_counts[color] = int((df["comboTags"].apply(bool)).sum())
-
+    if before_hash != after_hash:
+        updated_counts["total"] = int((df["comboTags"].apply(bool)).sum())
+    else:
+        updated_counts["total"] = 0
+    
+    # Only write back to Parquet in standalone mode
+    if standalone_mode and before_hash != after_hash:
+        df.to_parquet(parquet_file, index=False)
+    
    return updated_counts


--- a/code/tagging/multi_face_merger.py
+++ b/code/tagging/multi_face_merger.py
@ -240,6 +240,13 @@ def merge_multi_face_rows(

        faces_payload = [_build_face_payload(row) for _, row in group_sorted.iterrows()]

+        # M9: Capture back face type for MDFC land detection
+        if len(group_sorted) >= 2 and "type" in group_sorted.columns:
+            back_face_row = group_sorted.iloc[1]
+            back_type = str(back_face_row.get("type", "") or "")
+            if back_type:
+                work_df.at[primary_idx, "backType"] = back_type
+
        drop_indices.extend(group_sorted.index[1:])
        
        merged_count += 1
--- a/code/tagging/old/combo_tag_applier.py
+++ b/code/tagging/old/combo_tag_applier.py
@ -0,0 +1,156 @@
+from __future__ import annotations
+
+# Standard library imports
+import ast
+import json
+from collections import defaultdict
+from dataclasses import dataclass
+from pathlib import Path
+from typing import DefaultDict, Dict, List, Set
+
+# Third-party imports
+import pandas as pd
+
+# Local application imports
+from settings import CSV_DIRECTORY, SETUP_COLORS
+
+
+@dataclass(frozen=True)
+class ComboPair:
+    a: str
+    b: str
+    cheap_early: bool = False
+    setup_dependent: bool = False
+    tags: List[str] | None = None
+
+
+def _load_pairs(path: Path) -> List[ComboPair]:
+    data = json.loads(path.read_text(encoding="utf-8"))
+    pairs = []
+    for entry in data.get("pairs", []):
+        pairs.append(
+            ComboPair(
+                a=entry["a"].strip(),
+                b=entry["b"].strip(),
+                cheap_early=bool(entry.get("cheap_early", False)),
+                setup_dependent=bool(entry.get("setup_dependent", False)),
+                tags=list(entry.get("tags", [])),
+            )
+        )
+    return pairs
+
+
+def _canonicalize(name: str) -> str:
+    # Canonicalize for matching: trim, unify punctuation/quotes, collapse spaces, casefold later
+    if name is None:
+        return ""
+    s = str(name).strip()
+    # Normalize common unicode punctuation variants
+    s = s.replace("\u2019", "'")  # curly apostrophe to straight
+    s = s.replace("\u2018", "'")
+    s = s.replace("\u201C", '"').replace("\u201D", '"')
+    s = s.replace("\u2013", "-").replace("\u2014", "-")  # en/em dash -> hyphen
+    # Collapse multiple spaces
+    s = " ".join(s.split())
+    return s
+
+
+def _ensure_combo_cols(df: pd.DataFrame) -> None:
+    if "comboTags" not in df.columns:
+        df["comboTags"] = [[] for _ in range(len(df))]
+
+
+def _apply_partner_to_names(df: pd.DataFrame, target_names: Set[str], partner: str) -> None:
+    if not target_names:
+        return
+    mask = df["name"].isin(target_names)
+    if not mask.any():
+        return
+    current = df.loc[mask, "comboTags"]
+    df.loc[mask, "comboTags"] = current.apply(
+        lambda tags: sorted(list({*tags, partner})) if isinstance(tags, list) else [partner]
+    )
+
+
+def _safe_list_parse(s: object) -> List[str]:
+    if isinstance(s, list):
+        return s
+    if not isinstance(s, str) or not s.strip():
+        return []
+    txt = s.strip()
+    # Try JSON first
+    try:
+        v = json.loads(txt)
+        if isinstance(v, list):
+            return v
+    except Exception:
+        pass
+    # Fallback to Python literal
+    try:
+        v = ast.literal_eval(txt)
+        if isinstance(v, list):
+            return v
+    except Exception:
+        pass
+    return []
+
+
+def apply_combo_tags(colors: List[str] | None = None, combos_path: str | Path = "config/card_lists/combos.json", csv_dir: str | Path | None = None) -> Dict[str, int]:
+    """Apply bidirectional comboTags to per-color CSVs based on combos.json.
+
+    Returns a dict of color->updated_row_count for quick reporting.
+    """
+    colors = colors or list(SETUP_COLORS)
+    combos_file = Path(combos_path)
+    pairs = _load_pairs(combos_file)
+
+    updated_counts: Dict[str, int] = {}
+    base_dir = Path(csv_dir) if csv_dir is not None else Path(CSV_DIRECTORY)
+    for color in colors:
+        csv_path = base_dir / f"{color}_cards.csv"
+        if not csv_path.exists():
+            continue
+        df = pd.read_csv(csv_path, converters={
+            "themeTags": _safe_list_parse,
+            "creatureTypes": _safe_list_parse,
+            "comboTags": _safe_list_parse,
+        })
+
+        _ensure_combo_cols(df)
+        before_hash = pd.util.hash_pandas_object(df[["name", "comboTags"]].astype(str)).sum()
+
+        # Build an index of canonicalized keys -> actual DF row names to update.
+        name_index: DefaultDict[str, Set[str]] = defaultdict(set)
+        for nm in df["name"].astype(str).tolist():
+            canon = _canonicalize(nm)
+            cf = canon.casefold()
+            name_index[cf].add(nm)
+            # If split/fused faces exist, map each face to the combined row name as well
+            if " // " in canon:
+                for part in canon.split(" // "):
+                    p = part.strip().casefold()
+                    if p:
+                        name_index[p].add(nm)
+
+        for p in pairs:
+            a = _canonicalize(p.a)
+            b = _canonicalize(p.b)
+            a_key = a.casefold()
+            b_key = b.casefold()
+            # Apply A<->B bidirectionally to any matching DF rows
+            _apply_partner_to_names(df, name_index.get(a_key, set()), b)
+            _apply_partner_to_names(df, name_index.get(b_key, set()), a)
+
+        after_hash = pd.util.hash_pandas_object(df[["name", "comboTags"]].astype(str)).sum()
+        if before_hash != after_hash:
+            df.to_csv(csv_path, index=False)
+            updated_counts[color] = int((df["comboTags"].apply(bool)).sum())
+
+    return updated_counts
+
+
+if __name__ == "__main__":
+    counts = apply_combo_tags()
+    print("Updated comboTags counts:")
+    for k, v in counts.items():
+        print(f"  {k}: {v}")
--- a/code/tagging/old/tagger.py
+++ b/code/tagging/old/tagger.py
--- a/code/tagging/parallel_utils.py
+++ b/code/tagging/parallel_utils.py
@ -0,0 +1,134 @@
+"""Utilities for parallel card tagging operations.
+
+This module provides functions to split DataFrames by color identity for
+parallel processing and merge them back together. This enables the tagging
+system to use ProcessPoolExecutor for significant performance improvements
+while maintaining the unified Parquet approach.
+"""
+
+from __future__ import annotations
+
+from typing import Dict
+import pandas as pd
+import logging_util
+
+logger = logging_util.logging.getLogger(__name__)
+logger.setLevel(logging_util.LOG_LEVEL)
+logger.addHandler(logging_util.file_handler)
+logger.addHandler(logging_util.stream_handler)
+
+
+def split_by_color_identity(df: pd.DataFrame) -> Dict[str, pd.DataFrame]:
+    """Split DataFrame into color identity groups for parallel processing.
+    
+    Each color identity group is a separate DataFrame that can be tagged
+    independently. This function preserves all columns and ensures no cards
+    are lost during the split.
+    
+    Color identity groups are based on the 'colorIdentity' column which contains
+    strings like 'W', 'WU', 'WUB', 'WUBRG', etc.
+    
+    Args:
+        df: DataFrame containing all cards with 'colorIdentity' column
+        
+    Returns:
+        Dictionary mapping color identity strings to DataFrames
+        Example: {'W': df_white, 'WU': df_azorius, '': df_colorless, ...}
+        
+    Raises:
+        ValueError: If 'colorIdentity' column is missing
+    """
+    if 'colorIdentity' not in df.columns:
+        raise ValueError("DataFrame must have 'colorIdentity' column for parallel splitting")
+    
+    # Group by color identity
+    groups: Dict[str, pd.DataFrame] = {}
+    
+    for color_id, group_df in df.groupby('colorIdentity', dropna=False):
+        # Handle NaN/None as colorless
+        if pd.isna(color_id):
+            color_id = ''
+        
+        # Convert to string (in case it's already a string, this is safe)
+        color_id_str = str(color_id)
+        
+        # Create a copy to avoid SettingWithCopyWarning in parallel workers
+        groups[color_id_str] = group_df.copy()
+        
+        logger.debug(f"Split group '{color_id_str}': {len(group_df)} cards")
+    
+    # Verify split is complete
+    total_split = sum(len(group_df) for group_df in groups.values())
+    if total_split != len(df):
+        logger.warning(
+            f"Split verification failed: {total_split} cards in groups vs {len(df)} original. "
+            f"Some cards may be missing!"
+        )
+    else:
+        logger.info(f"Split {len(df)} cards into {len(groups)} color identity groups")
+    
+    return groups
+
+
+def merge_color_groups(groups: Dict[str, pd.DataFrame]) -> pd.DataFrame:
+    """Merge tagged color identity groups back into a single DataFrame.
+    
+    This function concatenates all color group DataFrames and ensures:
+    - All columns are preserved
+    - No duplicate cards (by index)
+    - Proper index handling
+    - Consistent column ordering
+    
+    Args:
+        groups: Dictionary mapping color identity strings to tagged DataFrames
+        
+    Returns:
+        Single DataFrame containing all tagged cards
+        
+    Raises:
+        ValueError: If groups is empty or contains invalid DataFrames
+    """
+    if not groups:
+        raise ValueError("Cannot merge empty color groups")
+    
+    # Verify all values are DataFrames
+    for color_id, group_df in groups.items():
+        if not isinstance(group_df, pd.DataFrame):
+            raise ValueError(f"Group '{color_id}' is not a DataFrame: {type(group_df)}")
+    
+    # Concatenate all groups
+    # ignore_index=False preserves original indices
+    # sort=False maintains column order from first DataFrame
+    merged_df = pd.concat(groups.values(), ignore_index=False, sort=False)
+    
+    # Check for duplicate indices (shouldn't happen if split was lossless)
+    if merged_df.index.duplicated().any():
+        logger.warning(
+            f"Found {merged_df.index.duplicated().sum()} duplicate indices after merge. "
+            f"This may indicate a bug in the split/merge process."
+        )
+        # Remove duplicates (keep first occurrence)
+        merged_df = merged_df[~merged_df.index.duplicated(keep='first')]
+    
+    # Verify merge is complete
+    total_merged = len(merged_df)
+    total_groups = sum(len(group_df) for group_df in groups.values())
+    
+    if total_merged != total_groups:
+        logger.warning(
+            f"Merge verification failed: {total_merged} cards in result vs {total_groups} in groups. "
+            f"Lost {total_groups - total_merged} cards!"
+        )
+    else:
+        logger.info(f"Merged {len(groups)} color groups into {total_merged} cards")
+    
+    # Reset index to ensure clean sequential indexing
+    merged_df = merged_df.reset_index(drop=True)
+    
+    return merged_df
+
+
+__all__ = [
+    'split_by_color_identity',
+    'merge_color_groups',
+]
--- a/code/tagging/tag_constants.py
+++ b/code/tagging/tag_constants.py
@ -1072,6 +1072,9 @@ METADATA_TAG_ALLOWLIST: set[str] = {
    # Cost reduction diagnostics (from Applied: namespace)
    'Applied: Cost Reduction',
    
+    # Colorless commander filtering (M1)
+    'Useless in Colorless',
+    
    # Kindred-specific protection metadata (from M2)
    # Format: "{CreatureType}s Gain Protection"
    # These are auto-generated for kindred-specific protection grants
--- a/code/tagging/tag_index.py
+++ b/code/tagging/tag_index.py
@ -0,0 +1,425 @@
+"""Fast tag indexing for reverse lookups and bulk operations.
+
+Provides a reverse index (tag → cards) for efficient tag-based queries.
+Typical queries complete in <1ms after index is built.
+
+Usage:
+    # Build index from all_cards
+    index = TagIndex()
+    index.build()
+    
+    # Query cards with specific tag
+    cards = index.get_cards_with_tag("ramp")  # Returns set of card names
+    
+    # Query cards with multiple tags (AND logic)
+    cards = index.get_cards_with_all_tags(["tokens", "sacrifice"])
+    
+    # Query cards with any of several tags (OR logic)
+    cards = index.get_cards_with_any_tags(["lifegain", "lifelink"])
+    
+    # Get tags for a specific card
+    tags = index.get_tags_for_card("Sol Ring")
+"""
+from __future__ import annotations
+
+import json
+import os
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, List, Set, Optional
+
+from code.logging_util import get_logger
+from code.services.all_cards_loader import AllCardsLoader
+
+logger = get_logger(__name__)
+
+# Default cache path for persisted index
+DEFAULT_CACHE_PATH = Path("card_files/.tag_index_metadata.json")
+
+
+@dataclass
+class IndexStats:
+    """Statistics about the tag index."""
+    total_cards: int
+    total_tags: int
+    total_mappings: int
+    build_time_seconds: float
+    indexed_at: float  # Unix timestamp
+    all_cards_mtime: float  # Unix timestamp of source file
+
+
+class TagIndex:
+    """Fast reverse index for tag-based card queries.
+    
+    Builds two indexes:
+    - tag → set(card names) - Reverse index for fast tag queries
+    - card → list(tags) - Forward index for card tag lookups
+    
+    Performance:
+    - Index build: <5s for 50k cards
+    - Query time: <1ms per lookup
+    - Memory: ~50-100MB for 30k cards
+    """
+    
+    def __init__(self, cache_path: Optional[Path] = None):
+        """Initialize empty tag index.
+        
+        Args:
+            cache_path: Path to persist index (default: card_files/.tag_index_metadata.json)
+        """
+        self._tag_to_cards: Dict[str, Set[str]] = {}
+        self._card_to_tags: Dict[str, List[str]] = {}
+        self._stats: Optional[IndexStats] = None
+        self._cache_path = cache_path or DEFAULT_CACHE_PATH
+        self._loader = AllCardsLoader()
+    
+    def build(self, force_rebuild: bool = False) -> IndexStats:
+        """Build the tag index from all_cards.
+        
+        Loads all_cards and creates reverse index. If a cached index exists
+        and is up-to-date, loads from cache instead.
+        
+        Args:
+            force_rebuild: If True, rebuild even if cache is valid
+            
+        Returns:
+            IndexStats with build metrics
+        """
+        # Check if we can use cached index
+        if not force_rebuild and self._try_load_from_cache():
+            logger.info(f"Loaded tag index from cache: {self._stats.total_cards} cards, {self._stats.total_tags} tags")
+            return self._stats
+        
+        logger.info("Building tag index from all_cards...")
+        start_time = time.perf_counter()
+        
+        # Load all cards
+        df = self._loader.load()
+        
+        if "themeTags" not in df.columns:
+            logger.warning("themeTags column not found in all_cards")
+            self._stats = IndexStats(
+                total_cards=0,
+                total_tags=0,
+                total_mappings=0,
+                build_time_seconds=0,
+                indexed_at=time.time(),
+                all_cards_mtime=0
+            )
+            return self._stats
+        
+        # Clear existing indexes
+        self._tag_to_cards.clear()
+        self._card_to_tags.clear()
+        
+        # Build indexes
+        total_mappings = 0
+        for _, row in df.iterrows():
+            name = row.get("name")
+            if not name:
+                continue
+            
+            tags = self._normalize_tags(row.get("themeTags", []))
+            if not tags:
+                continue
+            
+            # Store forward mapping (card → tags)
+            self._card_to_tags[name] = tags
+            
+            # Build reverse mapping (tag → cards)
+            for tag in tags:
+                if tag not in self._tag_to_cards:
+                    self._tag_to_cards[tag] = set()
+                self._tag_to_cards[tag].add(name)
+                total_mappings += 1
+        
+        build_time = time.perf_counter() - start_time
+        
+        # Get all_cards mtime for cache validation
+        all_cards_mtime = 0
+        if os.path.exists(self._loader.file_path):
+            all_cards_mtime = os.path.getmtime(self._loader.file_path)
+        
+        self._stats = IndexStats(
+            total_cards=len(self._card_to_tags),
+            total_tags=len(self._tag_to_cards),
+            total_mappings=total_mappings,
+            build_time_seconds=build_time,
+            indexed_at=time.time(),
+            all_cards_mtime=all_cards_mtime
+        )
+        
+        logger.info(
+            f"Built tag index: {self._stats.total_cards} cards, "
+            f"{self._stats.total_tags} unique tags, "
+            f"{self._stats.total_mappings} mappings in {build_time:.2f}s"
+        )
+        
+        # Save to cache
+        self._save_to_cache()
+        
+        return self._stats
+    
+    def _normalize_tags(self, tags: object) -> List[str]:
+        """Normalize tags from various formats to list of strings.
+        
+        Handles:
+        - List of strings/objects
+        - String representations like "['tag1', 'tag2']"
+        - Comma-separated strings
+        - Empty/None values
+        """
+        if not tags:
+            return []
+        
+        if isinstance(tags, list):
+            # Already a list - normalize to strings
+            return [str(t).strip() for t in tags if t and str(t).strip()]
+        
+        if isinstance(tags, str):
+            # Handle empty or list repr
+            if not tags or tags == "[]":
+                return []
+            
+            # Try parsing as list repr
+            if tags.startswith("["):
+                import ast
+                try:
+                    parsed = ast.literal_eval(tags)
+                    if isinstance(parsed, list):
+                        return [str(t).strip() for t in parsed if t and str(t).strip()]
+                except (ValueError, SyntaxError):
+                    pass
+            
+            # Fall back to comma-separated
+            return [t.strip() for t in tags.split(",") if t.strip()]
+        
+        return []
+    
+    def get_cards_with_tag(self, tag: str) -> Set[str]:
+        """Get all card names that have a specific tag.
+        
+        Args:
+            tag: Theme tag to search for (case-sensitive)
+            
+        Returns:
+            Set of card names with the tag (empty if tag not found)
+            
+        Performance: O(1) lookup after index is built
+        """
+        return self._tag_to_cards.get(tag, set()).copy()
+    
+    def get_cards_with_all_tags(self, tags: List[str]) -> Set[str]:
+        """Get cards that have ALL specified tags (AND logic).
+        
+        Args:
+            tags: List of tags (card must have all of them)
+            
+        Returns:
+            Set of card names with all tags (empty if no matches)
+            
+        Performance: O(k) where k is number of tags
+        """
+        if not tags:
+            return set()
+        
+        # Start with cards for first tag
+        result = self.get_cards_with_tag(tags[0])
+        
+        # Intersect with cards for each additional tag
+        for tag in tags[1:]:
+            result &= self.get_cards_with_tag(tag)
+            if not result:
+                # Short-circuit if no cards remain
+                break
+        
+        return result
+    
+    def get_cards_with_any_tags(self, tags: List[str]) -> Set[str]:
+        """Get cards that have ANY of the specified tags (OR logic).
+        
+        Args:
+            tags: List of tags (card needs at least one)
+            
+        Returns:
+            Set of card names with at least one tag
+            
+        Performance: O(k) where k is number of tags
+        """
+        result: Set[str] = set()
+        for tag in tags:
+            result |= self.get_cards_with_tag(tag)
+        return result
+    
+    def get_tags_for_card(self, card_name: str) -> List[str]:
+        """Get all tags for a specific card.
+        
+        Args:
+            card_name: Name of the card
+            
+        Returns:
+            List of theme tags for the card (empty if not found)
+            
+        Performance: O(1) lookup
+        """
+        return self._card_to_tags.get(card_name, []).copy()
+    
+    def get_all_tags(self) -> List[str]:
+        """Get list of all tags in the index.
+        
+        Returns:
+            Sorted list of all unique tags
+        """
+        return sorted(self._tag_to_cards.keys())
+    
+    def get_tag_stats(self, tag: str) -> Dict[str, int]:
+        """Get statistics for a specific tag.
+        
+        Args:
+            tag: Tag to get stats for
+            
+        Returns:
+            Dict with 'card_count' key
+        """
+        return {
+            "card_count": len(self._tag_to_cards.get(tag, set()))
+        }
+    
+    def get_popular_tags(self, limit: int = 50) -> List[tuple[str, int]]:
+        """Get most popular tags sorted by card count.
+        
+        Args:
+            limit: Maximum number of tags to return
+            
+        Returns:
+            List of (tag, card_count) tuples sorted by count descending
+        """
+        tag_counts = [
+            (tag, len(cards))
+            for tag, cards in self._tag_to_cards.items()
+        ]
+        tag_counts.sort(key=lambda x: x[1], reverse=True)
+        return tag_counts[:limit]
+    
+    def _save_to_cache(self) -> None:
+        """Save index to cache file."""
+        if not self._stats:
+            return
+        
+        try:
+            cache_data = {
+                "stats": {
+                    "total_cards": self._stats.total_cards,
+                    "total_tags": self._stats.total_tags,
+                    "total_mappings": self._stats.total_mappings,
+                    "build_time_seconds": self._stats.build_time_seconds,
+                    "indexed_at": self._stats.indexed_at,
+                    "all_cards_mtime": self._stats.all_cards_mtime
+                },
+                "tag_to_cards": {
+                    tag: list(cards)
+                    for tag, cards in self._tag_to_cards.items()
+                },
+                "card_to_tags": self._card_to_tags
+            }
+            
+            self._cache_path.parent.mkdir(parents=True, exist_ok=True)
+            with self._cache_path.open("w", encoding="utf-8") as f:
+                json.dump(cache_data, f, indent=2)
+            
+            logger.debug(f"Saved tag index cache to {self._cache_path}")
+            
+        except Exception as e:
+            logger.warning(f"Failed to save tag index cache: {e}")
+    
+    def _try_load_from_cache(self) -> bool:
+        """Try to load index from cache file.
+        
+        Returns:
+            True if cache loaded successfully and is up-to-date
+        """
+        if not self._cache_path.exists():
+            return False
+        
+        try:
+            with self._cache_path.open("r", encoding="utf-8") as f:
+                cache_data = json.load(f)
+            
+            # Check if cache is up-to-date
+            stats_data = cache_data.get("stats", {})
+            cached_mtime = stats_data.get("all_cards_mtime", 0)
+            
+            current_mtime = 0
+            if os.path.exists(self._loader.file_path):
+                current_mtime = os.path.getmtime(self._loader.file_path)
+            
+            if current_mtime > cached_mtime:
+                logger.debug("Tag index cache outdated (all_cards modified)")
+                return False
+            
+            # Load indexes
+            self._tag_to_cards = {
+                tag: set(cards)
+                for tag, cards in cache_data.get("tag_to_cards", {}).items()
+            }
+            self._card_to_tags = cache_data.get("card_to_tags", {})
+            
+            # Restore stats
+            self._stats = IndexStats(**stats_data)
+            
+            return True
+            
+        except Exception as e:
+            logger.warning(f"Failed to load tag index cache: {e}")
+            return False
+    
+    def clear_cache(self) -> None:
+        """Delete the cached index file."""
+        if self._cache_path.exists():
+            self._cache_path.unlink()
+            logger.debug(f"Deleted tag index cache: {self._cache_path}")
+    
+    def get_stats(self) -> Optional[IndexStats]:
+        """Get index statistics.
+        
+        Returns:
+            IndexStats if index has been built, None otherwise
+        """
+        return self._stats
+
+
+# Global index instance
+_global_index: Optional[TagIndex] = None
+
+
+def get_tag_index(force_rebuild: bool = False) -> TagIndex:
+    """Get or create the global tag index.
+    
+    Lazy-loads the index on first access. Subsequent calls return
+    the cached instance.
+    
+    Args:
+        force_rebuild: If True, rebuild the index even if cached
+        
+    Returns:
+        Global TagIndex instance
+    """
+    global _global_index
+    
+    if _global_index is None or force_rebuild:
+        _global_index = TagIndex()
+        _global_index.build(force_rebuild=force_rebuild)
+    elif _global_index._stats is None:
+        # Index exists but hasn't been built yet
+        _global_index.build()
+    
+    return _global_index
+
+
+def clear_global_index() -> None:
+    """Clear the global tag index instance."""
+    global _global_index
+    if _global_index:
+        _global_index.clear_cache()
+    _global_index = None
--- a/code/tagging/tag_loader.py
+++ b/code/tagging/tag_loader.py
@ -0,0 +1,229 @@
+"""Efficient tag loading using consolidated all_cards file.
+
+Provides batch tag loading functions that leverage the all_cards.parquet file
+instead of reading individual card CSV files. This is 10-50x faster for bulk
+operations like deck building.
+
+Usage:
+    # Load tags for multiple cards at once
+    tags_dict = load_tags_for_cards(["Sol Ring", "Lightning Bolt", "Counterspell"])
+    # Returns: {"Sol Ring": ["artifacts"], "Lightning Bolt": ["burn"], ...}
+    
+    # Load tags for a single card
+    tags = load_tags_for_card("Sol Ring")
+    # Returns: ["artifacts", "ramp"]
+"""
+from __future__ import annotations
+
+import os
+from typing import Dict, List, Optional
+
+from code.logging_util import get_logger
+from code.services.all_cards_loader import AllCardsLoader
+
+logger = get_logger(__name__)
+
+# Global loader instance for caching
+_loader_instance: Optional[AllCardsLoader] = None
+
+
+def _get_loader() -> AllCardsLoader:
+    """Get or create the global AllCardsLoader instance."""
+    global _loader_instance
+    if _loader_instance is None:
+        _loader_instance = AllCardsLoader()
+    return _loader_instance
+
+
+def clear_cache() -> None:
+    """Clear the cached all_cards data (useful after updates)."""
+    global _loader_instance
+    _loader_instance = None
+
+
+def load_tags_for_cards(card_names: List[str]) -> Dict[str, List[str]]:
+    """Load theme tags for multiple cards in one batch operation.
+    
+    This is much faster than loading tags for each card individually,
+    especially when dealing with 50+ cards (typical deck size).
+    
+    Args:
+        card_names: List of card names to load tags for
+        
+    Returns:
+        Dictionary mapping card name to list of theme tags.
+        Cards not found or without tags will have empty list.
+        
+    Example:
+        >>> tags = load_tags_for_cards(["Sol Ring", "Lightning Bolt"])
+        >>> tags["Sol Ring"]
+        ["artifacts", "ramp"]
+    """
+    if not card_names:
+        return {}
+    
+    loader = _get_loader()
+    
+    try:
+        # Batch lookup - single query for all cards
+        df = loader.get_by_names(card_names)
+        
+        if df.empty:
+            logger.debug(f"No cards found for {len(card_names)} names")
+            return {name: [] for name in card_names}
+        
+        # Extract tags from DataFrame
+        result: Dict[str, List[str]] = {}
+        
+        if "themeTags" not in df.columns:
+            logger.warning("themeTags column not found in all_cards")
+            return {name: [] for name in card_names}
+        
+        # Build lookup dictionary
+        for _, row in df.iterrows():
+            name = row.get("name")
+            if not name:
+                continue
+                
+            tags = row.get("themeTags", [])
+            
+            # Handle different themeTags formats
+            if isinstance(tags, list):
+                # Already a list - use directly
+                result[name] = [str(t).strip() for t in tags if t]
+            elif isinstance(tags, str):
+                # String format - could be comma-separated or list repr
+                if not tags or tags == "[]":
+                    result[name] = []
+                elif tags.startswith("["):
+                    # List representation like "['tag1', 'tag2']"
+                    import ast
+                    try:
+                        parsed = ast.literal_eval(tags)
+                        if isinstance(parsed, list):
+                            result[name] = [str(t).strip() for t in parsed if t]
+                        else:
+                            result[name] = []
+                    except (ValueError, SyntaxError):
+                        # Fallback to comma split
+                        result[name] = [t.strip() for t in tags.split(",") if t.strip()]
+                else:
+                    # Comma-separated tags
+                    result[name] = [t.strip() for t in tags.split(",") if t.strip()]
+            else:
+                result[name] = []
+        
+        # Fill in missing cards with empty lists
+        for name in card_names:
+            if name not in result:
+                result[name] = []
+        
+        return result
+        
+    except FileNotFoundError:
+        logger.warning("all_cards file not found, returning empty tags")
+        return {name: [] for name in card_names}
+    except Exception as e:
+        logger.error(f"Error loading tags for cards: {e}")
+        return {name: [] for name in card_names}
+
+
+def load_tags_for_card(card_name: str) -> List[str]:
+    """Load theme tags for a single card.
+    
+    For loading tags for multiple cards, use load_tags_for_cards() instead
+    for better performance.
+    
+    Args:
+        card_name: Name of the card
+        
+    Returns:
+        List of theme tags for the card (empty if not found)
+        
+    Example:
+        >>> tags = load_tags_for_card("Sol Ring")
+        >>> "artifacts" in tags
+        True
+    """
+    result = load_tags_for_cards([card_name])
+    return result.get(card_name, [])
+
+
+def get_cards_with_tag(tag: str, limit: Optional[int] = None) -> List[str]:
+    """Get all card names that have a specific tag.
+    
+    Args:
+        tag: Theme tag to search for
+        limit: Maximum number of cards to return (None = no limit)
+        
+    Returns:
+        List of card names with the tag
+        
+    Example:
+        >>> cards = get_cards_with_tag("ramp", limit=10)
+        >>> len(cards) <= 10
+        True
+    """
+    loader = _get_loader()
+    
+    try:
+        df = loader.filter_by_themes([tag], mode="any")
+        
+        if "name" not in df.columns:
+            return []
+        
+        cards = df["name"].tolist()
+        
+        if limit is not None and len(cards) > limit:
+            return cards[:limit]
+        
+        return cards
+        
+    except Exception as e:
+        logger.error(f"Error getting cards with tag '{tag}': {e}")
+        return []
+
+
+def get_cards_with_all_tags(tags: List[str], limit: Optional[int] = None) -> List[str]:
+    """Get all card names that have ALL of the specified tags.
+    
+    Args:
+        tags: List of theme tags (card must have all of them)
+        limit: Maximum number of cards to return (None = no limit)
+        
+    Returns:
+        List of card names with all specified tags
+        
+    Example:
+        >>> cards = get_cards_with_all_tags(["ramp", "artifacts"])
+        >>> # Returns cards that have both ramp AND artifacts tags
+    """
+    loader = _get_loader()
+    
+    try:
+        df = loader.filter_by_themes(tags, mode="all")
+        
+        if "name" not in df.columns:
+            return []
+        
+        cards = df["name"].tolist()
+        
+        if limit is not None and len(cards) > limit:
+            return cards[:limit]
+        
+        return cards
+        
+    except Exception as e:
+        logger.error(f"Error getting cards with all tags {tags}: {e}")
+        return []
+
+
+def is_use_all_cards_enabled() -> bool:
+    """Check if all_cards-based tag loading is enabled.
+    
+    Returns:
+        True if USE_ALL_CARDS_FOR_TAGS is enabled (default: True)
+    """
+    # Check environment variable
+    env_value = os.environ.get("USE_ALL_CARDS_FOR_TAGS", "true").lower()
+    return env_value in ("1", "true", "yes", "on")
--- a/code/tagging/tag_utils.py
+++ b/code/tagging/tag_utils.py
@ -841,7 +841,42 @@ def tag_with_rules_and_logging(
            affected |= mask
    
    count = affected.sum()
-    color_part = f'{color} ' if color else ''
+    # M4 (Parquet Migration): Display color identity more clearly
+    if color:
+        # Map color codes to friendly names
+        color_map = {
+            'w': 'white',
+            'u': 'blue',
+            'b': 'black',
+            'r': 'red',
+            'g': 'green',
+            'wu': 'Azorius',
+            'wb': 'Orzhov',
+            'wr': 'Boros',
+            'wg': 'Selesnya',
+            'ub': 'Dimir',
+            'ur': 'Izzet',
+            'ug': 'Simic',
+            'br': 'Rakdos',
+            'bg': 'Golgari',
+            'rg': 'Gruul',
+            'wub': 'Esper',
+            'wur': 'Jeskai',
+            'wug': 'Bant',
+            'wbr': 'Mardu',
+            'wbg': 'Abzan',
+            'wrg': 'Naya',
+            'ubr': 'Grixis',
+            'ubg': 'Sultai',
+            'urg': 'Temur',
+            'brg': 'Jund',
+            'wubrg': '5-color',
+            '': 'colorless'
+        }
+        color_display = color_map.get(color, color)
+        color_part = f'{color_display} '
+    else:
+        color_part = ''
    full_message = f'Tagged {count} {color_part}{summary_message}'
    
    if logger:
--- a/code/tagging/tagger.py
+++ b/code/tagging/tagger.py
@ -16,16 +16,38 @@ from . import regex_patterns as rgx
 from . import tag_constants
 from . import tag_utils
 from .bracket_policy_applier import apply_bracket_policy_tags
+from .colorless_filter_applier import apply_colorless_filter_tags
+from .combo_tag_applier import apply_combo_tags
 from .multi_face_merger import merge_multi_face_rows
 import logging_util
-from file_setup import setup
-from file_setup.setup_utils import enrich_commander_rows_with_tags
-from settings import COLORS, CSV_DIRECTORY, MULTIPLE_COPY_CARDS
+from file_setup.data_loader import DataLoader
+from settings import COLORS, MULTIPLE_COPY_CARDS
 logger = logging_util.logging.getLogger(__name__)
 logger.setLevel(logging_util.LOG_LEVEL)
 logger.addHandler(logging_util.file_handler)
 logger.addHandler(logging_util.stream_handler)

+# Create DataLoader instance for Parquet operations
+_data_loader = DataLoader()
+
+
+def _get_batch_id_for_color(color: str) -> int:
+    """Get unique batch ID for a color (for parallel-safe batch writes).
+    
+    Args:
+        color: Color name (e.g., 'white', 'blue', 'commander')
+    
+    Returns:
+        Unique integer batch ID based on COLORS index
+    """
+    try:
+        return COLORS.index(color)
+    except ValueError:
+        # Fallback for unknown colors (shouldn't happen)
+        logger.warning(f"Unknown color '{color}', using hash-based batch ID")
+        return hash(color) % 1000
+
+
 _MERGE_FLAG_RAW = str(os.getenv("ENABLE_DFC_MERGE", "") or "").strip().lower()
 if _MERGE_FLAG_RAW in {"0", "false", "off", "disabled"}:
    logger.warning(
@ -150,10 +172,11 @@ def _merge_summary_recorder(color: str):


 def _write_compat_snapshot(df: pd.DataFrame, color: str) -> None:
-    try:  # type: ignore[name-defined]
+    """Write DFC compatibility snapshot (diagnostic output, kept as CSV for now)."""
+    try:
        _DFC_COMPAT_DIR.mkdir(parents=True, exist_ok=True)
        path = _DFC_COMPAT_DIR / f"{color}_cards_unmerged.csv"
-        df.to_csv(path, index=False)
+        df.to_csv(path, index=False)  # M3: Kept as CSV (diagnostic only, not main data flow)
        logger.info("Wrote unmerged snapshot for %s to %s", color, path)
    except Exception as exc:
        logger.warning("Failed to write unmerged snapshot for %s: %s", color, exc)
@ -304,71 +327,135 @@ def _apply_metadata_partition(df: pd.DataFrame) -> tuple[pd.DataFrame, Dict[str,
    return df, diagnostics

 ### Setup
-## Load the dataframe
-def load_dataframe(color: str) -> None:
+## Load and tag all cards from Parquet (M3: no longer per-color)
+def load_and_tag_all_cards(parallel: bool = False, max_workers: int | None = None) -> None:
    """
-    Load and validate the card dataframe for a given color.
-
+    Load all cards from Parquet, apply tags, write back.
+    
+    M3.13: Now supports parallel tagging for significant performance improvement.
+    
    Args:
-        color (str): The color of cards to load ('white', 'blue', etc)
-
+        parallel: If True, use parallel tagging (recommended - 2-3x faster)
+        max_workers: Maximum parallel workers (default: CPU count)
+    
    Raises:
-        FileNotFoundError: If CSV file doesn't exist and can't be regenerated
+        FileNotFoundError: If all_cards.parquet doesn't exist
        ValueError: If required columns are missing
    """
    try:
-        filepath = f'{CSV_DIRECTORY}/{color}_cards.csv'
-
-        # Check if file exists, regenerate if needed
-        if not os.path.exists(filepath):
-            logger.warning(f'{color}_cards.csv not found, regenerating it.')
-            setup.regenerate_csv_by_color(color)
-            if not os.path.exists(filepath):
-                raise FileNotFoundError(f"Failed to generate {filepath}")
-
-        # Load initial dataframe for validation
-        check_df = pd.read_csv(filepath)
-        required_columns = ['creatureTypes', 'themeTags'] 
-        missing_columns = [col for col in required_columns if col not in check_df.columns]
+        from code.path_util import get_processed_cards_path
+        
+        # Load from all_cards.parquet
+        all_cards_path = get_processed_cards_path()
+        
+        if not os.path.exists(all_cards_path):
+            raise FileNotFoundError(
+                f"Processed cards file not found: {all_cards_path}. "
+                "Run initial_setup_parquet() first."
+            )
+        
+        logger.info(f"Loading all cards from {all_cards_path}")
+        
+        # Load all cards from Parquet
+        df = _data_loader.read_cards(all_cards_path, format="parquet")
+        logger.info(f"Loaded {len(df)} cards for tagging")
+        
+        # Validate and add required columns
+        required_columns = ['creatureTypes', 'themeTags']
+        missing_columns = [col for col in required_columns if col not in df.columns]
+        
        if missing_columns:
            logger.warning(f"Missing columns: {missing_columns}")
-            if 'creatureTypes' not in check_df.columns:
-                kindred_tagging(check_df, color)
-            if 'themeTags' not in check_df.columns:
-                create_theme_tags(check_df, color)
-
-            # Persist newly added columns before re-reading with converters
-            try:
-                check_df.to_csv(filepath, index=False)
-            except Exception as e:
-                logger.error(f'Failed to persist added columns to {filepath}: {e}')
-                raise
-
-            # Verify columns were added successfully
-            check_df = pd.read_csv(filepath)
-            still_missing = [col for col in required_columns if col not in check_df.columns]
-            if still_missing:
-                raise ValueError(f"Failed to add required columns: {still_missing}")
-
-        # Load final dataframe with proper converters
-        # M3: metadataTags is optional (may not exist in older CSVs)
-        converters = {'themeTags': pd.eval, 'creatureTypes': pd.eval}
-        if 'metadataTags' in check_df.columns:
-            converters['metadataTags'] = pd.eval
+            
+            if 'creatureTypes' not in df.columns:
+                kindred_tagging(df, 'wubrg')  # Use wubrg (all colors) for unified tagging
+            
+            if 'themeTags' not in df.columns:
+                create_theme_tags(df, 'wubrg')
        
-        df = pd.read_csv(filepath, converters=converters)
-        tag_by_color(df, color)
+        # Parquet stores lists natively, no need for converters
+        # Just ensure list columns are properly initialized
+        if 'themeTags' in df.columns and df['themeTags'].isna().any():
+            df['themeTags'] = df['themeTags'].apply(lambda x: x if isinstance(x, list) else [])
+        
+        if 'creatureTypes' in df.columns and df['creatureTypes'].isna().any():
+            df['creatureTypes'] = df['creatureTypes'].apply(lambda x: x if isinstance(x, list) else [])
+        
+        if 'metadataTags' in df.columns and df['metadataTags'].isna().any():
+            df['metadataTags'] = df['metadataTags'].apply(lambda x: x if isinstance(x, list) else [])
+        
+        # M3.13: Run tagging (parallel or sequential)
+        if parallel:
+            logger.info("Using PARALLEL tagging (ProcessPoolExecutor)")
+            df_tagged = tag_all_cards_parallel(df, max_workers=max_workers)
+        else:
+            logger.info("Using SEQUENTIAL tagging (single-threaded)")
+            df_tagged = _tag_all_cards_sequential(df)
+        
+        # M3.13: Common post-processing (DFC merge, sorting, partitioning, writing)
+        color = 'wubrg'
+        
+        # Merge multi-face entries before final ordering (feature-flagged)
+        if DFC_COMPAT_SNAPSHOT:
+            try:
+                _write_compat_snapshot(df_tagged.copy(deep=True), color)
+            except Exception:
+                pass
+
+        df_merged = merge_multi_face_rows(df_tagged, color, logger=logger, recorder=_merge_summary_recorder(color))
+        
+        # Commander enrichment - TODO: Update for Parquet
+        logger.info("Commander enrichment temporarily disabled for Parquet migration")
+
+        # Sort all theme tags for easier reading and reorder columns
+        df_final = sort_theme_tags(df_merged, color)
+        
+        # Apply combo tags (Commander Spellbook integration) - must run after merge
+        apply_combo_tags(df_final)
+        
+        # M3: Partition metadata tags from theme tags
+        df_final, partition_diagnostics = _apply_metadata_partition(df_final)
+        if partition_diagnostics.get("enabled"):
+            logger.info(f"Metadata partition: {partition_diagnostics['metadata_tags_moved']} metadata, "
+                       f"{partition_diagnostics['theme_tags_kept']} theme tags")
+        
+        # M3: Write directly to all_cards.parquet
+        output_path = get_processed_cards_path()
+        _data_loader.write_cards(df_final, output_path, format="parquet")
+        logger.info(f'✓ Wrote {len(df_final)} tagged cards to {output_path}')
+        
+        # M7: Write commander-only cache file for fast lookups
+        try:
+            if 'isCommander' in df_final.columns:
+                commander_df = df_final[df_final['isCommander'] == True].copy()  # noqa: E712
+                commander_path = os.path.join(os.path.dirname(output_path), 'commander_cards.parquet')
+                _data_loader.write_cards(commander_df, commander_path, format="parquet")
+                logger.info(f'✓ Wrote {len(commander_df)} commanders to {commander_path}')
+        except Exception as e:
+            logger.warning(f'Failed to write commander cache: {e}')

    except FileNotFoundError as e:
        logger.error(f'Error: {e}')
        raise
-    except pd.errors.ParserError as e:
-        logger.error(f'Error parsing the CSV file: {e}')
-        raise
    except Exception as e:
-        logger.error(f'An unexpected error occurred: {e}')
+        logger.error(f'An unexpected error occurred during tagging: {e}')
        raise

+
+# M3: Keep old load_dataframe for backward compatibility (deprecated)
+def load_dataframe(color: str) -> None:
+    """DEPRECATED: Use load_and_tag_all_cards() instead.
+    
+    M3 Note: This function is kept for backward compatibility but should
+    not be used. The per-color approach was only needed for CSV files.
+    """
+    logger.warning(
+        f"load_dataframe({color}) is deprecated in Parquet migration. "
+        "This will process all cards unnecessarily."
+    )
+    load_and_tag_all_cards()
+
+
 def _tag_foundational_categories(df: pd.DataFrame, color: str) -> None:
    """Apply foundational card categorization (creature types, card types, keywords).
    
@ -493,6 +580,9 @@ def tag_by_color(df: pd.DataFrame, color: str) -> None:
    
    # Apply bracket policy tags (from config/card_lists/*.json)
    apply_bracket_policy_tags(df)
+    
+    # Apply colorless filter tags (M1: Useless in Colorless)
+    apply_colorless_filter_tags(df)
    print('\n====================\n')

    # Merge multi-face entries before final ordering (feature-flagged)
@ -505,7 +595,9 @@ def tag_by_color(df: pd.DataFrame, color: str) -> None:
    df = merge_multi_face_rows(df, color, logger=logger, recorder=_merge_summary_recorder(color))

    if color == 'commander':
-        df = enrich_commander_rows_with_tags(df, CSV_DIRECTORY)
+        # M3 TODO: Update commander enrichment for Parquet
+        logger.warning("Commander enrichment temporarily disabled for Parquet migration")
+        # df = enrich_commander_rows_with_tags(df, CSV_DIRECTORY)

    # Sort all theme tags for easier reading and reorder columns
    df = sort_theme_tags(df, color)
@ -516,11 +608,214 @@ def tag_by_color(df: pd.DataFrame, color: str) -> None:
        logger.info(f"Metadata partition for {color}: {partition_diagnostics['metadata_tags_moved']} metadata, "
                   f"{partition_diagnostics['theme_tags_kept']} theme tags")
    
-    df.to_csv(f'{CSV_DIRECTORY}/{color}_cards.csv', index=False)
-    #print(df)
+    # M3: Write batch Parquet file instead of CSV
+    batch_id = _get_batch_id_for_color(color)
+    batch_path = _data_loader.write_batch_parquet(df, batch_id=batch_id, tag=color)
+    logger.info(f'✓ Wrote batch {batch_id} ({color}): {len(df)} cards → {batch_path}')
+
+
+## M3.13: Parallel worker function (runs in separate process)
+def _tag_color_group_worker(df_pickled: bytes, color_id: str) -> bytes:
+    """Worker function for parallel tagging (runs in separate process).
+    
+    This function is designed to run in a ProcessPoolExecutor worker. It receives
+    a pickled DataFrame subset (one color identity group), applies all tag functions,
+    and returns the tagged DataFrame (also pickled).
+    
+    Args:
+        df_pickled: Pickled DataFrame containing cards of a single color identity
+        color_id: Color identity string for logging (e.g., 'W', 'WU', 'WUBRG', '')
+        
+    Returns:
+        Pickled DataFrame with all tags applied
+        
+    Note:
+        - This function must be picklable itself (no lambdas, local functions, etc.)
+        - Logging is color-prefixed for easier debugging in parallel execution
+        - DFC merge is NOT done here (happens after parallel merge in main process)
+        - Uses 'wubrg' as the color parameter for tag functions (generic "all colors")
+    """
+    import pickle
+    
+    # Unpickle the DataFrame
+    df = pickle.loads(df_pickled)
+    
+    # Use 'wubrg' for tag functions (they don't actually need color-specific logic)
+    # Just use color_id for logging display
+    display_color = color_id if color_id else 'colorless'
+    tag_color = 'wubrg'  # Generic color for tag functions
+    
+    logger.info(f"[{display_color}] Starting tagging for {len(df)} cards")
+    
+    # Apply all tagging functions (same order as tag_all_cards)
+    # Note: Tag functions use tag_color ('wubrg') for internal logic
+    _tag_foundational_categories(df, tag_color)
+    _tag_mechanical_themes(df, tag_color)
+    _tag_strategic_themes(df, tag_color)
+    _tag_archetype_themes(df, tag_color)
+    
+    # Apply bracket policy tags (from config/card_lists/*.json)
+    apply_bracket_policy_tags(df)
+    
+    # Apply colorless filter tags (M1: Useless in Colorless)
+    apply_colorless_filter_tags(df)
+    
+    logger.info(f"[{display_color}] ✓ Completed tagging for {len(df)} cards")
+    
+    # Return pickled DataFrame
+    return pickle.dumps(df)
+
+
+## M3.13: Parallel tagging implementation
+def tag_all_cards_parallel(df: pd.DataFrame, max_workers: int | None = None) -> pd.DataFrame:
+    """Tag all cards using parallel processing by color identity groups.
+    
+    This function splits the input DataFrame by color identity, processes each
+    group in parallel using ProcessPoolExecutor, then merges the results back
+    together. This provides significant speedup over sequential processing.
+    
+    Args:
+        df: DataFrame containing all card data
+        max_workers: Maximum number of parallel workers (default: CPU count)
+        
+    Returns:
+        Tagged DataFrame (note: does NOT include DFC merge - caller handles that)
+        
+    Note:
+        - Typical speedup: 2-3x faster than sequential on multi-core systems
+        - Each color group is tagged independently (pure functions)
+        - DFC merge happens after parallel merge in calling function
+    """
+    from concurrent.futures import ProcessPoolExecutor, as_completed
+    from .parallel_utils import split_by_color_identity, merge_color_groups
+    import pickle
+    
+    logger.info(f"Starting parallel tagging for {len(df)} cards (max_workers={max_workers})")
+    
+    # Split into color identity groups
+    color_groups = split_by_color_identity(df)
+    logger.info(f"Split into {len(color_groups)} color identity groups")
+    
+    # Track results
+    tagged_groups: dict[str, pd.DataFrame] = {}
+    
+    # Process groups in parallel
+    with ProcessPoolExecutor(max_workers=max_workers) as executor:
+        # Submit all work
+        future_to_color = {
+            executor.submit(_tag_color_group_worker, pickle.dumps(group_df), color_id): color_id
+            for color_id, group_df in color_groups.items()
+        }
+        
+        # Collect results as they complete
+        completed = 0
+        total = len(future_to_color)
+        
+        for future in as_completed(future_to_color):
+            color_id = future_to_color[future]
+            display_color = color_id if color_id else 'colorless'
+            
+            try:
+                # Get result and unpickle
+                result_pickled = future.result()
+                tagged_df = pickle.loads(result_pickled)
+                tagged_groups[color_id] = tagged_df
+                
+                completed += 1
+                pct = int(completed * 100 / total)
+                logger.info(f"✓ [{display_color}] Completed ({completed}/{total}, {pct}%)")
+                
+            except Exception as e:
+                logger.error(f"✗ [{display_color}] Worker failed: {e}")
+                raise
+    
+    # Merge all tagged groups back together
+    logger.info("Merging tagged color groups...")
+    df_tagged = merge_color_groups(tagged_groups)
+    logger.info(f"✓ Parallel tagging complete: {len(df_tagged)} cards tagged")
+    
+    return df_tagged
+
+
+## M3.13: Sequential tagging (refactored to return DataFrame)
+def _tag_all_cards_sequential(df: pd.DataFrame) -> pd.DataFrame:
+    """Tag all cards sequentially (single-threaded).
+    
+    This is the sequential version used when parallel=False.
+    It applies all tag functions to the full DataFrame at once.
+    
+    Args:
+        df: DataFrame containing all card data
+        
+    Returns:
+        Tagged DataFrame (does NOT include DFC merge - caller handles that)
+    """
+    logger.info(f"Starting sequential tagging for {len(df)} cards")
+    
+    # M3: Use 'wubrg' as color identifier (represents all colors, exists in COLORS list)
+    color = 'wubrg'
+    
+    _tag_foundational_categories(df, color)
+    _tag_mechanical_themes(df, color)
+    _tag_strategic_themes(df, color)
+    _tag_archetype_themes(df, color)
+    
+    # Apply bracket policy tags (from config/card_lists/*.json)
+    apply_bracket_policy_tags(df)
+    
+    # Apply colorless filter tags (M1: Useless in Colorless)
+    apply_colorless_filter_tags(df)
    print('\n====================\n')
-    logger.info(f'Tags are done being set on {color}_cards.csv')
-    #keyboard.wait('esc')
+    
+    logger.info(f"✓ Sequential tagging complete: {len(df)} cards tagged")
+    return df
+
+
+## M3: Keep old tag_all_cards for backward compatibility (now calls sequential version)
+def tag_all_cards(df: pd.DataFrame) -> None:
+    """DEPRECATED: Use load_and_tag_all_cards() instead.
+    
+    This function is kept for backward compatibility but does the full
+    workflow including DFC merge and file writing, which may not be desired.
+    
+    Args:
+        df: DataFrame containing all card data
+    """
+    logger.warning("tag_all_cards() is deprecated. Use load_and_tag_all_cards() instead.")
+    
+    # Tag the cards (modifies df in-place)
+    _tag_all_cards_sequential(df)
+    
+    # Do post-processing (for backward compatibility)
+    color = 'wubrg'
+    
+    # Merge multi-face entries before final ordering (feature-flagged)
+    if DFC_COMPAT_SNAPSHOT:
+        try:
+            _write_compat_snapshot(df.copy(deep=True), color)
+        except Exception:
+            pass
+
+    df_merged = merge_multi_face_rows(df, color, logger=logger, recorder=_merge_summary_recorder(color))
+    
+    # Commander enrichment - TODO: Update for Parquet
+    logger.info("Commander enrichment temporarily disabled for Parquet migration")
+
+    # Sort all theme tags for easier reading and reorder columns
+    df_final = sort_theme_tags(df_merged, color)
+    
+    # M3: Partition metadata tags from theme tags
+    df_final, partition_diagnostics = _apply_metadata_partition(df_final)
+    if partition_diagnostics.get("enabled"):
+        logger.info(f"Metadata partition: {partition_diagnostics['metadata_tags_moved']} metadata, "
+                   f"{partition_diagnostics['theme_tags_kept']} theme tags")
+    
+    # M3: Write directly to all_cards.parquet
+    from code.path_util import get_processed_cards_path
+    output_path = get_processed_cards_path()
+    _data_loader.write_cards(df_final, output_path, format="parquet")
+    logger.info(f'✓ Wrote {len(df_final)} tagged cards to {output_path}')
+

 ## Determine any non-creature cards that have creature types mentioned
 def kindred_tagging(df: pd.DataFrame, color: str) -> None:
@ -769,7 +1064,7 @@ def tag_for_keywords(df: pd.DataFrame, color: str) -> None:
            exclusion_keywords = {'partner'}

            def _merge_keywords(row: pd.Series) -> list[str]:
-                base_tags = row['themeTags'] if isinstance(row['themeTags'], list) else []
+                base_tags = list(row['themeTags']) if hasattr(row.get('themeTags'), '__len__') and not isinstance(row.get('themeTags'), str) else []
                keywords_raw = row['keywords']

                if isinstance(keywords_raw, str):
@ -814,9 +1109,27 @@ def sort_theme_tags(df, color):
    # Sort the list of tags in-place per row
    df['themeTags'] = df['themeTags'].apply(tag_utils.sort_list)

-    # Reorder columns for final CSV output; return a reindexed copy
-    columns_to_keep = ['name', 'faceName','edhrecRank', 'colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', 'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side']
-    available = [c for c in columns_to_keep if c in df.columns]
+    # Reorder columns for final output
+    # M3: Preserve ALL columns (isCommander, isBackground, metadataTags, etc.)
+    # BUT exclude temporary cache columns (__*_s)
+    base_columns = ['name', 'faceName','edhrecRank', 'colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', 'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side']
+    
+    # Add M3 columns if present
+    if 'metadataTags' in df.columns and 'metadataTags' not in base_columns:
+        base_columns.append('metadataTags')
+    
+    # Add columns from setup_parquet (isCommander, isBackground)
+    for col in ['isCommander', 'isBackground']:
+        if col in df.columns and col not in base_columns:
+            base_columns.append(col)
+    
+    # Preserve any other columns not in base list (flexibility for future additions)
+    # EXCEPT temporary cache columns (start with __)
+    for col in df.columns:
+        if col not in base_columns and not col.startswith('__'):
+            base_columns.append(col)
+    
+    available = [c for c in base_columns if c in df.columns]
    logger.info(f'Theme tags alphabetically sorted in {color}_cards.csv.')
    return df.reindex(columns=available)

@ -3940,7 +4253,9 @@ def tag_for_themes(df: pd.DataFrame, color: str) -> None:
        ValueError: If required DataFrame columns are missing
    """
    start_time = pd.Timestamp.now()
-    logger.info(f'Starting tagging for remaining themes in {color}_cards.csv')
+    # M4 (Parquet Migration): Updated logging to reflect unified tagging
+    color_display = color if color else 'colorless'
+    logger.info(f'Starting tagging for remaining themes in {color_display} cards')
    print('\n===============\n')
    tag_for_aggro(df, color)
    print('\n==========\n')
@ -5128,7 +5443,7 @@ def tag_for_multiple_copies(df: pd.DataFrame, color: str) -> None:
            # Add per-card rules for individual name tags
            rules.extend({'mask': (df['name'] == card_name), 'tags': [card_name]} for card_name in matching_cards)
            tag_utils.apply_rules(df, rules=rules)
-            logger.info(f'Tagged {multiple_copies_mask.sum()} cards with multiple copies effects for {color}')
+            logger.info(f'Tagged {multiple_copies_mask.sum()} cards with multiple copies effects')

    except Exception as e:
        logger.error(f'Error in tag_for_multiple_copies: {str(e)}')
@ -6379,7 +6694,7 @@ def tag_for_protection(df: pd.DataFrame, color: str) -> None:
            logger.info(f'Applied specific protection ability tags to {ability_tag_count} cards')

        # Log results
-        logger.info(f'Tagged {final_mask.sum()} cards with protection effects for {color}')
+        logger.info(f'Tagged {final_mask.sum()} cards with protection effects')

    except Exception as e:
        logger.error(f'Error in tag_for_protection: {str(e)}')
@ -6465,7 +6780,7 @@ def tag_for_phasing(df: pd.DataFrame, color: str) -> None:
            logger.info(f'Applied Removal tag to {removal_count} cards with opponent-targeting phasing')

        # Log results
-        logger.info(f'Tagged {phasing_mask.sum()} cards with phasing effects for {color}')
+        logger.info(f'Tagged {phasing_mask.sum()} cards with phasing effects')

    except Exception as e:
        logger.error(f'Error in tag_for_phasing: {str(e)}')
@ -6539,39 +6854,52 @@ def tag_for_removal(df: pd.DataFrame, color: str) -> None:
        raise

 def run_tagging(parallel: bool = False, max_workers: int | None = None):
-    """Run tagging across all COLORS.
+    """Run tagging on all cards (M3.13: now supports parallel processing).

    Args:
-        parallel: If True, process colors in parallel using multiple processes.
-        max_workers: Optional cap on worker processes.
+        parallel: If True, use parallel tagging (recommended - 2-3x faster)
+        max_workers: Maximum parallel workers (default: CPU count)
    """
    start_time = pd.Timestamp.now()

-    if parallel and DFC_PER_FACE_SNAPSHOT:
-        logger.warning("DFC_PER_FACE_SNAPSHOT=1 detected; per-face metadata snapshots require sequential tagging. Parallel run will skip snapshot emission.")
-
-    if parallel:
-        try:
-            import concurrent.futures as _f
-            # Use processes to bypass GIL; each color reads/writes distinct CSV
-            with _f.ProcessPoolExecutor(max_workers=max_workers) as ex:
-                futures = {ex.submit(load_dataframe, color): color for color in COLORS}
-                for fut in _f.as_completed(futures):
-                    color = futures[fut]
-                    try:
-                        fut.result()
-                    except Exception as e:
-                        logger.error(f'Parallel worker failed for {color}: {e}')
-                        raise
-        except Exception:
-            # Fallback to sequential on any multiprocessing setup error
-            logger.warning('Parallel mode failed to initialize; falling back to sequential.')
-            for color in COLORS:
-                load_dataframe(color)
-    else:
-        for color in COLORS:
-            load_dataframe(color)
+    if DFC_PER_FACE_SNAPSHOT:
+        logger.info("DFC_PER_FACE_SNAPSHOT enabled for unified tagging")

+    # M3.13: Unified tagging with optional parallelization
+    mode = "PARALLEL" if parallel else "SEQUENTIAL"
+    logger.info(f"Starting unified tagging ({mode} mode)")
+    load_and_tag_all_cards(parallel=parallel, max_workers=max_workers)
+    
+    # Flush per-face snapshots if enabled
    _flush_per_face_snapshot()
+    
    duration = (pd.Timestamp.now() - start_time).total_seconds()
-    logger.info(f'Tagged cards in {duration:.2f}s')
+    logger.info(f'✓ Tagged cards in {duration:.2f}s ({mode} mode)')
+    
+    # M4: Write tagging completion flag to processed directory
+    try:
+        import os
+        import json
+        from datetime import datetime, UTC
+        
+        flag_dir = os.path.join("card_files", "processed")
+        os.makedirs(flag_dir, exist_ok=True)
+        flag_path = os.path.join(flag_dir, ".tagging_complete.json")
+        
+        with open(flag_path, "w", encoding="utf-8") as f:
+            json.dump({
+                "completed_at": datetime.now(UTC).isoformat(timespec="seconds"),
+                "mode": mode,
+                "parallel": parallel,
+                "duration_seconds": duration
+            }, f, indent=2)
+        
+        logger.info(f"✓ Wrote tagging completion flag to {flag_path}")
+    except Exception as e:
+        logger.warning(f"Failed to write tagging completion flag: {e}")
+
+
+
+
+
+
--- a/code/tagging/tagger_card_centric.py
+++ b/code/tagging/tagger_card_centric.py
@ -0,0 +1,200 @@
+"""Card-centric tagging approach for performance comparison.
+
+This module implements a single-pass tagging strategy where we iterate
+through each card once and apply all applicable tags, rather than
+iterating through all cards for each tag type.
+
+Performance hypothesis: Single-pass should be faster due to:
+- Better cache locality (sequential card access)
+- Fewer DataFrame iterations
+- Less memory thrashing
+
+Trade-offs:
+- All tagging logic in one place (harder to maintain)
+- More complex per-card logic
+- Less modular than tag-centric approach
+
+M3: Created for Parquet migration performance testing.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import List, Set
+
+import pandas as pd
+
+from logging_util import get_logger
+
+logger = get_logger(__name__)
+
+
+class CardCentricTagger:
+    """Single-pass card tagger that applies all tags to each card sequentially."""
+    
+    def __init__(self):
+        """Initialize tagger with compiled regex patterns for performance."""
+        # Pre-compile common regex patterns
+        self.ramp_pattern = re.compile(
+            r'add .*mana|search.*land|ramp|cultivate|kodama|explosive vegetation',
+            re.IGNORECASE
+        )
+        self.draw_pattern = re.compile(
+            r'draw.*card|card draw|divination|ancestral|opt|cantrip',
+            re.IGNORECASE
+        )
+        self.removal_pattern = re.compile(
+            r'destroy|exile|counter|return.*hand|bounce|murder|wrath|swords',
+            re.IGNORECASE
+        )
+        self.token_pattern = re.compile(
+            r'create.*token|token.*creature|populate|embalm',
+            re.IGNORECASE
+        )
+        # Add more patterns as needed
+        
+    def tag_single_card(self, row: pd.Series) -> List[str]:
+        """Apply all applicable tags to a single card.
+        
+        Args:
+            row: pandas Series representing a card
+            
+        Returns:
+            List of tags that apply to this card
+        """
+        tags: Set[str] = set()
+        
+        # Extract common fields
+        text = str(row.get('text', '')).lower()
+        type_line = str(row.get('type', '')).lower()
+        keywords = row.get('keywords', [])
+        if isinstance(keywords, str):
+            keywords = [keywords]
+        mana_value = row.get('manaValue', 0)
+        
+        # === FOUNDATIONAL TAGS ===
+        
+        # Card types
+        if 'creature' in type_line:
+            tags.add('Creature')
+        if 'instant' in type_line:
+            tags.add('Instant')
+        if 'sorcery' in type_line:
+            tags.add('Sorcery')
+        if 'artifact' in type_line:
+            tags.add('Artifact')
+        if 'enchantment' in type_line:
+            tags.add('Enchantment')
+        if 'planeswalker' in type_line:
+            tags.add('Planeswalker')
+        if 'land' in type_line:
+            tags.add('Land')
+        
+        # === MECHANICAL TAGS ===
+        
+        # Ramp
+        if self.ramp_pattern.search(text):
+            tags.add('Ramp')
+            
+        # Card draw
+        if self.draw_pattern.search(text):
+            tags.add('Card Draw')
+            
+        # Removal
+        if self.removal_pattern.search(text):
+            tags.add('Removal')
+            tags.add('Interaction')
+            
+        # Tokens
+        if self.token_pattern.search(text):
+            tags.add('Tokens')
+        
+        # Keywords
+        if keywords:
+            for kw in keywords:
+                kw_lower = str(kw).lower()
+                if 'flash' in kw_lower:
+                    tags.add('Flash')
+                if 'haste' in kw_lower:
+                    tags.add('Haste')
+                if 'flying' in kw_lower:
+                    tags.add('Flying')
+                # Add more keyword mappings
+        
+        # === STRATEGIC TAGS ===
+        
+        # Voltron (equipment, auras on creatures)
+        if 'equipment' in type_line or 'equip' in text:
+            tags.add('Voltron')
+            tags.add('Equipment')
+        
+        if 'aura' in type_line and 'enchant creature' in text:
+            tags.add('Voltron')
+            tags.add('Auras')
+        
+        # Spellslinger (cares about instants/sorceries)
+        if 'instant' in text and 'sorcery' in text:
+            tags.add('Spellslinger')
+        
+        # Graveyard matters
+        if any(word in text for word in ['graveyard', 'flashback', 'unearth', 'delve', 'escape']):
+            tags.add('Graveyard')
+        
+        # === ARCHETYPE TAGS ===
+        
+        # Combo pieces (based on specific card text patterns)
+        if 'infinite' in text or 'any number' in text:
+            tags.add('Combo')
+        
+        # === MV-BASED TAGS ===
+        
+        if mana_value <= 2:
+            tags.add('Low MV')
+        elif mana_value >= 6:
+            tags.add('High MV')
+        
+        return sorted(list(tags))
+    
+    def tag_all_cards(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Apply tags to all cards in a single pass.
+        
+        Args:
+            df: DataFrame containing card data
+            
+        Returns:
+            DataFrame with themeTags column populated
+        """
+        logger.info(f"Starting card-centric tagging for {len(df)} cards")
+        
+        # Initialize themeTags column if not exists
+        if 'themeTags' not in df.columns:
+            df['themeTags'] = None
+        
+        # Single pass through all cards
+        tag_counts = {}
+        for idx in df.index:
+            row = df.loc[idx]
+            tags = self.tag_single_card(row)
+            df.at[idx, 'themeTags'] = tags
+            
+            # Track tag frequency
+            for tag in tags:
+                tag_counts[tag] = tag_counts.get(tag, 0) + 1
+        
+        logger.info(f"Tagged {len(df)} cards with {len(tag_counts)} unique tags")
+        logger.info(f"Top 10 tags: {sorted(tag_counts.items(), key=lambda x: x[1], reverse=True)[:10]}")
+        
+        return df
+
+
+def tag_all_cards_single_pass(df: pd.DataFrame) -> pd.DataFrame:
+    """Convenience function for single-pass tagging.
+    
+    Args:
+        df: DataFrame containing card data
+        
+    Returns:
+        DataFrame with themeTags populated
+    """
+    tagger = CardCentricTagger()
+    return tagger.tag_all_cards(df)
--- a/code/tagging/theme_enrichment.py
+++ b/code/tagging/theme_enrichment.py
@ -0,0 +1,602 @@
+"""Consolidated theme metadata enrichment pipeline.
+
+Replaces 7 separate subprocess scripts with single efficient in-memory pipeline:
+1. autofill_min_examples - Add placeholder examples
+2. pad_min_examples - Pad to minimum threshold
+3. cleanup_placeholder_examples - Remove placeholders when real examples added
+4. purge_anchor_placeholders - Purge legacy anchor placeholders
+5. augment_theme_yaml_from_catalog - Add descriptions/popularity from catalog
+6. generate_theme_editorial_suggestions - Generate editorial suggestions
+7. lint_theme_editorial - Validate metadata
+
+Performance improvement: 5-10x faster by loading all YAMLs once, processing in memory,
+writing once at the end.
+"""
+from __future__ import annotations
+
+import json
+import re
+import string
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Set
+
+try:
+    import yaml  # type: ignore
+except ImportError:  # pragma: no cover
+    yaml = None
+
+
+@dataclass
+class ThemeData:
+    """In-memory representation of a theme YAML file."""
+    path: Path
+    data: Dict[str, Any]
+    modified: bool = False
+
+
+@dataclass
+class EnrichmentStats:
+    """Statistics for enrichment pipeline run."""
+    autofilled: int = 0
+    padded: int = 0
+    cleaned: int = 0
+    purged: int = 0
+    augmented: int = 0
+    suggestions_added: int = 0
+    lint_errors: int = 0
+    lint_warnings: int = 0
+    total_themes: int = 0
+    
+    def __str__(self) -> str:
+        return (
+            f"Enrichment complete: {self.total_themes} themes processed | "
+            f"autofilled:{self.autofilled} padded:{self.padded} cleaned:{self.cleaned} "
+            f"purged:{self.purged} augmented:{self.augmented} suggestions:{self.suggestions_added} | "
+            f"lint: {self.lint_errors} errors, {self.lint_warnings} warnings"
+        )
+
+
+class ThemeEnrichmentPipeline:
+    """Consolidated theme metadata enrichment pipeline."""
+    
+    def __init__(
+        self,
+        root: Optional[Path] = None,
+        min_examples: int = 5,
+        progress_callback: Optional[Callable[[str], None]] = None,
+    ):
+        """Initialize the enrichment pipeline.
+        
+        Args:
+            root: Project root directory (defaults to auto-detect)
+            min_examples: Minimum number of example commanders required
+            progress_callback: Optional callback for progress updates (for web UI)
+        """
+        if root is None:
+            # Auto-detect root (3 levels up from this file)
+            root = Path(__file__).resolve().parents[2]
+        
+        self.root = root
+        self.catalog_dir = root / 'config' / 'themes' / 'catalog'
+        self.theme_json = root / 'config' / 'themes' / 'theme_list.json'
+        self.csv_dir = root / 'csv_files'
+        self.min_examples = min_examples
+        self.progress_callback = progress_callback
+        
+        self.themes: Dict[Path, ThemeData] = {}
+        self.stats = EnrichmentStats()
+        
+        # Cached data
+        self._catalog_map: Optional[Dict[str, Dict[str, Any]]] = None
+        self._card_suggestions: Optional[Dict[str, Any]] = None
+    
+    def _emit(self, message: str) -> None:
+        """Emit progress message via callback or print."""
+        if self.progress_callback:
+            try:
+                self.progress_callback(message)
+            except Exception:
+                pass
+        else:
+            print(message, flush=True)
+    
+    def load_all_themes(self) -> None:
+        """Load all theme YAML files into memory (Step 0)."""
+        if not self.catalog_dir.exists():
+            self._emit("Warning: Catalog directory does not exist")
+            return
+        
+        paths = sorted(self.catalog_dir.glob('*.yml'))
+        self.stats.total_themes = len(paths)
+        
+        for path in paths:
+            try:
+                if yaml is None:
+                    raise RuntimeError("PyYAML not installed")
+                data = yaml.safe_load(path.read_text(encoding='utf-8'))
+                if isinstance(data, dict):
+                    self.themes[path] = ThemeData(path=path, data=data)
+            except Exception as e:
+                self._emit(f"Warning: Failed to load {path.name}: {e}")
+        
+        self._emit(f"Loaded {len(self.themes)} theme files")
+    
+    def _is_deprecated_alias(self, theme_data: Dict[str, Any]) -> bool:
+        """Check if theme is a deprecated alias placeholder."""
+        notes = theme_data.get('notes')
+        return isinstance(notes, str) and 'Deprecated alias file' in notes
+    
+    def _is_placeholder(self, entry: str) -> bool:
+        """Check if an example entry is a placeholder.
+        
+        Matches:
+        - "Theme Anchor"
+        - "Theme Anchor B"
+        - "Theme Anchor C"
+        etc.
+        """
+        pattern = re.compile(r" Anchor( [A-Z])?$")
+        return bool(pattern.search(entry))
+    
+    # Step 1: Autofill minimal placeholders
+    def autofill_placeholders(self) -> None:
+        """Add placeholder examples for themes with zero examples."""
+        for theme in self.themes.values():
+            data = theme.data
+            
+            if self._is_deprecated_alias(data):
+                continue
+            
+            if not data.get('display_name'):
+                continue
+            
+            # Skip if theme already has real (non-placeholder) examples in YAML
+            examples = data.get('example_commanders') or []
+            if isinstance(examples, list) and examples:
+                # Check if any examples are real (not " Anchor" placeholders)
+                has_real_examples = any(
+                    isinstance(ex, str) and ex and not ex.endswith(' Anchor')
+                    for ex in examples
+                )
+                if has_real_examples:
+                    continue  # Already has real examples, skip placeholder generation
+                # If only placeholders, continue to avoid overwriting
+            
+            display = data['display_name']
+            synergies = data.get('synergies') or []
+            if not isinstance(synergies, list):
+                synergies = []
+            
+            # Generate placeholders from display name + synergies
+            placeholders = [f"{display} Anchor"]
+            for s in synergies[:2]:  # First 2 synergies
+                if isinstance(s, str) and s and s != display:
+                    placeholders.append(f"{s} Anchor")
+            
+            data['example_commanders'] = placeholders
+            if not data.get('editorial_quality'):
+                data['editorial_quality'] = 'draft'
+            
+            theme.modified = True
+            self.stats.autofilled += 1
+    
+    # Step 2: Pad to minimum examples
+    def pad_examples(self) -> None:
+        """Pad example lists to minimum threshold with placeholders."""
+        for theme in self.themes.values():
+            data = theme.data
+            
+            if self._is_deprecated_alias(data):
+                continue
+            
+            if not data.get('display_name'):
+                continue
+            
+            examples = data.get('example_commanders') or []
+            if not isinstance(examples, list):
+                continue
+            
+            if len(examples) >= self.min_examples:
+                continue
+            
+            # Only pad pure placeholder sets (heuristic: don't mix real + placeholders)
+            if any(not self._is_placeholder(e) for e in examples):
+                continue
+            
+            display = data['display_name']
+            synergies = data.get('synergies') if isinstance(data.get('synergies'), list) else []
+            need = self.min_examples - len(examples)
+            
+            # Build additional placeholders
+            new_placeholders = []
+            used = set(examples)
+            
+            # 1. Additional synergies beyond first 2
+            for syn in synergies[2:]:
+                cand = f"{syn} Anchor"
+                if cand not in used and syn != display:
+                    new_placeholders.append(cand)
+                    if len(new_placeholders) >= need:
+                        break
+            
+            # 2. Generic letter suffixes (B, C, D, ...)
+            if len(new_placeholders) < need:
+                for suffix in string.ascii_uppercase[1:]:  # Start from 'B'
+                    cand = f"{display} Anchor {suffix}"
+                    if cand not in used:
+                        new_placeholders.append(cand)
+                        if len(new_placeholders) >= need:
+                            break
+            
+            if new_placeholders:
+                data['example_commanders'] = examples + new_placeholders
+                if not data.get('editorial_quality'):
+                    data['editorial_quality'] = 'draft'
+                theme.modified = True
+                self.stats.padded += 1
+    
+    # Step 3: Cleanup placeholders when real examples exist
+    def cleanup_placeholders(self) -> None:
+        """Remove placeholders when real examples have been added."""
+        for theme in self.themes.values():
+            data = theme.data
+            
+            if self._is_deprecated_alias(data):
+                continue
+            
+            if not data.get('display_name'):
+                continue
+            
+            examples = data.get('example_commanders')
+            if not isinstance(examples, list) or not examples:
+                continue
+            
+            placeholders = [e for e in examples if isinstance(e, str) and self._is_placeholder(e)]
+            real = [e for e in examples if isinstance(e, str) and not self._is_placeholder(e)]
+            
+            # Only cleanup if we have both placeholders AND real examples
+            if placeholders and real:
+                new_list = real if real else placeholders[:1]  # Keep at least one if all placeholders
+                if new_list != examples:
+                    data['example_commanders'] = new_list
+                    theme.modified = True
+                    self.stats.cleaned += 1
+    
+    # Step 4: Purge legacy anchor placeholders
+    def purge_anchors(self) -> None:
+        """Remove all legacy anchor placeholders."""
+        pattern = re.compile(r" Anchor( [A-Z])?$")
+        
+        for theme in self.themes.values():
+            data = theme.data
+            
+            examples = data.get('example_commanders')
+            if not isinstance(examples, list) or not examples:
+                continue
+            
+            placeholders = [e for e in examples if isinstance(e, str) and pattern.search(e)]
+            if not placeholders:
+                continue
+            
+            real = [e for e in examples if isinstance(e, str) and not pattern.search(e)]
+            new_list = real  # Remove ALL placeholders (even if list becomes empty)
+            
+            if new_list != examples:
+                data['example_commanders'] = new_list
+                theme.modified = True
+                self.stats.purged += 1
+    
+    # Step 5: Augment from catalog
+    def _load_catalog_map(self) -> Dict[str, Dict[str, Any]]:
+        """Load theme_list.json catalog into memory."""
+        if self._catalog_map is not None:
+            return self._catalog_map
+        
+        if not self.theme_json.exists():
+            self._emit("Warning: theme_list.json not found")
+            self._catalog_map = {}
+            return self._catalog_map
+        
+        try:
+            data = json.loads(self.theme_json.read_text(encoding='utf-8') or '{}')
+            themes = data.get('themes') or []
+            self._catalog_map = {}
+            for t in themes:
+                if isinstance(t, dict) and t.get('theme'):
+                    self._catalog_map[str(t['theme'])] = t
+        except Exception as e:
+            self._emit(f"Warning: Failed to parse theme_list.json: {e}")
+            self._catalog_map = {}
+        
+        return self._catalog_map
+    
+    def augment_from_catalog(self) -> None:
+        """Add description, popularity, etc. from theme_list.json."""
+        catalog_map = self._load_catalog_map()
+        if not catalog_map:
+            return
+        
+        for theme in self.themes.values():
+            data = theme.data
+            
+            if self._is_deprecated_alias(data):
+                continue
+            
+            name = str(data.get('display_name') or '').strip()
+            if not name:
+                continue
+            
+            cat_entry = catalog_map.get(name)
+            if not cat_entry:
+                continue
+            
+            modified = False
+            
+            # Add description if missing
+            if 'description' not in data and 'description' in cat_entry and cat_entry['description']:
+                data['description'] = cat_entry['description']
+                modified = True
+            
+            # Add popularity bucket if missing
+            if 'popularity_bucket' not in data and cat_entry.get('popularity_bucket'):
+                data['popularity_bucket'] = cat_entry['popularity_bucket']
+                modified = True
+            
+            # Add popularity hint if missing
+            if 'popularity_hint' not in data and cat_entry.get('popularity_hint'):
+                data['popularity_hint'] = cat_entry['popularity_hint']
+                modified = True
+            
+            # Backfill deck archetype if missing (defensive)
+            if 'deck_archetype' not in data and cat_entry.get('deck_archetype'):
+                data['deck_archetype'] = cat_entry['deck_archetype']
+                modified = True
+            
+            if modified:
+                theme.modified = True
+                self.stats.augmented += 1
+    
+    # Step 6: Generate editorial suggestions (simplified - full implementation would scan CSVs)
+    def generate_suggestions(self) -> None:
+        """Generate editorial suggestions for missing example_cards/commanders.
+        
+        This runs the generate_theme_editorial_suggestions.py script to populate
+        example_cards and example_commanders from CSV data (EDHREC ranks + themeTags).
+        """
+        import os
+        import subprocess
+        
+        # Check if we should run the editorial suggestions generator
+        skip_suggestions = os.environ.get('SKIP_EDITORIAL_SUGGESTIONS', '').lower() in ('1', 'true', 'yes')
+        if skip_suggestions:
+            self._emit("Skipping editorial suggestions generation (SKIP_EDITORIAL_SUGGESTIONS=1)")
+            return
+        
+        script_path = self.root / 'code' / 'scripts' / 'generate_theme_editorial_suggestions.py'
+        if not script_path.exists():
+            self._emit("Editorial suggestions script not found; skipping")
+            return
+        
+        try:
+            self._emit("Generating example_cards and example_commanders from CSV data...")
+            # Run with --apply to write missing fields, limit to reasonable batch
+            result = subprocess.run(
+                [sys.executable, str(script_path), '--apply', '--limit-yaml', '1000', '--top', '8'],
+                capture_output=True,
+                text=True,
+                timeout=300,  # 5 minute timeout
+                cwd=str(self.root)
+            )
+            if result.returncode == 0:
+                # Reload themes to pick up the generated examples
+                self.load_all_themes()
+                self._emit("Editorial suggestions generated successfully")
+            else:
+                self._emit(f"Editorial suggestions script failed (exit {result.returncode}): {result.stderr[:200]}")
+        except subprocess.TimeoutExpired:
+            self._emit("Editorial suggestions generation timed out (skipping)")
+        except Exception as e:
+            self._emit(f"Failed to generate editorial suggestions: {e}")
+    
+    # Step 7: Lint/validate
+    ALLOWED_ARCHETYPES: Set[str] = {
+        'Lands', 'Graveyard', 'Planeswalkers', 'Tokens', 'Counters', 'Spells', 
+        'Artifacts', 'Enchantments', 'Politics', 'Combo', 'Aggro', 'Control', 
+        'Midrange', 'Stax', 'Ramp', 'Toolbox'
+    }
+    
+    CORNERSTONE: Set[str] = {
+        'Landfall', 'Reanimate', 'Superfriends', 'Tokens Matter', '+1/+1 Counters'
+    }
+    
+    def validate(self, enforce_min: bool = False, strict: bool = False) -> None:
+        """Validate theme metadata (lint)."""
+        errors: List[str] = []
+        warnings: List[str] = []
+        seen_display: Set[str] = set()
+        
+        for theme in self.themes.values():
+            data = theme.data
+            
+            if self._is_deprecated_alias(data):
+                continue
+            
+            name = str(data.get('display_name') or '').strip()
+            if not name:
+                continue
+            
+            if name in seen_display:
+                continue  # Skip duplicates
+            seen_display.add(name)
+            
+            ex_cmd = data.get('example_commanders') or []
+            ex_cards = data.get('example_cards') or []
+            
+            if not isinstance(ex_cmd, list):
+                errors.append(f"{name}: example_commanders not a list")
+                ex_cmd = []
+            
+            if not isinstance(ex_cards, list):
+                errors.append(f"{name}: example_cards not a list")
+                ex_cards = []
+            
+            # Length checks
+            if len(ex_cmd) > 12:
+                warnings.append(f"{name}: example_commanders has {len(ex_cmd)} entries (>12)")
+            
+            if len(ex_cards) > 20:
+                warnings.append(f"{name}: example_cards has {len(ex_cards)} entries (>20)")
+            
+            # Minimum examples check
+            if ex_cmd and len(ex_cmd) < self.min_examples:
+                msg = f"{name}: only {len(ex_cmd)} example_commanders (<{self.min_examples} minimum)"
+                if enforce_min:
+                    errors.append(msg)
+                else:
+                    warnings.append(msg)
+            
+            # Cornerstone themes should have examples (if strict)
+            if strict and name in self.CORNERSTONE:
+                if not ex_cmd:
+                    errors.append(f"{name}: cornerstone theme missing example_commanders")
+                if not ex_cards:
+                    errors.append(f"{name}: cornerstone theme missing example_cards")
+            
+            # Deck archetype validation
+            archetype = data.get('deck_archetype')
+            if archetype and archetype not in self.ALLOWED_ARCHETYPES:
+                warnings.append(f"{name}: unknown deck_archetype '{archetype}'")
+        
+        self.stats.lint_errors = len(errors)
+        self.stats.lint_warnings = len(warnings)
+        
+        if errors:
+            for err in errors:
+                self._emit(f"ERROR: {err}")
+        
+        if warnings:
+            for warn in warnings:
+                self._emit(f"WARNING: {warn}")
+    
+    def write_all_themes(self) -> None:
+        """Write all modified themes back to disk (final step)."""
+        if yaml is None:
+            raise RuntimeError("PyYAML not installed; cannot write themes")
+        
+        written = 0
+        for theme in self.themes.values():
+            if theme.modified:
+                try:
+                    theme.path.write_text(
+                        yaml.safe_dump(theme.data, sort_keys=False, allow_unicode=True),
+                        encoding='utf-8'
+                    )
+                    written += 1
+                except Exception as e:
+                    self._emit(f"Error writing {theme.path.name}: {e}")
+        
+        self._emit(f"Wrote {written} modified theme files")
+    
+    def run_all(
+        self,
+        write: bool = True,
+        enforce_min: bool = False,
+        strict_lint: bool = False,
+        run_purge: bool = False,
+    ) -> EnrichmentStats:
+        """Run the full enrichment pipeline.
+        
+        Args:
+            write: Whether to write changes to disk (False = dry run)
+            enforce_min: Whether to treat min_examples violations as errors
+            strict_lint: Whether to enforce strict validation rules
+            run_purge: Whether to run purge step (removes ALL anchor placeholders)
+        
+        Returns:
+            EnrichmentStats with summary of operations
+        """
+        self._emit("Starting theme enrichment pipeline...")
+        
+        # Step 0: Load all themes
+        self.load_all_themes()
+        
+        # Step 1: Autofill placeholders
+        self._emit("Step 1/7: Autofilling placeholders...")
+        self.autofill_placeholders()
+        
+        # Step 2: Pad to minimum
+        self._emit("Step 2/7: Padding to minimum examples...")
+        self.pad_examples()
+        
+        # Step 3: Cleanup mixed placeholder/real lists
+        self._emit("Step 3/7: Cleaning up placeholders...")
+        self.cleanup_placeholders()
+        
+        # Step 4: Purge all anchor placeholders (optional - disabled by default)
+        # Note: Purge removes ALL anchors, even from pure placeholder lists.
+        # Only enable for one-time migration away from placeholder system.
+        if run_purge:
+            self._emit("Step 4/7: Purging legacy anchors...")
+            self.purge_anchors()
+        else:
+            self._emit("Step 4/7: Skipping purge (preserving placeholders)...")
+        
+        # Step 5: Augment from catalog
+        self._emit("Step 5/7: Augmenting from catalog...")
+        self.augment_from_catalog()
+        
+        # Step 6: Generate suggestions (skipped for performance)
+        self._emit("Step 6/7: Generating suggestions...")
+        self.generate_suggestions()
+        
+        # Step 7: Validate
+        self._emit("Step 7/7: Validating metadata...")
+        self.validate(enforce_min=enforce_min, strict=strict_lint)
+        
+        # Write changes
+        if write:
+            self._emit("Writing changes to disk...")
+            self.write_all_themes()
+        else:
+            self._emit("Dry run: no files written")
+        
+        self._emit(str(self.stats))
+        return self.stats
+
+
+def run_enrichment_pipeline(
+    root: Optional[Path] = None,
+    min_examples: int = 5,
+    write: bool = True,
+    enforce_min: bool = False,
+    strict: bool = False,
+    run_purge: bool = False,
+    progress_callback: Optional[Callable[[str], None]] = None,
+) -> EnrichmentStats:
+    """Convenience function to run the enrichment pipeline.
+    
+    Args:
+        root: Project root directory
+        min_examples: Minimum number of example commanders
+        write: Whether to write changes (False = dry run)
+        enforce_min: Treat min examples violations as errors
+        strict: Enforce strict validation rules
+        run_purge: Whether to run purge step (removes ALL placeholders)
+        progress_callback: Optional progress callback
+    
+    Returns:
+        EnrichmentStats summary
+    """
+    pipeline = ThemeEnrichmentPipeline(
+        root=root,
+        min_examples=min_examples,
+        progress_callback=progress_callback,
+    )
+    return pipeline.run_all(
+        write=write,
+        enforce_min=enforce_min,
+        strict_lint=strict,
+        run_purge=run_purge
+    )
--- a/code/tagging/verify_columns.py
+++ b/code/tagging/verify_columns.py
@ -0,0 +1,41 @@
+"""Quick verification script to check column preservation after tagging."""
+
+import pandas as pd
+from code.path_util import get_processed_cards_path
+
+def verify_columns():
+    """Verify that all expected columns are present after tagging."""
+    path = get_processed_cards_path()
+    df = pd.read_parquet(path)
+    
+    print(f"Loaded {len(df):,} cards from {path}")
+    print(f"\nColumns ({len(df.columns)}):")
+    for col in df.columns:
+        print(f"  - {col}")
+    
+    # Check critical columns
+    expected = ['isCommander', 'isBackground', 'metadataTags', 'themeTags']
+    missing = [col for col in expected if col not in df.columns]
+    
+    if missing:
+        print(f"\n❌ MISSING COLUMNS: {missing}")
+        return False
+    
+    print(f"\n✅ All critical columns present!")
+    
+    # Check counts
+    if 'isCommander' in df.columns:
+        print(f"   isCommander: {df['isCommander'].sum()} True")
+    if 'isBackground' in df.columns:
+        print(f"   isBackground: {df['isBackground'].sum()} True")
+    if 'themeTags' in df.columns:
+        total_tags = df['themeTags'].apply(lambda x: len(x) if isinstance(x, list) else 0).sum()
+        print(f"   themeTags: {total_tags:,} total tags")
+    if 'metadataTags' in df.columns:
+        total_meta = df['metadataTags'].apply(lambda x: len(x) if isinstance(x, list) else 0).sum()
+        print(f"   metadataTags: {total_meta:,} total tags")
+    
+    return True
+
+if __name__ == "__main__":
+    verify_columns()
--- a/code/tests/test_additional_theme_config.py
+++ b/code/tests/test_additional_theme_config.py
@ -4,7 +4,23 @@ from pathlib import Path

 import pytest

-from code.headless_runner import resolve_additional_theme_inputs as _resolve_additional_theme_inputs, _parse_theme_list
+from code.headless_runner import resolve_additional_theme_inputs as _resolve_additional_theme_inputs
+
+
+def _parse_theme_list(themes_str: str) -> list[str]:
+    """Parse semicolon-separated theme list (helper for tests)."""
+    if not themes_str:
+        return []
+    themes = [t.strip() for t in themes_str.split(';') if t.strip()]
+    # Deduplicate while preserving order (case-insensitive)
+    seen = set()
+    result = []
+    for theme in themes:
+        key = theme.lower()
+        if key not in seen:
+            seen.add(key)
+            result.append(theme)
+    return result


 def _write_catalog(path: Path) -> None:
--- a/code/tests/test_all_cards_loader.py
+++ b/code/tests/test_all_cards_loader.py
@ -0,0 +1,408 @@
+"""
+Tests for AllCardsLoader and CardQueryBuilder
+
+Tests cover:
+- Loading and caching behavior
+- Single and batch card lookups
+- Color, theme, and type filtering
+- Text search
+- Query builder fluent API
+- Performance benchmarks
+"""
+
+from __future__ import annotations
+
+import os
+import tempfile
+import time
+
+import pandas as pd
+import pytest
+
+from code.services.all_cards_loader import AllCardsLoader
+from code.services.card_query_builder import CardQueryBuilder
+
+
+@pytest.fixture
+def sample_cards_df():
+    """Create a sample DataFrame for testing."""
+    return pd.DataFrame(
+        {
+            "name": [
+                "Sol Ring",
+                "Lightning Bolt",
+                "Counterspell",
+                "Giant Growth",
+                "Goblin Token Maker",
+                "Dark Ritual",
+                "Swords to Plowshares",
+                "Birds of Paradise",
+            ],
+            "colorIdentity": ["Colorless", "R", "U", "G", "R", "B", "W", "G"],
+            "type": [
+                "Artifact",
+                "Instant",
+                "Instant",
+                "Instant",
+                "Creature — Goblin",
+                "Instant",
+                "Instant",
+                "Creature — Bird",
+            ],
+            "text": [
+                "Add two mana",
+                "Deal 3 damage",
+                "Counter target spell",
+                "Target creature gets +3/+3",
+                "When this enters, create two 1/1 red Goblin creature tokens",
+                "Add three black mana",
+                "Exile target creature",
+                "Flying, Add one mana of any color",
+            ],
+            "themeTags": [
+                "",
+                "burn,damage",
+                "control,counterspells",
+                "combat,pump",
+                "tokens,goblins",
+                "ritual,fast-mana",
+                "removal,exile",
+                "ramp,mana-dork",
+            ],
+        }
+    )
+
+
+@pytest.fixture
+def sample_parquet_file(sample_cards_df):
+    """Create a temporary Parquet file for testing."""
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".parquet") as tmp:
+        sample_cards_df.to_parquet(tmp.name, engine="pyarrow")
+        yield tmp.name
+    os.unlink(tmp.name)
+
+
+def test_loader_initialization(sample_parquet_file):
+    """Test AllCardsLoader initialization."""
+    loader = AllCardsLoader(file_path=sample_parquet_file, cache_ttl=60)
+    assert loader.file_path == sample_parquet_file
+    assert loader.cache_ttl == 60
+    assert loader._df is None
+
+
+def test_loader_load(sample_parquet_file):
+    """Test loading Parquet file."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+    df = loader.load()
+    assert len(df) == 8
+    assert "name" in df.columns
+    assert "colorIdentity" in df.columns
+
+
+def test_loader_caching(sample_parquet_file):
+    """Test that caching works and doesn't reload unnecessarily."""
+    loader = AllCardsLoader(file_path=sample_parquet_file, cache_ttl=300)
+
+    # First load
+    start_time = time.time()
+    df1 = loader.load()
+    first_load_time = time.time() - start_time
+
+    # Second load (should use cache)
+    start_time = time.time()
+    df2 = loader.load()
+    cached_load_time = time.time() - start_time
+
+    # Cache should be much faster
+    assert cached_load_time < first_load_time / 2
+    assert df1 is df2  # Same object
+
+
+def test_loader_force_reload(sample_parquet_file):
+    """Test force_reload flag."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+
+    df1 = loader.load()
+    df2 = loader.load(force_reload=True)
+
+    assert df1 is not df2  # Different objects
+    assert len(df1) == len(df2)  # Same data
+
+
+def test_loader_cache_expiration(sample_parquet_file):
+    """Test cache expiration after TTL."""
+    loader = AllCardsLoader(file_path=sample_parquet_file, cache_ttl=1)
+
+    df1 = loader.load()
+    time.sleep(1.1)  # Wait for TTL to expire
+    df2 = loader.load()
+
+    assert df1 is not df2  # Should have reloaded
+
+
+def test_get_by_name(sample_parquet_file):
+    """Test single card lookup by name."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+
+    card = loader.get_by_name("Sol Ring")
+    assert card is not None
+    assert card["name"] == "Sol Ring"
+    assert card["colorIdentity"] == "Colorless"
+
+    # Non-existent card
+    card = loader.get_by_name("Nonexistent Card")
+    assert card is None
+
+
+def test_get_by_names(sample_parquet_file):
+    """Test batch card lookup by names."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+
+    cards = loader.get_by_names(["Sol Ring", "Lightning Bolt", "Counterspell"])
+    assert len(cards) == 3
+    assert "Sol Ring" in cards["name"].values
+    assert "Lightning Bolt" in cards["name"].values
+
+    # Empty list
+    cards = loader.get_by_names([])
+    assert len(cards) == 0
+
+    # Non-existent cards
+    cards = loader.get_by_names(["Nonexistent1", "Nonexistent2"])
+    assert len(cards) == 0
+
+
+def test_filter_by_color_identity(sample_parquet_file):
+    """Test color identity filtering."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+
+    # Single color
+    red_cards = loader.filter_by_color_identity(["R"])
+    assert len(red_cards) == 2
+    assert "Lightning Bolt" in red_cards["name"].values
+    assert "Goblin Token Maker" in red_cards["name"].values
+
+    # Colorless
+    colorless = loader.filter_by_color_identity(["Colorless"])
+    assert len(colorless) == 1
+    assert colorless["name"].values[0] == "Sol Ring"
+
+
+def test_filter_by_themes(sample_parquet_file):
+    """Test theme filtering."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+
+    # Single theme
+    token_cards = loader.filter_by_themes(["tokens"], mode="any")
+    assert len(token_cards) == 1
+    assert token_cards["name"].values[0] == "Goblin Token Maker"
+
+    # Multiple themes (any)
+    cards = loader.filter_by_themes(["burn", "removal"], mode="any")
+    assert len(cards) == 2  # Lightning Bolt and Swords to Plowshares
+
+    # Multiple themes (all)
+    cards = loader.filter_by_themes(["tokens", "goblins"], mode="all")
+    assert len(cards) == 1
+    assert cards["name"].values[0] == "Goblin Token Maker"
+
+
+def test_filter_by_type(sample_parquet_file):
+    """Test type filtering."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+
+    creatures = loader.filter_by_type("Creature")
+    assert len(creatures) == 2
+    assert "Goblin Token Maker" in creatures["name"].values
+    assert "Birds of Paradise" in creatures["name"].values
+
+    instants = loader.filter_by_type("Instant")
+    assert len(instants) == 5
+
+
+def test_search(sample_parquet_file):
+    """Test text search."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+
+    # Search in text
+    results = loader.search("token")
+    assert len(results) >= 1
+    assert "Goblin Token Maker" in results["name"].values
+
+    # Search in name
+    results = loader.search("Sol")
+    assert len(results) == 1
+    assert results["name"].values[0] == "Sol Ring"
+
+    # Limit results
+    results = loader.search("mana", limit=1)
+    assert len(results) == 1
+
+
+def test_get_stats(sample_parquet_file):
+    """Test stats retrieval."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+    loader.load()
+
+    stats = loader.get_stats()
+    assert stats["total_cards"] == 8
+    assert stats["cached"] is True
+    assert stats["file_size_mb"] >= 0  # Small test file may round to 0
+    assert "cache_age_seconds" in stats
+
+
+def test_clear_cache(sample_parquet_file):
+    """Test cache clearing."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+    loader.load()
+
+    assert loader._df is not None
+    loader.clear_cache()
+    assert loader._df is None
+
+
+def test_query_builder_basic(sample_parquet_file):
+    """Test basic query builder usage."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+    builder = CardQueryBuilder(loader=loader)
+
+    # Execute without filters
+    results = builder.execute()
+    assert len(results) == 8
+
+    # Single filter
+    results = builder.reset().colors(["R"]).execute()
+    assert len(results) == 2
+
+
+def test_query_builder_chaining(sample_parquet_file):
+    """Test query builder method chaining."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+
+    results = (
+        CardQueryBuilder(loader=loader)
+        .types("Creature")
+        .themes(["tokens"], mode="any")
+        .execute()
+    )
+    assert len(results) == 1
+    assert results["name"].values[0] == "Goblin Token Maker"
+
+
+def test_query_builder_names(sample_parquet_file):
+    """Test query builder with specific names."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+
+    results = (
+        CardQueryBuilder(loader=loader)
+        .names(["Sol Ring", "Lightning Bolt"])
+        .execute()
+    )
+    assert len(results) == 2
+
+
+def test_query_builder_limit(sample_parquet_file):
+    """Test query builder limit."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+
+    results = CardQueryBuilder(loader=loader).limit(3).execute()
+    assert len(results) == 3
+
+
+def test_query_builder_count(sample_parquet_file):
+    """Test query builder count method."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+
+    count = CardQueryBuilder(loader=loader).types("Instant").count()
+    assert count == 5
+
+
+def test_query_builder_first(sample_parquet_file):
+    """Test query builder first method."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+
+    card = CardQueryBuilder(loader=loader).colors(["R"]).first()
+    assert card is not None
+    assert card["colorIdentity"] == "R"
+
+    # No results
+    card = CardQueryBuilder(loader=loader).colors(["X"]).first()
+    assert card is None
+
+
+def test_query_builder_complex(sample_parquet_file):
+    """Test complex query with multiple filters."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+
+    results = (
+        CardQueryBuilder(loader=loader)
+        .types("Instant")
+        .colors(["R"])
+        .search("damage")
+        .limit(5)
+        .execute()
+    )
+    assert len(results) == 1
+    assert results["name"].values[0] == "Lightning Bolt"
+
+
+def test_performance_single_lookup(sample_parquet_file):
+    """Benchmark single card lookup performance."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+    loader.load()  # Warm up cache
+
+    start = time.time()
+    for _ in range(100):
+        loader.get_by_name("Sol Ring")
+    elapsed = time.time() - start
+
+    avg_time_ms = (elapsed / 100) * 1000
+    print(f"\nSingle lookup avg: {avg_time_ms:.3f}ms")
+    assert avg_time_ms < 10  # Should be <10ms per lookup
+
+
+def test_performance_batch_lookup(sample_parquet_file):
+    """Benchmark batch card lookup performance."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+    loader.load()  # Warm up cache
+
+    names = ["Sol Ring", "Lightning Bolt", "Counterspell"]
+
+    start = time.time()
+    for _ in range(100):
+        loader.get_by_names(names)
+    elapsed = time.time() - start
+
+    avg_time_ms = (elapsed / 100) * 1000
+    print(f"\nBatch lookup (3 cards) avg: {avg_time_ms:.3f}ms")
+    assert avg_time_ms < 15  # Should be <15ms per batch
+
+
+def test_performance_filter_by_color(sample_parquet_file):
+    """Benchmark color filtering performance."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+    loader.load()  # Warm up cache
+
+    start = time.time()
+    for _ in range(100):
+        loader.filter_by_color_identity(["R"])
+    elapsed = time.time() - start
+
+    avg_time_ms = (elapsed / 100) * 1000
+    print(f"\nColor filter avg: {avg_time_ms:.3f}ms")
+    assert avg_time_ms < 20  # Should be <20ms per filter
+
+
+def test_performance_search(sample_parquet_file):
+    """Benchmark text search performance."""
+    loader = AllCardsLoader(file_path=sample_parquet_file)
+    loader.load()  # Warm up cache
+
+    start = time.time()
+    for _ in range(100):
+        loader.search("token", limit=100)
+    elapsed = time.time() - start
+
+    avg_time_ms = (elapsed / 100) * 1000
+    print(f"\nText search avg: {avg_time_ms:.3f}ms")
+    assert avg_time_ms < 50  # Should be <50ms per search
--- a/code/tests/test_bracket_policy_applier.py
+++ b/code/tests/test_bracket_policy_applier.py
@ -11,9 +11,9 @@ def _load_applier():
    root = Path(__file__).resolve().parents[2]
    mod_path = root / 'code' / 'tagging' / 'bracket_policy_applier.py'
    spec = importlib.util.spec_from_file_location('bracket_policy_applier', str(mod_path))
-    mod = importlib.util.module_from_spec(spec)  # type: ignore[arg-type]
+    mod = importlib.util.module_from_spec(spec)
    assert spec and spec.loader
-    spec.loader.exec_module(mod)  # type: ignore[assignment]
+    spec.loader.exec_module(mod)
    return mod


--- a/code/tests/test_card_aggregator.py
+++ b/code/tests/test_card_aggregator.py
@ -0,0 +1,340 @@
+"""
+Tests for Card Aggregator
+
+Tests the CardAggregator class functionality including:
+- Full aggregation of multiple CSV files
+- Deduplication (keeping most recent)
+- Exclusion of master files (cards.csv, commander_cards.csv)
+- Validation of output
+- Version rotation
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import tempfile
+from datetime import datetime, timedelta
+from pathlib import Path
+
+import pandas as pd
+import pytest
+
+from code.file_setup.card_aggregator import CardAggregator
+
+
+@pytest.fixture
+def temp_dirs():
+    """Create temporary directories for testing."""
+    with tempfile.TemporaryDirectory() as source_dir, tempfile.TemporaryDirectory() as output_dir:
+        yield source_dir, output_dir
+
+
+@pytest.fixture
+def sample_card_data():
+    """Sample card data for testing."""
+    return {
+        "name": ["Sol Ring", "Lightning Bolt", "Counterspell"],
+        "faceName": ["Sol Ring", "Lightning Bolt", "Counterspell"],
+        "colorIdentity": ["Colorless", "R", "U"],
+        "manaCost": ["{1}", "{R}", "{U}{U}"],
+        "manaValue": [1, 1, 2],
+        "type": ["Artifact", "Instant", "Instant"],
+        "text": [
+            "Add two colorless mana",
+            "Deal 3 damage",
+            "Counter target spell",
+        ],
+    }
+
+
+def test_ensure_output_dir(temp_dirs):
+    """Test that output directory is created."""
+    _, output_dir = temp_dirs
+    aggregator = CardAggregator(output_dir=output_dir)
+
+    assert os.path.exists(output_dir)
+    assert aggregator.output_dir == output_dir
+
+
+def test_get_card_csvs_excludes_master_files(temp_dirs):
+    """Test that cards.csv and commander_cards.csv are excluded."""
+    source_dir, _ = temp_dirs
+
+    # Create test files
+    Path(source_dir, "cards.csv").touch()
+    Path(source_dir, "commander_cards.csv").touch()
+    Path(source_dir, "blue_cards.csv").touch()
+    Path(source_dir, "red_cards.csv").touch()
+    Path(source_dir, ".temp_cards.csv").touch()
+    Path(source_dir, "_temp_cards.csv").touch()
+
+    aggregator = CardAggregator()
+    csv_files = aggregator.get_card_csvs(source_dir)
+
+    # Should only include blue_cards.csv and red_cards.csv
+    basenames = [os.path.basename(f) for f in csv_files]
+    assert "blue_cards.csv" in basenames
+    assert "red_cards.csv" in basenames
+    assert "cards.csv" not in basenames
+    assert "commander_cards.csv" not in basenames
+    assert ".temp_cards.csv" not in basenames
+    assert "_temp_cards.csv" not in basenames
+    assert len(csv_files) == 2
+
+
+def test_deduplicate_cards(sample_card_data):
+    """Test that duplicate cards are removed, keeping the last occurrence."""
+    # Create DataFrame with duplicates
+    df = pd.DataFrame(sample_card_data)
+
+    # Add duplicate Sol Ring with different text
+    duplicate_data = {
+        "name": ["Sol Ring"],
+        "faceName": ["Sol Ring"],
+        "colorIdentity": ["Colorless"],
+        "manaCost": ["{1}"],
+        "manaValue": [1],
+        "type": ["Artifact"],
+        "text": ["Add two colorless mana (updated)"],
+    }
+    df_duplicate = pd.DataFrame(duplicate_data)
+    df_combined = pd.concat([df, df_duplicate], ignore_index=True)
+
+    # Should have 4 rows before deduplication
+    assert len(df_combined) == 4
+
+    aggregator = CardAggregator()
+    df_deduped = aggregator.deduplicate_cards(df_combined)
+
+    # Should have 3 rows after deduplication
+    assert len(df_deduped) == 3
+
+    # Should keep the last Sol Ring (updated text)
+    sol_ring = df_deduped[df_deduped["name"] == "Sol Ring"].iloc[0]
+    assert "updated" in sol_ring["text"]
+
+
+def test_aggregate_all(temp_dirs, sample_card_data):
+    """Test full aggregation of multiple CSV files."""
+    source_dir, output_dir = temp_dirs
+
+    # Create test CSV files
+    df1 = pd.DataFrame(
+        {
+            "name": ["Sol Ring", "Lightning Bolt"],
+            "faceName": ["Sol Ring", "Lightning Bolt"],
+            "colorIdentity": ["Colorless", "R"],
+            "manaCost": ["{1}", "{R}"],
+            "manaValue": [1, 1],
+            "type": ["Artifact", "Instant"],
+            "text": ["Add two colorless mana", "Deal 3 damage"],
+        }
+    )
+
+    df2 = pd.DataFrame(
+        {
+            "name": ["Counterspell", "Path to Exile"],
+            "faceName": ["Counterspell", "Path to Exile"],
+            "colorIdentity": ["U", "W"],
+            "manaCost": ["{U}{U}", "{W}"],
+            "manaValue": [2, 1],
+            "type": ["Instant", "Instant"],
+            "text": ["Counter target spell", "Exile target creature"],
+        }
+    )
+
+    df1.to_csv(os.path.join(source_dir, "blue_cards.csv"), index=False)
+    df2.to_csv(os.path.join(source_dir, "white_cards.csv"), index=False)
+
+    # Create excluded files (should be ignored)
+    df1.to_csv(os.path.join(source_dir, "cards.csv"), index=False)
+    df1.to_csv(os.path.join(source_dir, "commander_cards.csv"), index=False)
+
+    # Aggregate
+    aggregator = CardAggregator(output_dir=output_dir)
+    output_path = os.path.join(output_dir, "all_cards.parquet")
+    stats = aggregator.aggregate_all(source_dir, output_path)
+
+    # Verify stats
+    assert stats["files_processed"] == 2  # Only 2 files (excluded 2)
+    assert stats["total_cards"] == 4  # 2 + 2 cards
+    assert stats["duplicates_removed"] == 0
+    assert os.path.exists(output_path)
+
+    # Verify output
+    df_result = pd.read_parquet(output_path)
+    assert len(df_result) == 4
+    assert "Sol Ring" in df_result["name"].values
+    assert "Counterspell" in df_result["name"].values
+
+
+def test_aggregate_with_duplicates(temp_dirs):
+    """Test aggregation with duplicate cards across files."""
+    source_dir, output_dir = temp_dirs
+
+    # Create two files with the same card
+    df1 = pd.DataFrame(
+        {
+            "name": ["Sol Ring"],
+            "faceName": ["Sol Ring"],
+            "colorIdentity": ["Colorless"],
+            "manaCost": ["{1}"],
+            "manaValue": [1],
+            "type": ["Artifact"],
+            "text": ["Version 1"],
+        }
+    )
+
+    df2 = pd.DataFrame(
+        {
+            "name": ["Sol Ring"],
+            "faceName": ["Sol Ring"],
+            "colorIdentity": ["Colorless"],
+            "manaCost": ["{1}"],
+            "manaValue": [1],
+            "type": ["Artifact"],
+            "text": ["Version 2 (newer)"],
+        }
+    )
+
+    # Write file1 first, then file2 (file2 is newer)
+    file1 = os.path.join(source_dir, "file1.csv")
+    file2 = os.path.join(source_dir, "file2.csv")
+    df1.to_csv(file1, index=False)
+    df2.to_csv(file2, index=False)
+
+    # Make file2 newer by touching it
+    os.utime(file2, (datetime.now().timestamp() + 1, datetime.now().timestamp() + 1))
+
+    # Aggregate
+    aggregator = CardAggregator(output_dir=output_dir)
+    output_path = os.path.join(output_dir, "all_cards.parquet")
+    stats = aggregator.aggregate_all(source_dir, output_path)
+
+    # Should have removed 1 duplicate
+    assert stats["duplicates_removed"] == 1
+    assert stats["total_cards"] == 1
+
+    # Should keep the newer version (file2)
+    df_result = pd.read_parquet(output_path)
+    assert "Version 2 (newer)" in df_result["text"].iloc[0]
+
+
+def test_validate_output(temp_dirs, sample_card_data):
+    """Test output validation."""
+    source_dir, output_dir = temp_dirs
+
+    # Create and aggregate test data
+    df = pd.DataFrame(sample_card_data)
+    df.to_csv(os.path.join(source_dir, "test_cards.csv"), index=False)
+
+    aggregator = CardAggregator(output_dir=output_dir)
+    output_path = os.path.join(output_dir, "all_cards.parquet")
+    aggregator.aggregate_all(source_dir, output_path)
+
+    # Validate
+    is_valid, errors = aggregator.validate_output(output_path, source_dir)
+
+    assert is_valid
+    assert len(errors) == 0
+
+
+def test_validate_missing_file(temp_dirs):
+    """Test validation with missing output file."""
+    source_dir, output_dir = temp_dirs
+
+    aggregator = CardAggregator(output_dir=output_dir)
+    output_path = os.path.join(output_dir, "nonexistent.parquet")
+
+    is_valid, errors = aggregator.validate_output(output_path, source_dir)
+
+    assert not is_valid
+    assert len(errors) > 0
+    assert "not found" in errors[0].lower()
+
+
+def test_rotate_versions(temp_dirs, sample_card_data):
+    """Test version rotation."""
+    _, output_dir = temp_dirs
+
+    # Create initial file
+    df = pd.DataFrame(sample_card_data)
+    output_path = os.path.join(output_dir, "all_cards.parquet")
+    df.to_parquet(output_path)
+
+    aggregator = CardAggregator(output_dir=output_dir)
+
+    # Rotate versions
+    aggregator.rotate_versions(output_path, keep_versions=3)
+
+    # Should have created v1
+    v1_path = os.path.join(output_dir, "all_cards_v1.parquet")
+    assert os.path.exists(v1_path)
+    assert not os.path.exists(output_path)  # Original moved to v1
+
+    # Create new file and rotate again
+    df.to_parquet(output_path)
+    aggregator.rotate_versions(output_path, keep_versions=3)
+
+    # Should have v1 and v2
+    v2_path = os.path.join(output_dir, "all_cards_v2.parquet")
+    assert os.path.exists(v1_path)
+    assert os.path.exists(v2_path)
+
+
+def test_detect_changes(temp_dirs):
+    """Test change detection for incremental updates."""
+    source_dir, output_dir = temp_dirs
+
+    # Create metadata file
+    metadata_path = os.path.join(output_dir, ".aggregate_metadata.json")
+    past_time = (datetime.now() - timedelta(hours=1)).isoformat()
+    metadata = {"timestamp": past_time}
+    with open(metadata_path, "w") as f:
+        json.dump(metadata, f)
+
+    # Create CSV files (one old, one new)
+    old_file = os.path.join(source_dir, "old_cards.csv")
+    new_file = os.path.join(source_dir, "new_cards.csv")
+
+    df = pd.DataFrame({"name": ["Test Card"]})
+    df.to_csv(old_file, index=False)
+    df.to_csv(new_file, index=False)
+
+    # Make old_file older than metadata
+    old_time = (datetime.now() - timedelta(hours=2)).timestamp()
+    os.utime(old_file, (old_time, old_time))
+
+    aggregator = CardAggregator(output_dir=output_dir)
+    changed_files = aggregator.detect_changes(source_dir, metadata_path)
+
+    # Should only detect new_file as changed
+    assert len(changed_files) == 1
+    assert os.path.basename(changed_files[0]) == "new_cards.csv"
+
+
+def test_aggregate_all_no_files(temp_dirs):
+    """Test aggregation with no CSV files."""
+    source_dir, output_dir = temp_dirs
+
+    aggregator = CardAggregator(output_dir=output_dir)
+    output_path = os.path.join(output_dir, "all_cards.parquet")
+
+    with pytest.raises(ValueError, match="No CSV files found"):
+        aggregator.aggregate_all(source_dir, output_path)
+
+
+def test_aggregate_all_empty_files(temp_dirs):
+    """Test aggregation with empty CSV files."""
+    source_dir, output_dir = temp_dirs
+
+    # Create empty CSV file
+    empty_file = os.path.join(source_dir, "empty.csv")
+    pd.DataFrame().to_csv(empty_file, index=False)
+
+    aggregator = CardAggregator(output_dir=output_dir)
+    output_path = os.path.join(output_dir, "all_cards.parquet")
+
+    with pytest.raises(ValueError, match="No valid CSV files"):
+        aggregator.aggregate_all(source_dir, output_path)
--- a/code/tests/test_card_index_color_identity_edge_cases.py
+++ b/code/tests/test_card_index_color_identity_edge_cases.py
@ -1,9 +1,15 @@
 from __future__ import annotations

+import pytest
 from pathlib import Path

 from code.web.services import card_index

+# M4 (Parquet Migration): This test relied on injecting custom CSV data via CARD_INDEX_EXTRA_CSV,
+# which is no longer supported. The card_index now loads from the global all_cards.parquet file.
+# Skipping this test as custom data injection is not possible with unified Parquet.
+pytestmark = pytest.mark.skip(reason="M4: CARD_INDEX_EXTRA_CSV removed, cannot inject test data")
+
 CSV_CONTENT = """name,themeTags,colorIdentity,manaCost,rarity
 Hybrid Test,"Blink",WG,{W/G}{W/G},uncommon
 Devoid Test,"Blink",C,3U,uncommon
@ -24,8 +30,8 @@ def test_card_index_color_identity_list_handles_edge_cases(tmp_path, monkeypatch
    csv_path = write_csv(tmp_path)
    monkeypatch.setenv("CARD_INDEX_EXTRA_CSV", str(csv_path))
    # Force rebuild
-    card_index._CARD_INDEX.clear()  # type: ignore
-    card_index._CARD_INDEX_MTIME = None  # type: ignore
+    card_index._CARD_INDEX.clear()
+    card_index._CARD_INDEX_MTIME = None
    card_index.maybe_build_index()

    pool = card_index.get_tag_pool("Blink")
--- a/code/tests/test_card_index_rarity_normalization.py
+++ b/code/tests/test_card_index_rarity_normalization.py
@ -1,6 +1,12 @@
+import pytest
 import csv
 from code.web.services import card_index

+# M4 (Parquet Migration): This test relied on monkeypatching CARD_FILES_GLOB to inject custom CSV data,
+# which is no longer supported. The card_index now loads from the global all_cards.parquet file.
+# Skipping this test as custom data injection is not possible with unified Parquet.
+pytestmark = pytest.mark.skip(reason="M4: CARD_FILES_GLOB removed, cannot inject test data")
+
 def test_rarity_normalization_and_duplicate_handling(tmp_path, monkeypatch):
    # Create a temporary CSV simulating duplicate rarities and variant casing
    csv_path = tmp_path / "cards.csv"
--- a/code/tests/test_combo_tag_applier.py
+++ b/code/tests/test_combo_tag_applier.py
@ -4,6 +4,7 @@ import json
 from pathlib import Path

 import pandas as pd
+import pytest

 from tagging.combo_tag_applier import apply_combo_tags

@ -13,6 +14,7 @@ def _write_csv(dirpath: Path, color: str, rows: list[dict]):
    df.to_csv(dirpath / f"{color}_cards.csv", index=False)


+@pytest.mark.skip(reason="M4: apply_combo_tags no longer accepts colors/csv_dir parameters - uses unified Parquet")
 def test_apply_combo_tags_bidirectional(tmp_path: Path):
    # Arrange: create a minimal CSV for blue with two combo cards
    csv_dir = tmp_path / "csv"
@ -55,12 +57,13 @@ def test_apply_combo_tags_bidirectional(tmp_path: Path):
    assert "Kiki-Jiki, Mirror Breaker" in row_conscripts.get("comboTags")


+@pytest.mark.skip(reason="M4: apply_combo_tags no longer accepts colors/csv_dir parameters - uses unified Parquet")
 def test_name_normalization_curly_apostrophes(tmp_path: Path):
    csv_dir = tmp_path / "csv"
    csv_dir.mkdir(parents=True)
    # Use curly apostrophe in CSV name, straight in combos
    rows = [
-        {"name": "Thassa’s Oracle", "themeTags": "[]", "creatureTypes": "[]"},
+        {"name": "Thassa's Oracle", "themeTags": "[]", "creatureTypes": "[]"},
        {"name": "Demonic Consultation", "themeTags": "[]", "creatureTypes": "[]"},
    ]
    _write_csv(csv_dir, "blue", rows)
@ -78,10 +81,11 @@ def test_name_normalization_curly_apostrophes(tmp_path: Path):
    counts = apply_combo_tags(colors=["blue"], combos_path=str(combos_path), csv_dir=str(csv_dir))
    assert counts.get("blue", 0) >= 1
    df = pd.read_csv(csv_dir / "blue_cards.csv")
-    row = df[df["name"] == "Thassa’s Oracle"].iloc[0]
+    row = df[df["name"] == "Thassa's Oracle"].iloc[0]
    assert "Demonic Consultation" in row["comboTags"]


+@pytest.mark.skip(reason="M4: apply_combo_tags no longer accepts colors/csv_dir parameters - uses unified Parquet")
 def test_split_card_face_matching(tmp_path: Path):
    csv_dir = tmp_path / "csv"
    csv_dir.mkdir(parents=True)
--- a/code/tests/test_commander_build_cta.py
+++ b/code/tests/test_commander_build_cta.py
@ -8,7 +8,7 @@ from urllib.parse import parse_qs, urlparse
 import pytest
 from fastapi.testclient import TestClient

-from code.web.app import app  # type: ignore
+from code.web.app import app
 from code.web.services.commander_catalog_loader import clear_commander_catalog_cache


--- a/Show more
+++ b/Show more
Author	SHA1	Message	Date
matt	0dd69c083c	removed unneeded test in project root Some checks failed CI / build (push) Has been cancelled Details Editorial Lint / lint-editorial (push) Has been cancelled Details	2025-11-07 10:18:53 -08:00
mwisnowski	c5774a04f1	Merge pull request #50 from mwisnowski/maintenance/web-unification Web UI Architecture Improvements: Modern Stack & Quality Enhancements	2025-11-07 09:24:25 -08:00
matt	e17dcf6283	feat(testing): add template validation suite and fix HTML structure issues	2025-11-04 10:08:49 -08:00
matt	40023e93b8	fix(lint): improved type checking and code maintainability	2025-10-31 10:11:00 -07:00
matt	83fe527979	fix(lint): improved type checking and code quality (77% error reduction)	2025-10-31 08:18:09 -07:00
matt	3c45a31aa3	refactor(web): finished JavaScript consolidation, tested JavaScript items, refined themes and color palettes, tested all themes and palettes, ensured all interactive lements use theme-aware css	2025-10-29 15:45:40 -07:00
matt	9379732eec	refactor(web): consolidate inline JavaScript to TypeScript modules Migrated app.js and components.js to TypeScript. Extracted inline scripts from base.html to cardHover.ts and cardImages.ts modules for better maintainability and code reuse.	2025-10-29 10:44:29 -07:00
matt	ed381dfdce	refactor(web): remove legacy card hover system (~230 lines of dead code)	2025-10-28 17:35:47 -07:00
matt	6a94b982cb	overhaul: migrated basic JavaScript to TypeScript, began consolidation efforts	2025-10-28 16:17:55 -07:00
matt	b994978f60	overhaul: migrated to tailwind css for css management, consolidated custom css, removed inline css, removed unneeded css, and otherwise improved page styling	2025-10-28 08:21:52 -07:00
mwisnowski	4802060fe1	Merge pull request #49 from mwisnowski/overhaul/json-plus-build-and-compare Some checks failed CI / build (push) Has been cancelled Details Build X and Compare: Multi-Build Analysis & Synergy Deck Creation	2025-10-21 08:14:37 -07:00
matt	f1e21873e7	feat: implement batch build and comparison	2025-10-20 18:29:53 -07:00
matt	1d95c5cbd0	chore: prepare release v3.0.1 Some checks failed CI / build (push) Has been cancelled Details	2025-10-19 14:07:55 -07:00
mwisnowski	a7f11a2261	Merge pull request #48 from mwisnowski/maintenance/commander-cache-improvements Performance Improvements & Bug Fix	2025-10-19 14:05:17 -07:00
matt	d965410200	fix: add commander_cards.parquet to GitHub download	2025-10-19 13:58:19 -07:00
matt	345dfb3e01	perf: improve commander selection speed and fix color identity display	2025-10-19 13:29:47 -07:00
matt	454269daab	chore: prepare release v3.0.0 Some checks are pending CI / build (push) Waiting to run Details	2025-10-19 09:24:25 -07:00
mwisnowski	3769ad9186	Merge pull request #47 from mwisnowski/overhaul/csv-to-parquet-migration Parquet Migration: Unified Data Format + Instant Setup	2025-10-19 09:19:06 -07:00
matt	505bbdf166	fix: handle numpy arrays in card_similarity parse_theme_tags The similarity cache build was failing because parse_theme_tags() was checking isinstance(tags, list) but Parquet files return numpy.ndarray objects. This caused all cards to be flagged as having no theme tags, resulting in an empty cache. Changed to use hasattr(__len__) check instead, which works for both lists and numpy arrays.	2025-10-19 08:26:20 -07:00
matt	bff64de370	fix: systematically handle numpy arrays from Parquet files across codebase - Add ensure_theme_tags_list() utility to builder_utils for simpler numpy array handling - Update phase3_creatures.py: 6 locations now use bu.ensure_theme_tags_list() - Update phase4_spells.py: 9 locations now use bu.ensure_theme_tags_list() - Update tagger.py: 2 locations use hasattr/list() for numpy compatibility - Update extract_themes.py: 2 locations use hasattr/list() for numpy compatibility - Fix build-similarity-cache.yml verification script to handle numpy arrays - Enhance workflow debug output to show complete row data Parquet files return numpy.ndarray objects for array columns, not Python lists. The M4 migration added numpy support to canonical parse_theme_tags() in builder_utils, but many parts of the codebase still used isinstance(list) checks that fail with arrays. This commit systematically replaces all 19 instances with proper numpy array handling. Fixes GitHub Actions workflow 'RuntimeError: No theme tags found' and verification failures.	2025-10-18 22:47:09 -07:00
matt	db0b0ccfdb	fix: handle numpy arrays in parse_theme_tags Parquet files return numpy arrays, not Python lists. Added conversion from ndarray to list before processing theme tags.	2025-10-18 22:39:53 -07:00
matt	7a94e195b7	fix: remove incorrect import inside loop - functions are in same file	2025-10-18 22:36:45 -07:00
matt	29b5da4778	fix: correct DataFrame column filtering and enhance debug output - Fix KeyError in generate_theme_catalog.py: use isCommander column correctly - DataFrame.get() doesn't work like dict.get() - use column name directly - Enhanced debug step to print full row data for better diagnostics	2025-10-18 22:32:54 -07:00
matt	a689400c47	fix: add Path wrapper in workflow debug step	2025-10-18 22:27:13 -07:00
matt	30dfca0b67	fix: remove CSV fallback from theme catalog generation, add Parquet debug step - Remove CSV fallback logic (Parquet-only in M4 migration) - Add better error messages when Parquet file missing or empty - Add workflow debug step to inspect Parquet file after tagging - Simplify build_theme_catalog function signature	2025-10-18 22:22:35 -07:00
matt	9e6c3e66e9	fix: update generate_theme_catalog to use processed/ directory	2025-10-18 22:11:46 -07:00
matt	0e19824372	fix: use generate_theme_catalog script instead of non-existent function	2025-10-18 22:07:48 -07:00
matt	5ebd3c829e	fix: create tagging completion flag in processed directory	2025-10-18 22:02:12 -07:00
matt	3694a5382d	fix: ensure theme catalog is generated before similarity cache build	2025-10-18 21:57:45 -07:00
matt	8e8b788091	fix: add detailed tag validation to CI workflow	2025-10-18 21:56:23 -07:00
matt	e92f2ccfb4	fix: handle themeTags as list in similarity cache builder	2025-10-18 21:50:12 -07:00
matt	dec6e659b8	Merge branch 'overhaul/csv-to-parquet-migration' of https://github.com/mwisnowski/mtg_python_deckbuilder into overhaul/csv-to-parquet-migration	2025-10-18 21:43:20 -07:00
matt	b92918581e	fix: use correct processed/ path for similarity cache building	2025-10-18 21:43:04 -07:00
mwisnowski	74eb47e670	Change tagging step to run in parallel	2025-10-18 21:37:07 -07:00
matt	8435312c8f	feat: migrate to unified Parquet format with instant GitHub setup and 4x faster tagging	2025-10-18 21:32:12 -07:00
mwisnowski	e9e949aae3	Merge pull request #46 from mwisnowski/maintenance/ui-user-friendliness Some checks failed CI / build (push) Has been cancelled Details chore: prepare release v2.9.1	2025-10-17 18:51:33 -07:00
matt	be6e73347a	chore: prepare release v2.9.1	2025-10-17 18:49:38 -07:00
mwisnowski	b5d11b30ef	Merge pull request #45 from mwisnowski/maintenance/ui-user-friendliness feat(ui): add similar cards refresh button and reduce sidebar animati…	2025-10-17 18:41:40 -07:00
matt	0f4d165201	feat(ui): add similar cards refresh button and reduce sidebar animation distractions	2025-10-17 18:40:15 -07:00
matt	dfddf35b4e	chore: alos include the updated dockerhub-docker-compose.yml	2025-10-17 18:04:55 -07:00
matt	23307c0d46	chore: sync docker-compose.yml and .env.example with dockerhub-docker-compose.yml	2025-10-17 18:04:32 -07:00
matt	fd240e2533	feat: bake similarity cache into Docker images - Copy card_files/ to /.defaults/card_files/ during build - Entrypoint checks for baked-in cache before downloading - Fallback to GitHub download if not present in image - Reduces startup time and bandwidth usage	2025-10-17 17:29:17 -07:00
matt	0cf7598400	fix: remove cache build dependency from dockerhub publish Docker builds don't need to wait for cache building. Cache is downloaded if available, skipped if not.	2025-10-17 17:20:17 -07:00
matt	4cf3969ae6	chore: prepare release 2.9.0	2025-10-17 17:17:20 -07:00
mwisnowski	49eabce19d	Merge pull request #44 from mwisnowski/fix/github-action-module-path Fix/GitHub action module path	2025-10-17 17:13:11 -07:00
matt	86752b351b	feat: optimize cache workflow with orphan branch and age check - Create/use orphan branch 'similarity-cache-data' for cache distribution - Add age check to dockerhub-publish: only rebuild if cache >7 days old - Use git add -f to force-add cache files (keeps .gitignore clean) - Weekly scheduled builds will keep cache fresh automatically This avoids rebuilding cache on every Docker publish while ensuring cache is always reasonably fresh (<7 days old).	2025-10-17 17:11:04 -07:00
matt	b26057f68d	fix: allow similarity cache files in card_files directory Added .gitignore exceptions for: - card_files/similarity_cache.parquet - card_files/similarity_cache_metadata.json These files need to be committed to the similarity-cache-data branch for distribution with Docker builds.	2025-10-17 16:54:51 -07:00
matt	fc911b818e	fix: correct module path for all_cards.parquet generation in CI Changed from non-existent code.web.services.card_loader to correct code.file_setup.card_aggregator.CardAggregator module. Fixes ModuleNotFoundError in build-similarity-cache workflow.	2025-10-17 16:41:44 -07:00
mwisnowski	951f5ef45a	Merge pull request #43 from mwisnowski/feature/card-browser Feature/card browser	2025-10-17 16:22:47 -07:00
matt	c2960c808e	Add card browser with similar cards and performance optimizations	2025-10-17 16:17:36 -07:00
matt	a8dc1835eb	feat(card-browser): advanced filters, keyboard shortcuts, and responsive design	2025-10-16 19:02:33 -07:00
matt	e0fe8a36e6	chore: prepare release 2.8.1 Some checks are pending CI / build (push) Waiting to run Details	2025-10-16 11:44:18 -07:00
mwisnowski	ab1aac1ee7	Merge pull request #42 from mwisnowski/feature/colorless-commander-improvements Add colorless commander filtering and display fixes	2025-10-16 11:29:49 -07:00
matt	bec984ce3e	Add colorless commander filtering and display fixes	2025-10-16 11:20:27 -07:00
matt	2eab6ab653	chore: prepare release v2.8.0 Some checks are pending CI / build (push) Waiting to run Details	2025-10-15 18:43:33 -07:00
mwisnowski	6f4b995c5f	Merge pull request #41 from mwisnowski/feature/theme-browser-tuneup feat: improve theme browser performance and add keyboard navigation	2025-10-15 18:40:24 -07:00
matt	77302f895f	feat: improve theme browser performance and add keyboard navigation	2025-10-15 18:10:17 -07:00
mwisnowski	40e676e39b	Merge pull request #40 from mwisnowski/feature/theme-catalog-optimization feat: theme catalog optimization with tag search and faster enrichment	2025-10-15 17:24:08 -07:00
matt	9e6c68f559	feat: theme catalog optimization with tag search and faster enrichment	2025-10-15 17:17:46 -07:00
mwisnowski	952b151162	Merge pull request #39 from mwisnowski/feature/all-cards-consolidation feat: consolidate card data into optimized format for faster queries and prep for future feature additions	2025-10-15 11:09:03 -07:00
matt	f70ffca23e	feat: consolidate card data into optimized format for faster queries and reduced file sizes	2025-10-15 11:04:49 -07:00
matt	5753bb19f8	chore: prepare release v2.7.1 Some checks failed CI / build (push) Has been cancelled Details	2025-10-14 17:53:15 -07:00
mwisnowski	15c11ec3d5	Merge pull request #38 from mwisnowski/fix/wizard-enhancements fix: Quick Build UI now uses full-width layout on desktop	2025-10-14 17:48:46 -07:00