mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2026-03-24 22:16:31 +01:00
166 lines
5.5 KiB
Python
166 lines
5.5 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Strip Theme Catalog Script
|
||
|
|
|
||
|
|
Removes themes with insufficient card counts from the theme catalog YAML files.
|
||
|
|
Creates backups and logs all stripped themes for reference.
|
||
|
|
|
||
|
|
Usage:
|
||
|
|
python -m code.scripts.strip_catalog_themes [--min-cards N] [--no-backup] [--dry-run]
|
||
|
|
|
||
|
|
Options:
|
||
|
|
--min-cards N Override THEME_MIN_CARDS setting (default: from environment/settings)
|
||
|
|
--no-backup Skip creating backup files
|
||
|
|
--dry-run Show what would be stripped without making changes
|
||
|
|
|
||
|
|
Example:
|
||
|
|
python -m code.scripts.strip_catalog_themes
|
||
|
|
python -m code.scripts.strip_catalog_themes --min-cards 3 --dry-run
|
||
|
|
"""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import argparse
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
# Add project root to path for imports
|
||
|
|
PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
||
|
|
sys.path.insert(0, str(PROJECT_ROOT))
|
||
|
|
|
||
|
|
from code import settings
|
||
|
|
from code.tagging.theme_stripper import (
|
||
|
|
get_theme_card_counts,
|
||
|
|
identify_themes_to_strip,
|
||
|
|
strip_catalog_themes,
|
||
|
|
create_stripped_themes_log,
|
||
|
|
get_theme_distribution
|
||
|
|
)
|
||
|
|
|
||
|
|
|
||
|
|
def main():
|
||
|
|
parser = argparse.ArgumentParser(
|
||
|
|
description="Strip themes with insufficient card counts from catalog YAML files"
|
||
|
|
)
|
||
|
|
parser.add_argument(
|
||
|
|
"--min-cards",
|
||
|
|
type=int,
|
||
|
|
default=settings.THEME_MIN_CARDS,
|
||
|
|
help=f"Minimum cards required to keep a theme (default: {settings.THEME_MIN_CARDS})"
|
||
|
|
)
|
||
|
|
parser.add_argument(
|
||
|
|
"--no-backup",
|
||
|
|
action="store_true",
|
||
|
|
help="Skip creating backup files before modification"
|
||
|
|
)
|
||
|
|
parser.add_argument(
|
||
|
|
"--dry-run",
|
||
|
|
action="store_true",
|
||
|
|
help="Show what would be stripped without making changes"
|
||
|
|
)
|
||
|
|
|
||
|
|
args = parser.parse_args()
|
||
|
|
|
||
|
|
# Paths
|
||
|
|
processed_dir = Path(settings.CARD_FILES_PROCESSED_DIR)
|
||
|
|
catalog_dir = PROJECT_ROOT / 'config' / 'themes' / 'catalog'
|
||
|
|
log_dir = PROJECT_ROOT / 'logs'
|
||
|
|
stripped_log_path = log_dir / 'stripped_themes.yml'
|
||
|
|
|
||
|
|
print(f"Stripping themes from catalog (min_cards={args.min_cards})")
|
||
|
|
print(f"Catalog directory: {catalog_dir}")
|
||
|
|
print(f"Dry run: {args.dry_run}")
|
||
|
|
print()
|
||
|
|
|
||
|
|
# Step 1: Get theme card counts from parquet files
|
||
|
|
print("Step 1: Analyzing theme card counts from parquet files...")
|
||
|
|
parquet_files = sorted(processed_dir.glob("*.parquet"))
|
||
|
|
if not parquet_files:
|
||
|
|
print(f"Error: No parquet files found in {processed_dir}")
|
||
|
|
return 1
|
||
|
|
|
||
|
|
print(f"Found {len(parquet_files)} parquet files")
|
||
|
|
theme_counts = get_theme_card_counts(parquet_files)
|
||
|
|
print(f"Found {len(theme_counts)} unique themes")
|
||
|
|
print()
|
||
|
|
|
||
|
|
# Step 2: Get distribution
|
||
|
|
distribution = get_theme_distribution(theme_counts)
|
||
|
|
print("Theme distribution:")
|
||
|
|
print(f" 1 card: {distribution['1_card']:4d} themes")
|
||
|
|
print(f" 2 cards: {distribution['2_cards']:4d} themes")
|
||
|
|
print(f" 3-4 cards: {distribution['3_4_cards']:4d} themes")
|
||
|
|
print(f" 5-9 cards: {distribution['5_9_cards']:4d} themes")
|
||
|
|
print(f" 10+ cards: {distribution['10_plus']:4d} themes")
|
||
|
|
print(f" Total: {distribution['total']:4d} themes")
|
||
|
|
print()
|
||
|
|
|
||
|
|
# Step 3: Identify themes to strip
|
||
|
|
themes_to_strip = identify_themes_to_strip(theme_counts, args.min_cards)
|
||
|
|
themes_to_keep = set(theme_counts.keys()) - themes_to_strip
|
||
|
|
|
||
|
|
print(f"Themes to strip: {len(themes_to_strip)} ({len(themes_to_strip)/len(theme_counts)*100:.1f}%)")
|
||
|
|
print(f"Themes to keep: {len(themes_to_keep)} ({len(themes_to_keep)/len(theme_counts)*100:.1f}%)")
|
||
|
|
print()
|
||
|
|
|
||
|
|
# Show sample of themes to strip
|
||
|
|
if themes_to_strip:
|
||
|
|
print("Sample themes to strip (first 10):")
|
||
|
|
sample = sorted(themes_to_strip)[:10]
|
||
|
|
for theme_id in sample:
|
||
|
|
count = len(theme_counts[theme_id])
|
||
|
|
cards_sample = sorted(theme_counts[theme_id])[:3]
|
||
|
|
cards_str = ", ".join(cards_sample)
|
||
|
|
if count > 3:
|
||
|
|
cards_str += f", ... ({count} total)"
|
||
|
|
print(f" - {theme_id} ({count} cards): {cards_str}")
|
||
|
|
print()
|
||
|
|
|
||
|
|
if args.dry_run:
|
||
|
|
print("DRY RUN: No changes made")
|
||
|
|
return 0
|
||
|
|
|
||
|
|
# Step 4: Strip themes from catalog
|
||
|
|
print("Step 4: Stripping themes from catalog YAML files...")
|
||
|
|
results = strip_catalog_themes(
|
||
|
|
catalog_dir=catalog_dir,
|
||
|
|
themes_to_strip=themes_to_strip,
|
||
|
|
backup=not args.no_backup
|
||
|
|
)
|
||
|
|
|
||
|
|
print(f" Stripped: {results['stripped_count']} themes")
|
||
|
|
print(f" Files deleted: {len(results['files_deleted'])}")
|
||
|
|
print(f" Backups created: {len(results['backups_created'])}")
|
||
|
|
|
||
|
|
if results['errors']:
|
||
|
|
print(f" Errors: {len(results['errors'])}")
|
||
|
|
for error in results['errors'][:5]: # Show first 5 errors
|
||
|
|
print(f" - {error}")
|
||
|
|
print()
|
||
|
|
|
||
|
|
# Step 5: Create stripped themes log
|
||
|
|
print("Step 5: Creating stripped themes log...")
|
||
|
|
create_stripped_themes_log(
|
||
|
|
output_path=stripped_log_path,
|
||
|
|
theme_counts=theme_counts,
|
||
|
|
themes_stripped=themes_to_strip,
|
||
|
|
min_threshold=args.min_cards,
|
||
|
|
sources=["catalog YAML"]
|
||
|
|
)
|
||
|
|
print(f" Log written to {stripped_log_path}")
|
||
|
|
print()
|
||
|
|
|
||
|
|
print("✅ Catalog stripping complete!")
|
||
|
|
print()
|
||
|
|
print(f"Summary:")
|
||
|
|
print(f" Total themes analyzed: {len(theme_counts)}")
|
||
|
|
print(f" Themes stripped: {len(themes_to_strip)}")
|
||
|
|
print(f" Themes remaining: {len(themes_to_keep)}")
|
||
|
|
print(f" Catalog files deleted: {len(results['files_deleted'])}")
|
||
|
|
|
||
|
|
return 0
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
|
||
|
|
sys.exit(main())
|