mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2026-03-25 14:36:31 +01:00
feat: implement theme stripping system with THEME_MIN_CARDS config
This commit is contained in:
parent
1ebc2fcb3c
commit
86ece36012
20 changed files with 6604 additions and 1364 deletions
165
code/scripts/strip_catalog_themes.py
Normal file
165
code/scripts/strip_catalog_themes.py
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Strip Theme Catalog Script
|
||||
|
||||
Removes themes with insufficient card counts from the theme catalog YAML files.
|
||||
Creates backups and logs all stripped themes for reference.
|
||||
|
||||
Usage:
|
||||
python -m code.scripts.strip_catalog_themes [--min-cards N] [--no-backup] [--dry-run]
|
||||
|
||||
Options:
|
||||
--min-cards N Override THEME_MIN_CARDS setting (default: from environment/settings)
|
||||
--no-backup Skip creating backup files
|
||||
--dry-run Show what would be stripped without making changes
|
||||
|
||||
Example:
|
||||
python -m code.scripts.strip_catalog_themes
|
||||
python -m code.scripts.strip_catalog_themes --min-cards 3 --dry-run
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add project root to path for imports
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from code import settings
|
||||
from code.tagging.theme_stripper import (
|
||||
get_theme_card_counts,
|
||||
identify_themes_to_strip,
|
||||
strip_catalog_themes,
|
||||
create_stripped_themes_log,
|
||||
get_theme_distribution
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Strip themes with insufficient card counts from catalog YAML files"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--min-cards",
|
||||
type=int,
|
||||
default=settings.THEME_MIN_CARDS,
|
||||
help=f"Minimum cards required to keep a theme (default: {settings.THEME_MIN_CARDS})"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-backup",
|
||||
action="store_true",
|
||||
help="Skip creating backup files before modification"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="Show what would be stripped without making changes"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Paths
|
||||
processed_dir = Path(settings.CARD_FILES_PROCESSED_DIR)
|
||||
catalog_dir = PROJECT_ROOT / 'config' / 'themes' / 'catalog'
|
||||
log_dir = PROJECT_ROOT / 'logs'
|
||||
stripped_log_path = log_dir / 'stripped_themes.yml'
|
||||
|
||||
print(f"Stripping themes from catalog (min_cards={args.min_cards})")
|
||||
print(f"Catalog directory: {catalog_dir}")
|
||||
print(f"Dry run: {args.dry_run}")
|
||||
print()
|
||||
|
||||
# Step 1: Get theme card counts from parquet files
|
||||
print("Step 1: Analyzing theme card counts from parquet files...")
|
||||
parquet_files = sorted(processed_dir.glob("*.parquet"))
|
||||
if not parquet_files:
|
||||
print(f"Error: No parquet files found in {processed_dir}")
|
||||
return 1
|
||||
|
||||
print(f"Found {len(parquet_files)} parquet files")
|
||||
theme_counts = get_theme_card_counts(parquet_files)
|
||||
print(f"Found {len(theme_counts)} unique themes")
|
||||
print()
|
||||
|
||||
# Step 2: Get distribution
|
||||
distribution = get_theme_distribution(theme_counts)
|
||||
print("Theme distribution:")
|
||||
print(f" 1 card: {distribution['1_card']:4d} themes")
|
||||
print(f" 2 cards: {distribution['2_cards']:4d} themes")
|
||||
print(f" 3-4 cards: {distribution['3_4_cards']:4d} themes")
|
||||
print(f" 5-9 cards: {distribution['5_9_cards']:4d} themes")
|
||||
print(f" 10+ cards: {distribution['10_plus']:4d} themes")
|
||||
print(f" Total: {distribution['total']:4d} themes")
|
||||
print()
|
||||
|
||||
# Step 3: Identify themes to strip
|
||||
themes_to_strip = identify_themes_to_strip(theme_counts, args.min_cards)
|
||||
themes_to_keep = set(theme_counts.keys()) - themes_to_strip
|
||||
|
||||
print(f"Themes to strip: {len(themes_to_strip)} ({len(themes_to_strip)/len(theme_counts)*100:.1f}%)")
|
||||
print(f"Themes to keep: {len(themes_to_keep)} ({len(themes_to_keep)/len(theme_counts)*100:.1f}%)")
|
||||
print()
|
||||
|
||||
# Show sample of themes to strip
|
||||
if themes_to_strip:
|
||||
print("Sample themes to strip (first 10):")
|
||||
sample = sorted(themes_to_strip)[:10]
|
||||
for theme_id in sample:
|
||||
count = len(theme_counts[theme_id])
|
||||
cards_sample = sorted(theme_counts[theme_id])[:3]
|
||||
cards_str = ", ".join(cards_sample)
|
||||
if count > 3:
|
||||
cards_str += f", ... ({count} total)"
|
||||
print(f" - {theme_id} ({count} cards): {cards_str}")
|
||||
print()
|
||||
|
||||
if args.dry_run:
|
||||
print("DRY RUN: No changes made")
|
||||
return 0
|
||||
|
||||
# Step 4: Strip themes from catalog
|
||||
print("Step 4: Stripping themes from catalog YAML files...")
|
||||
results = strip_catalog_themes(
|
||||
catalog_dir=catalog_dir,
|
||||
themes_to_strip=themes_to_strip,
|
||||
backup=not args.no_backup
|
||||
)
|
||||
|
||||
print(f" Stripped: {results['stripped_count']} themes")
|
||||
print(f" Files deleted: {len(results['files_deleted'])}")
|
||||
print(f" Backups created: {len(results['backups_created'])}")
|
||||
|
||||
if results['errors']:
|
||||
print(f" Errors: {len(results['errors'])}")
|
||||
for error in results['errors'][:5]: # Show first 5 errors
|
||||
print(f" - {error}")
|
||||
print()
|
||||
|
||||
# Step 5: Create stripped themes log
|
||||
print("Step 5: Creating stripped themes log...")
|
||||
create_stripped_themes_log(
|
||||
output_path=stripped_log_path,
|
||||
theme_counts=theme_counts,
|
||||
themes_stripped=themes_to_strip,
|
||||
min_threshold=args.min_cards,
|
||||
sources=["catalog YAML"]
|
||||
)
|
||||
print(f" Log written to {stripped_log_path}")
|
||||
print()
|
||||
|
||||
print("✅ Catalog stripping complete!")
|
||||
print()
|
||||
print(f"Summary:")
|
||||
print(f" Total themes analyzed: {len(theme_counts)}")
|
||||
print(f" Themes stripped: {len(themes_to_strip)}")
|
||||
print(f" Themes remaining: {len(themes_to_keep)}")
|
||||
print(f" Catalog files deleted: {len(results['files_deleted'])}")
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue