mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-16 23:50:12 +01:00
160 lines
4.8 KiB
Python
160 lines
4.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
Aggregate Cards CLI Script

Command-line interface for consolidating individual card CSV files into a single
Parquet file. Useful for manual aggregation runs, testing, and recovery.

Usage:
    python code/scripts/aggregate_cards.py
    python code/scripts/aggregate_cards.py --source csv_files --output card_files/all_cards.parquet
    python code/scripts/aggregate_cards.py --validate-only
    python code/scripts/aggregate_cards.py --incremental
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Add the project root (three directory levels above this file) to the front
# of the import path so the `code.*` imports that follow resolve when this
# script is executed directly.
# resolve() makes the path absolute first: with a bare relative __file__
# (e.g. "aggregate_cards.py" when launched from its own directory) the plain
# .parent chain collapses to "." and would put the wrong directory on sys.path.
project_root = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(project_root))
|
|
|
|
from code.file_setup.card_aggregator import CardAggregator
|
|
from code.logging_util import get_logger
|
|
from code.settings import CSV_DIRECTORY, CARD_FILES_DIRECTORY
|
|
|
|
# Module-level logger used by every code path in this script; it is obtained
# from the project's get_logger helper using this module's __name__.
logger = get_logger(__name__)
|
|
|
|
|
|
def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the aggregate_cards CLI."""
    parser = argparse.ArgumentParser(
        description="Aggregate individual card CSV files into consolidated Parquet file",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--source",
        "-s",
        default=CSV_DIRECTORY,
        help=f"Source directory containing card CSV files (default: {CSV_DIRECTORY})",
    )
    parser.add_argument(
        "--output",
        "-o",
        default=None,
        help="Output Parquet file path (default: card_files/all_cards.parquet)",
    )
    parser.add_argument(
        "--output-dir",
        default=CARD_FILES_DIRECTORY,
        help=f"Output directory for Parquet files (default: {CARD_FILES_DIRECTORY})",
    )
    parser.add_argument(
        "--validate-only",
        action="store_true",
        help="Only validate existing output file, don't aggregate",
    )
    parser.add_argument(
        "--incremental",
        "-i",
        action="store_true",
        help="Perform incremental update (only changed files)",
    )
    parser.add_argument(
        "--keep-versions",
        type=int,
        default=3,
        help="Number of historical versions to keep (default: 3)",
    )
    return parser


def _report_validation(aggregator: CardAggregator, output_path: str, source: str) -> int:
    """Run ``aggregator.validate_output`` and log the outcome.

    Args:
        aggregator: Object exposing ``validate_output(output_path, source)``
            that returns an ``(is_valid, errors)`` pair.
        output_path: Path of the Parquet file to validate.
        source: Source directory passed through to the validator.

    Returns:
        0 when validation passed, 1 otherwise (each reported error is logged).
    """
    is_valid, errors = aggregator.validate_output(output_path, source)
    if is_valid:
        logger.info("✓ Validation passed")
        return 0

    logger.error("✗ Validation failed:")
    for error in errors:
        logger.error(f" - {error}")
    return 1


def _print_summary(stats, output_path: str) -> None:
    """Print the aggregation statistics banner to stdout.

    ``stats`` must support lookup of the keys read below; a missing key
    raises ``KeyError``, which the caller's generic handler reports.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("AGGREGATION SUMMARY")
    print(rule)
    print(f"Files processed: {stats['files_processed']}")
    print(f"Total cards: {stats['total_cards']:,}")
    print(f"Duplicates removed: {stats['duplicates_removed']:,}")
    print(f"File size: {stats['file_size_mb']:.2f} MB")
    print(f"Time elapsed: {stats['elapsed_seconds']:.2f} seconds")
    print(f"Output: {output_path}")
    print(rule)


def main() -> int:
    """Main entry point for aggregate_cards CLI.

    Parses the command line, then runs exactly one of three modes:

    * ``--validate-only``: validate the existing output file and stop.
    * ``--incremental``: detect changed source files and update the output;
      exits early with success when nothing changed.
    * default: aggregate every source file into the output.

    After an incremental or full run a summary is printed and the output is
    validated.

    Returns:
        Process exit code: 0 on success (including "no changes detected"),
        1 when validation fails or any exception is raised while running.
    """
    args = _build_parser().parse_args()

    # NOTE(review): args.keep_versions is parsed but never read in this
    # script, so --keep-versions currently has no effect here. Confirm whether
    # it should be forwarded to CardAggregator.

    aggregator = CardAggregator(output_dir=args.output_dir)

    # Build default paths with pathlib rather than "/" string concatenation so
    # an empty or trailing-slash --output-dir cannot yield "/all_cards.parquet"
    # or "dir//all_cards.parquet". Converted back to str because the
    # aggregator was previously handed plain strings.
    output_dir = Path(args.output_dir)
    output_path = args.output or str(output_dir / "all_cards.parquet")

    try:
        if args.validate_only:
            logger.info(f"Validating {output_path}...")
            return _report_validation(aggregator, output_path, args.source)

        if args.incremental:
            logger.info("Starting incremental aggregation...")
            metadata_path = str(output_dir / ".aggregate_metadata.json")
            changed_files = aggregator.detect_changes(args.source, metadata_path)

            if not changed_files:
                logger.info("No changes detected, skipping aggregation")
                return 0

            stats = aggregator.incremental_update(changed_files, output_path)
        else:
            logger.info("Starting full aggregation...")
            stats = aggregator.aggregate_all(args.source, output_path)

        _print_summary(stats, output_path)

        logger.info("\nValidating output...")
        return _report_validation(aggregator, output_path, args.source)

    except (FileNotFoundError, ValueError) as e:
        # Expected failure modes: report the message without a traceback.
        logger.error(f"Error: {e}")
        return 1
    except Exception as e:
        # Top-level boundary: report anything unexpected with a full traceback
        # on stderr and turn it into a failing exit code.
        logger.error(f"Unexpected error: {e}")
        import traceback

        traceback.print_exc()
        return 1
|
|
|
|
|
|
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|