""" Scryfall Bulk Data API client. Fetches bulk data JSON files from Scryfall's bulk data API, which provides all card information including image URLs without hitting rate limits. See: https://scryfall.com/docs/api/bulk-data """ import logging import os import time from typing import Any from urllib.request import Request, urlopen logger = logging.getLogger(__name__) BULK_DATA_API_URL = "https://api.scryfall.com/bulk-data" DEFAULT_BULK_TYPE = "default_cards" # All cards in Scryfall's database RATE_LIMIT_DELAY = 0.1 # 100ms between requests (50-100ms per Scryfall guidelines) class ScryfallBulkDataClient: """Client for fetching Scryfall bulk data.""" def __init__(self, rate_limit_delay: float = RATE_LIMIT_DELAY): """ Initialize Scryfall bulk data client. Args: rate_limit_delay: Seconds to wait between API requests (default 100ms) """ self.rate_limit_delay = rate_limit_delay self._last_request_time: float = 0.0 def _rate_limit_wait(self) -> None: """Wait to respect rate limits between API calls.""" elapsed = time.time() - self._last_request_time if elapsed < self.rate_limit_delay: time.sleep(self.rate_limit_delay - elapsed) self._last_request_time = time.time() def _make_request(self, url: str) -> Any: """ Make HTTP request with rate limiting and error handling. Args: url: URL to fetch Returns: Parsed JSON response Raises: Exception: If request fails after retries """ self._rate_limit_wait() try: req = Request(url) req.add_header("User-Agent", "MTG-Deckbuilder/3.0 (Image Cache)") with urlopen(req, timeout=30) as response: import json return json.loads(response.read().decode("utf-8")) except Exception as e: logger.error(f"Failed to fetch {url}: {e}") raise def get_bulk_data_info(self, bulk_type: str = DEFAULT_BULK_TYPE) -> dict[str, Any]: """ Get bulk data metadata (download URL, size, last updated). Args: bulk_type: Type of bulk data to fetch (default: default_cards) Returns: Dictionary with bulk data info including 'download_uri' Raises: ValueError: If bulk_type not found Exception: If API request fails """ logger.info(f"Fetching bulk data info for type: {bulk_type}") response = self._make_request(BULK_DATA_API_URL) # Find the requested bulk data type for item in response.get("data", []): if item.get("type") == bulk_type: logger.info( f"Found bulk data: {item.get('name')} " f"(size: {item.get('size', 0) / 1024 / 1024:.1f} MB, " f"updated: {item.get('updated_at', 'unknown')})" ) return item raise ValueError(f"Bulk data type '{bulk_type}' not found") def download_bulk_data( self, download_uri: str, output_path: str, progress_callback=None ) -> None: """ Download bulk data JSON file. Args: download_uri: Direct download URL from get_bulk_data_info() output_path: Local path to save the JSON file progress_callback: Optional callback(bytes_downloaded, total_bytes) Raises: Exception: If download fails """ logger.info(f"Downloading bulk data from: {download_uri}") logger.info(f"Saving to: {output_path}") # No rate limit on bulk data downloads per Scryfall docs try: req = Request(download_uri) req.add_header("User-Agent", "MTG-Deckbuilder/3.0 (Image Cache)") with urlopen(req, timeout=60) as response: total_size = int(response.headers.get("Content-Length", 0)) downloaded = 0 chunk_size = 1024 * 1024 # 1MB chunks # Ensure output directory exists os.makedirs(os.path.dirname(output_path), exist_ok=True) with open(output_path, "wb") as f: while True: chunk = response.read(chunk_size) if not chunk: break f.write(chunk) downloaded += len(chunk) if progress_callback: progress_callback(downloaded, total_size) logger.info(f"Downloaded {downloaded / 1024 / 1024:.1f} MB successfully") except Exception as e: logger.error(f"Failed to download bulk data: {e}") # Clean up partial download if os.path.exists(output_path): os.remove(output_path) raise def get_bulk_data( self, bulk_type: str = DEFAULT_BULK_TYPE, output_path: str = "card_files/raw/scryfall_bulk_data.json", progress_callback=None, ) -> str: """ Fetch bulk data info and download the JSON file. Args: bulk_type: Type of bulk data to fetch output_path: Where to save the JSON file progress_callback: Optional progress callback Returns: Path to downloaded file Raises: Exception: If fetch or download fails """ info = self.get_bulk_data_info(bulk_type) download_uri = info["download_uri"] self.download_bulk_data(download_uri, output_path, progress_callback) return output_path