Add card browser with similar cards and performance optimizations

This commit is contained in:
matt 2025-10-17 16:17:36 -07:00
parent a8dc1835eb
commit c2960c808e
25 changed files with 4841 additions and 1392 deletions

View file

@ -45,6 +45,11 @@ WEB_VIRTUALIZE=1 # dockerhub: WEB_VIRTUALIZE="1"
ALLOW_MUST_HAVES=1 # dockerhub: ALLOW_MUST_HAVES="1"
SHOW_MUST_HAVE_BUTTONS=0 # dockerhub: SHOW_MUST_HAVE_BUTTONS="0" (set to 1 to surface must include/exclude buttons)
WEB_THEME_PICKER_DIAGNOSTICS=0 # 1=enable uncapped synergies, diagnostics fields & /themes/metrics (dev only)
# ENABLE_CARD_DETAILS=0 # 1=show Card Details button in card browser (experimental feature)
# ENABLE_CARD_SIMILARITIES=0 # 1=enable similarity/synergy features (requires ENABLE_CARD_DETAILS=1 and manual cache build)
# SIMILARITY_CACHE_PATH= # Override similarity cache location (default: card_files/similarity_cache.json)
# SIMILARITY_CACHE_MAX_AGE_DAYS=7 # Days before showing cache refresh prompt (default: 7)
# SIMILARITY_CACHE_DOWNLOAD=1 # 1=download pre-built cache from GitHub (saves 15-20 min), 0=always build locally
############################
# Partner / Background Mechanics

View file

@ -0,0 +1,171 @@
name: Build Similarity Cache

# Manual trigger + weekly schedule + callable from other workflows
on:
  workflow_dispatch:
    inputs:
      force_rebuild:
        description: 'Force rebuild even if cache exists'
        required: false
        type: boolean
        default: true
  workflow_call:  # Allow this workflow to be called by other workflows
  schedule:
    # Run every Sunday at 2 AM UTC
    - cron: '0 2 * * 0'

jobs:
  build-cache:
    runs-on: ubuntu-latest
    timeout-minutes: 45
    # This job pushes commits to the similarity-cache-data branch, so the
    # GITHUB_TOKEN needs explicit write access; repositories configured with
    # read-only default workflow permissions would otherwise fail at the
    # "Commit and push cache" step.
    permissions:
      contents: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Set up Python 3.11
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Check if cache needs rebuild
        id: check_cache
        run: |
          # force_rebuild is only populated on workflow_dispatch; on schedule
          # and workflow_call it expands to "" and we fall through to the age check.
          FORCE="${{ github.event.inputs.force_rebuild }}"
          if [ "$FORCE" = "true" ] || [ ! -f "card_files/similarity_cache.parquet" ]; then
            echo "needs_build=true" >> "$GITHUB_OUTPUT"
            echo "Cache doesn't exist or force rebuild requested"
          else
            # Check cache age via metadata JSON; any Python failure falls back to 999
            # (treated as "very old", forcing a rebuild).
            CACHE_AGE_DAYS=$(python -c "
          import json
          from datetime import datetime
          from pathlib import Path
          metadata_path = Path('card_files/similarity_cache_metadata.json')
          if metadata_path.exists():
              with open(metadata_path) as f:
                  data = json.load(f)
              build_date = data.get('build_date')
              if build_date:
                  age = (datetime.now() - datetime.fromisoformat(build_date)).days
                  print(age)
              else:
                  print(999)
          else:
              print(999)
          " || echo "999")
            if [ "$CACHE_AGE_DAYS" -gt 7 ]; then
              echo "needs_build=true" >> "$GITHUB_OUTPUT"
              echo "Cache is $CACHE_AGE_DAYS days old, rebuilding"
            else
              echo "needs_build=false" >> "$GITHUB_OUTPUT"
              echo "Cache is only $CACHE_AGE_DAYS days old, skipping"
            fi
          fi

      - name: Run initial setup
        if: steps.check_cache.outputs.needs_build == 'true'
        run: |
          python -c "from code.file_setup.setup import initial_setup; initial_setup()"

      - name: Run tagging (serial - more reliable in CI)
        if: steps.check_cache.outputs.needs_build == 'true'
        run: |
          python -c "from code.tagging.tagger import run_tagging; run_tagging(parallel=False)"

      - name: Build all_cards.parquet (needed for similarity cache, but not committed)
        if: steps.check_cache.outputs.needs_build == 'true'
        run: |
          python -c "from code.web.services.card_loader import CardCatalogLoader; loader = CardCatalogLoader(); df = loader.load(); print(f'Created all_cards.parquet with {len(df):,} cards')"

      - name: Build similarity cache (Parquet)
        if: steps.check_cache.outputs.needs_build == 'true'
        run: |
          python -m code.scripts.build_similarity_cache_parquet --parallel --checkpoint-interval 1000 --force

      - name: Verify cache was created
        if: steps.check_cache.outputs.needs_build == 'true'
        run: |
          if [ ! -f "card_files/similarity_cache.parquet" ]; then
            echo "ERROR: Cache Parquet file was not created"
            exit 1
          fi
          if [ ! -f "card_files/similarity_cache_metadata.json" ]; then
            echo "ERROR: Cache metadata file was not created"
            exit 1
          fi
          # Check cache validity
          python -c "
          import json
          from pathlib import Path
          from code.web.services.similarity_cache import get_cache
          cache = get_cache()
          stats = cache.get_stats()
          if stats['total_cards'] < 20000:
              raise ValueError(f\"Cache only has {stats['total_cards']} cards, expected ~30k\")
          print(f\"✓ Cache is valid with {stats['total_cards']:,} cards, {stats['total_entries']:,} entries\")
          print(f\"  File size: {stats['file_size_mb']:.2f} MB\")
          "

      - name: Get cache metadata for commit message
        if: steps.check_cache.outputs.needs_build == 'true'
        id: cache_meta
        run: |
          METADATA=$(python -c "
          import json
          from pathlib import Path
          from code.web.services.similarity_cache import get_cache
          cache = get_cache()
          stats = cache.get_stats()
          metadata = cache._metadata or {}
          build_date = metadata.get('build_date', 'unknown')
          print(f\"{stats['total_cards']} cards, {stats['total_entries']} entries, {stats['file_size_mb']:.1f}MB, built {build_date}\")
          ")
          echo "metadata=$METADATA" >> "$GITHUB_OUTPUT"

      - name: Commit and push cache
        if: steps.check_cache.outputs.needs_build == 'true'
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          # Switch to or create dedicated cache branch
          git checkout -b similarity-cache-data || git checkout similarity-cache-data
          # Add only the similarity cache files (not all_cards.parquet)
          git add card_files/similarity_cache.parquet
          git add card_files/similarity_cache_metadata.json
          # Check if there are changes to commit
          if git diff --staged --quiet; then
            echo "No changes to commit"
          else
            git commit -m "chore: update similarity cache [${{ steps.cache_meta.outputs.metadata }}]"
            git push origin similarity-cache-data --force
          fi

      - name: Summary
        if: always()
        run: |
          if [ "${{ steps.check_cache.outputs.needs_build }}" = "true" ]; then
            echo "✓ Similarity cache built and committed"
            echo "  Metadata: ${{ steps.cache_meta.outputs.metadata }}"
          else
            echo "⊘ Cache is recent, no rebuild needed"
          fi

View file

@ -7,9 +7,15 @@ on:
workflow_dispatch:
jobs:
build-cache:
name: Build similarity cache
uses: ./.github/workflows/build-similarity-cache.yml
secrets: inherit
prepare:
name: Prepare metadata
runs-on: ubuntu-latest
needs: build-cache
permissions:
contents: read
outputs:
@ -63,6 +69,18 @@ jobs:
- name: Checkout
uses: actions/checkout@v5.0.0
- name: Download similarity cache from branch
run: |
# Download cache files from similarity-cache-data branch
mkdir -p card_files
wget -q https://raw.githubusercontent.com/${{ github.repository }}/similarity-cache-data/card_files/similarity_cache.parquet -O card_files/similarity_cache.parquet || echo "Cache not found, will build without it"
wget -q https://raw.githubusercontent.com/${{ github.repository }}/similarity-cache-data/card_files/similarity_cache_metadata.json -O card_files/similarity_cache_metadata.json || echo "Metadata not found"
if [ -f card_files/similarity_cache.parquet ]; then
echo "✓ Downloaded similarity cache"
ls -lh card_files/similarity_cache.parquet
fi
- name: Compute amd64 tag
id: arch_tag
shell: bash
@ -120,6 +138,18 @@ jobs:
- name: Checkout
uses: actions/checkout@v5.0.0
- name: Download similarity cache from branch
run: |
# Download cache files from similarity-cache-data branch
mkdir -p card_files
wget -q https://raw.githubusercontent.com/${{ github.repository }}/similarity-cache-data/card_files/similarity_cache.parquet -O card_files/similarity_cache.parquet || echo "Cache not found, will build without it"
wget -q https://raw.githubusercontent.com/${{ github.repository }}/similarity-cache-data/card_files/similarity_cache_metadata.json -O card_files/similarity_cache_metadata.json || echo "Metadata not found"
if [ -f card_files/similarity_cache.parquet ]; then
echo "✓ Downloaded similarity cache"
ls -lh card_files/similarity_cache.parquet
fi
- name: Compute arm64 tag
id: arch_tag
shell: bash

View file

@ -9,33 +9,41 @@ This format follows Keep a Changelog principles and aims for Semantic Versioning
## [Unreleased]
### Summary
Card browser with advanced filters, keyboard shortcuts, and responsive design.
New card browser for exploring 29,839 Magic cards with advanced filters, similar card recommendations, and performance optimizations.
### Added
- **Card Browser**: Browse 26,427 Magic cards with powerful filtering at `/browse/cards`
- Fuzzy autocomplete for card names and themes with typo tolerance
- Multi-theme filtering (up to 5 themes with AND logic)
- Color identity, card type, rarity, CMC range, power/toughness filters
- Six sorting options: Name A-Z/Z-A, CMC Low/High, Power High, EDHREC Popular
- Cursor-based pagination with infinite scroll
- Shareable filter URLs for saving and sharing searches
- **Keyboard Shortcuts**: Efficient navigation without mouse
- `Enter`: Add first autocomplete match to theme filters
- `Shift+Enter`: Apply all active filters from any input field
- `Esc` (double-tap): Clear all filters globally (500ms window)
- Desktop-only keyboard shortcuts help button with tooltip
- Auto-focus theme input after adding theme (desktop only)
- **Responsive Design**: Mobile-optimized card browser with touch-friendly controls
- Adaptive grid layout (1-4 columns based on screen width)
- Theme chips with remove buttons
- Graceful 5-theme limit (input disabled, no intrusive alerts)
- Desktop-only UI elements hidden on mobile with media queries
- **Card Browser**: Browse and search all Magic cards at `/browse/cards`
- Smart autocomplete for card names and themes with typo tolerance
- Multi-theme filtering (up to 5 themes)
- Color, type, rarity, CMC, power/toughness filters
- Multiple sorting options including EDHREC popularity
- Infinite scroll with shareable filter URLs
- **Card Detail Pages**: Individual card pages with similar card suggestions
- Full card stats, oracle text, and theme tags
- Similar cards based on theme overlap
- Color-coded similarity scores
- Card preview on hover
- Enable with `ENABLE_CARD_DETAILS=1` environment variable
- **Similarity Cache**: Pre-computed card similarities for fast page loads
- Build cache with parallel processing script
- Automatically used when available
- Control with `SIMILARITY_CACHE_ENABLED` environment variable
- **Keyboard Shortcuts**: Quick navigation in card browser
- `Enter` to add autocomplete matches
- `Shift+Enter` to apply filters
- Double `Esc` to clear all filters
### Changed
- **Theme Catalog**: Improved generation to include more themes and filter out ultra-rare entries
- **Card Database**: Expanded to 29,839 cards (updated from 26,427)
- **Theme Catalog**: Improved coverage with better filtering
### Removed
- **Unused Scripts**: Removed `regenerate_parquet.py` (functionality now in web UI setup)
### Fixed
_No unreleased changes yet._
- **Card Browser UI**: Improved styling consistency and card image loading
- **Infinite Scroll**: Fixed cards appearing multiple times when loading more results
- **Sorting**: Sort order now persists correctly when scrolling through all pages
## [2.8.1] - 2025-10-16
### Summary

View file

@ -256,6 +256,8 @@ See `.env.example` for the full catalog. Common knobs:
| `THEME` | `dark` | Initial UI theme (`system`, `light`, or `dark`). |
| `WEB_STAGE_ORDER` | `new` | Build stage execution order: `new` (creatures→spells→lands) or `legacy` (lands→creatures→spells). |
| `WEB_IDEALS_UI` | `slider` | Ideal counts interface: `slider` (range inputs with live validation) or `input` (text boxes with placeholders). |
| `ENABLE_CARD_DETAILS` | `0` | Show card detail pages with similar card recommendations at `/cards/<name>`. |
| `SIMILARITY_CACHE_ENABLED` | `1` | Use pre-computed similarity cache for fast card detail pages. |
### Random build controls

View file

@ -166,17 +166,13 @@ Explore the curated commander catalog.
- MDFC merges and compatibility snapshots are handled automatically; use `--compat-snapshot` on the refresh script to emit an unmerged snapshot.
### Browse Cards
Search and filter all 26,427 Magic cards.
- **Filtering**: Search by name, themes (up to 5), color identity, type, rarity, CMC range, power/toughness
Search and explore all 29,839 Magic cards.
- **Search & Filters**: Smart autocomplete for card names and themes, multi-theme filtering (up to 5), color identity, type, rarity, CMC range, power/toughness
- **Sorting**: Name A-Z/Z-A, CMC Low/High, Power High, EDHREC Popular
- **Keyboard Shortcuts**:
- `Enter`: Add first autocomplete match to theme filters
- `Shift+Enter`: Apply all active filters
- `Esc` (double-tap): Clear all filters
- `?` button (desktop): Show keyboard shortcuts reference
- **Responsive Design**: Mobile-optimized with adaptive grid and touch controls
- **Shareable URLs**: Filter state persists in URL for saving and sharing searches
- Powered by `card_files/all_cards.parquet` with theme tag index for fast lookups
- **Card Details** (optional): Enable with `ENABLE_CARD_DETAILS=1` for individual card pages with similar card recommendations
- **Keyboard Shortcuts**: `Enter` to add matches, `Shift+Enter` to apply filters, double `Esc` to clear all
- **Shareable URLs**: Filter state persists in URL for easy sharing
- Fast lookups powered by pre-built card index and optional similarity cache (`SIMILARITY_CACHE_ENABLED=1`)
### Browse Themes
Investigate theme synergies and diagnostics.

View file

@ -1,23 +1,36 @@
# MTG Python Deckbuilder ${VERSION}
### Summary
Card browser with advanced filters, keyboard shortcuts, and responsive design.
New card browser for exploring and discovering cards with advanced filters, similar card recommendations, and fast performance.
### Added
- **Card Browser**: Browse 26,427 Magic cards with powerful filtering
- Multi-theme filtering (up to 5 themes), color identity, type, rarity, CMC, power/toughness
- Six sorting options including EDHREC popularity
- Infinite scroll with cursor-based pagination
- Shareable filter URLs
- **Keyboard Shortcuts**: Efficient navigation
- `Enter`: Add first autocomplete match
- `Shift+Enter`: Apply filters
- `Esc` (double-tap): Clear all filters
- Desktop-only help button with keyboard shortcuts reference
- **Responsive Design**: Mobile-optimized with adaptive grid layout and touch-friendly controls
- **Card Browser**: Browse and search all 29,839 Magic cards at `/browse/cards`
- Smart autocomplete with typo tolerance for card names and themes
- Multi-theme filtering (up to 5 themes)
- Color, type, rarity, CMC, power/toughness filters
- Multiple sorting options including EDHREC popularity
- Infinite scroll with shareable URLs
- **Card Detail Pages**: Individual card pages with similar card suggestions
- Enable with `ENABLE_CARD_DETAILS=1` environment variable
- Full card stats, oracle text, and theme tags
- Similar cards based on theme overlap with color-coded scores
- Card preview on hover
- **Similarity Cache**: Pre-computed card similarities for instant page loads
- Build cache with `python -m code.scripts.build_similarity_cache_parquet --parallel`
- Control with `SIMILARITY_CACHE_ENABLED` environment variable
- **Keyboard Shortcuts**: Quick navigation
- `Enter` to add autocomplete matches
- `Shift+Enter` to apply filters
- Double `Esc` to clear all filters
### Changed
- **Theme Catalog**: Improved generation to include more themes and filter out ultra-rare entries
- **Card Database**: Expanded to 29,839 cards (from 26,427)
- **Theme Catalog**: Improved coverage and filtering
### Removed
- **Unused Scripts**: Removed redundant `regenerate_parquet.py`
### Fixed
_No unreleased changes yet._
- **Card Browser**: Improved UI consistency and image loading
- **Infinite Scroll**: No more duplicate cards when loading more
- **Sorting**: Sort order now persists correctly across pages

View file

@ -0,0 +1,445 @@
"""
Build similarity cache for all cards in the database using Parquet format.
Pre-computes and stores similarity calculations for ~29k cards to improve
card detail page performance from 2-6s down to <500ms.
NOTE: This script assumes card data and tagging are already complete.
Run setup and tagging separately before building the cache.
Usage:
python -m code.scripts.build_similarity_cache_parquet [--parallel] [--checkpoint-interval 100]
Options:
--parallel Enable parallel processing (faster but uses more CPU)
--checkpoint-interval Save cache every N cards (default: 100)
--force Rebuild cache even if it exists
--dry-run Calculate without saving (for testing)
--workers N Number of parallel workers (default: auto-detect)
"""
import argparse
import logging
import sys
import time
import pandas as pd
from concurrent.futures import ProcessPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
# Add project root to path
project_root = Path(__file__).parents[2]
sys.path.insert(0, str(project_root))
from code.web.services.card_similarity import CardSimilarity
from code.web.services.similarity_cache import SimilarityCache, get_cache
# Setup logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
# Shared data for worker processes (passed during initialization, not reloaded per worker)
_shared_cards_df = None
_shared_theme_frequencies = None
_shared_cleaned_tags = None
_worker_similarity = None
def _init_worker(cards_df_pickled: bytes, theme_frequencies: dict, cleaned_tags: dict):
    """
    Initialize worker process with shared data.

    Called once when each worker process starts (via ProcessPoolExecutor's
    ``initializer``). Populates module-level globals so that
    ``calculate_similarity_for_card`` can reuse one CardSimilarity instance
    per worker instead of rebuilding it for every task.

    Args:
        cards_df_pickled: Pickled DataFrame of all cards (pickled once in the
            parent process and shipped to each worker).
        theme_frequencies: Pre-computed theme frequency dict
        cleaned_tags: Pre-computed cleaned tags cache
    """
    import pickle
    import logging
    global _shared_cards_df, _shared_theme_frequencies, _shared_cleaned_tags, _worker_similarity
    # Unpickle shared data once per worker
    _shared_cards_df = pickle.loads(cards_df_pickled)
    _shared_theme_frequencies = theme_frequencies
    _shared_cleaned_tags = cleaned_tags
    # Create worker-level CardSimilarity instance with shared data
    _worker_similarity = CardSimilarity(cards_df=_shared_cards_df)
    # Override pre-computed data to avoid recomputation
    # (assumes CardSimilarity exposes these as plain attributes — TODO confirm
    # the constructor doesn't already derive them from cards_df)
    _worker_similarity.theme_frequencies = _shared_theme_frequencies
    _worker_similarity.cleaned_tags_cache = _shared_cleaned_tags
    # Suppress verbose logging in workers
    logging.getLogger("card_similarity").setLevel(logging.WARNING)
def calculate_similarity_for_card(args: tuple) -> tuple[str, list[dict], bool]:
    """
    Calculate similarity for a single card (worker function for parallel processing).

    Runs inside a worker process; relies on the module-global
    ``_worker_similarity`` set up by ``_init_worker``. Never raises: any
    failure is logged and reported via the ``success`` flag so the parent
    loop can count it as failed and keep going.

    Args:
        args: Tuple of (card_name, threshold, min_results, limit)

    Returns:
        Tuple of (card_name, similar_cards, success) — ``similar_cards`` is
        empty when ``success`` is False.
    """
    card_name, threshold, min_results, limit = args
    try:
        # Use the global worker-level CardSimilarity instance
        global _worker_similarity
        if _worker_similarity is None:
            # Fallback if initializer wasn't called (shouldn't happen);
            # this path pays the full per-process init cost.
            _worker_similarity = CardSimilarity()
        # Calculate without using cache (we're building it)
        similar_cards = _worker_similarity.find_similar(
            card_name=card_name,
            threshold=threshold,
            min_results=min_results,
            limit=limit,
            adaptive=True,
            use_cache=False,
        )
        return card_name, similar_cards, True
    except Exception as e:
        logger.error(f"Failed to calculate similarity for '{card_name}': {e}")
        return card_name, [], False
def _add_results_to_cache(cache_df: pd.DataFrame, card_name: str, similar_cards: list[dict]) -> pd.DataFrame:
"""
Add similarity results for a card to the cache DataFrame.
Args:
cache_df: Existing cache DataFrame
card_name: Name of the card
similar_cards: List of similar cards with scores
Returns:
Updated DataFrame
"""
# Build new rows
new_rows = []
for rank, card in enumerate(similar_cards):
new_rows.append({
"card_name": card_name,
"similar_name": card["name"],
"similarity": card["similarity"],
"edhrecRank": card.get("edhrecRank", float("inf")),
"rank": rank,
})
if new_rows:
new_df = pd.DataFrame(new_rows)
cache_df = pd.concat([cache_df, new_df], ignore_index=True)
return cache_df
def build_cache(
    parallel: bool = False,
    workers: int | None = None,
    checkpoint_interval: int = 100,
    force: bool = False,
    dry_run: bool = False,
) -> None:
    """
    Build similarity cache for all cards.

    NOTE: Assumes card data (cards.csv, all_cards.parquet) and tagged data already exist.
    Run setup and tagging separately before building cache.

    Supports resume: an existing, incomplete cache file is loaded and cards
    already present in it are skipped. Progress is checkpointed to disk every
    ``checkpoint_interval`` cards, and a KeyboardInterrupt saves a partial cache.

    Args:
        parallel: Enable parallel processing
        workers: Number of parallel workers (None = auto-detect)
        checkpoint_interval: Save cache every N cards
        force: Rebuild even if cache exists
        dry_run: Calculate without saving
    """
    logger.info("=" * 80)
    logger.info("Similarity Cache Builder (Parquet Edition)")
    logger.info("=" * 80)
    logger.info("")
    # Initialize cache
    cache = get_cache()
    # Quick check for complete cache - if metadata says build is done, exit
    if not force and cache.cache_path.exists() and not dry_run:
        metadata = cache._metadata or {}
        is_complete = metadata.get("build_complete", False)
        if is_complete:
            stats = cache.get_stats()
            logger.info(f"Cache already complete with {stats['total_cards']:,} cards")
            logger.info("Use --force to rebuild")
            return
        else:
            stats = cache.get_stats()
            logger.info(f"Resuming incomplete cache with {stats['total_cards']:,} cards")
    if dry_run:
        logger.info("DRY RUN MODE - No changes will be saved")
        logger.info("")
    # Initialize similarity engine
    logger.info("Initializing similarity engine...")
    similarity = CardSimilarity()
    total_cards = len(similarity.cards_df)
    logger.info(f"Loaded {total_cards:,} cards")
    logger.info("")
    # Filter out low-value lands (single-sided with <3 tags)
    # NOTE(review): these column assignments mutate similarity.cards_df in
    # place (df is a reference, not a copy) — confirm downstream consumers
    # tolerate the extra is_land/is_multifaced/tag_count columns.
    df = similarity.cards_df
    df["is_land"] = df["type"].str.contains("Land", case=False, na=False)
    df["is_multifaced"] = df["layout"].str.lower().isin(["modal_dfc", "transform", "reversible_card", "double_faced_token"])
    # themeTags is assumed to be a '|'-delimited string here — TODO confirm
    df["tag_count"] = df["themeTags"].apply(lambda x: len(x.split("|")) if pd.notna(x) and x else 0)
    # Keep cards that are either:
    # 1. Not lands, OR
    # 2. Multi-faced lands, OR
    # 3. Single-sided lands with >= 3 tags
    keep_mask = (~df["is_land"]) | (df["is_multifaced"]) | (df["is_land"] & (df["tag_count"] >= 3))
    card_names = df[keep_mask]["name"].tolist()
    skipped_lands = (~keep_mask & df["is_land"]).sum()
    logger.info(f"Filtered out {skipped_lands} low-value lands (single-sided with <3 tags)")
    logger.info(f"Processing {len(card_names):,} cards ({len(card_names)/total_cards*100:.1f}% of total)")
    logger.info("")
    # Configuration for similarity calculation
    threshold = 0.8
    min_results = 3
    limit = 20  # Cache up to 20 similar cards per card for variety
    # Initialize cache data structure - try to load existing for resume
    existing_cache_df = cache.load_cache()
    already_processed = set()
    if len(existing_cache_df) > 0 and not dry_run:
        # Resume from checkpoint - keep existing data
        cache_df = existing_cache_df
        already_processed = set(existing_cache_df["card_name"].unique())
        logger.info(f"Resuming from checkpoint with {len(already_processed):,} cards already processed")
        # Setup metadata (reuse existing so build_date survives the resume)
        metadata = cache._metadata or cache._empty_metadata()
    else:
        # Start fresh
        cache_df = cache._empty_cache_df()
        metadata = cache._empty_metadata()
        metadata["build_date"] = datetime.now().isoformat()
        metadata["threshold"] = threshold
        metadata["min_results"] = min_results
    # Track stats
    start_time = time.time()
    processed = len(already_processed)  # Start count from checkpoint
    failed = 0
    checkpoint_count = 0
    try:
        if parallel:
            # Parallel processing - use available CPU cores
            import os
            import pickle
            if workers is not None:
                max_workers = max(1, workers)  # User-specified, minimum 1
                logger.info(f"Using {max_workers} worker processes (user-specified)")
            else:
                cpu_count = os.cpu_count() or 4
                # Use CPU count - 1 to leave one core for system, minimum 4
                max_workers = max(4, cpu_count - 1)
                logger.info(f"Detected {cpu_count} CPUs, using {max_workers} worker processes")
            # Prepare shared data (pickle DataFrame once, share with all workers)
            logger.info("Preparing shared data for workers...")
            cards_df_pickled = pickle.dumps(similarity.cards_df)
            theme_frequencies = similarity.theme_frequencies.copy()
            cleaned_tags = similarity.cleaned_tags_cache.copy()
            logger.info(f"Shared data prepared: {len(cards_df_pickled):,} bytes (DataFrame), "
                        f"{len(theme_frequencies)} themes, {len(cleaned_tags)} cleaned tag sets")
            # Prepare arguments for cards not yet processed
            cards_to_process = [name for name in card_names if name not in already_processed]
            logger.info(f"Cards to process: {len(cards_to_process):,} (skipping {len(already_processed):,} already done)")
            card_args = [(name, threshold, min_results, limit) for name in cards_to_process]
            with ProcessPoolExecutor(
                max_workers=max_workers,
                initializer=_init_worker,
                initargs=(cards_df_pickled, theme_frequencies, cleaned_tags)
            ) as executor:
                # Submit all tasks
                future_to_card = {
                    executor.submit(calculate_similarity_for_card, args): args[0]
                    for args in card_args
                }
                # Process results as they complete
                for future in as_completed(future_to_card):
                    card_name, similar_cards, success = future.result()
                    if success:
                        cache_df = _add_results_to_cache(cache_df, card_name, similar_cards)
                        processed += 1
                    else:
                        failed += 1
                    # Progress reporting
                    total_to_process = len(card_names)
                    if processed % 100 == 0:
                        elapsed = time.time() - start_time
                        # Calculate rate based on cards processed THIS session
                        cards_this_session = processed - len(already_processed)
                        rate = cards_this_session / elapsed if elapsed > 0 else 0
                        cards_remaining = total_to_process - processed
                        eta = cards_remaining / rate if rate > 0 else 0
                        logger.info(
                            f"Progress: {processed}/{total_to_process} "
                            f"({processed/total_to_process*100:.1f}%) - "
                            f"Rate: {rate:.1f} cards/sec - "
                            f"ETA: {eta/60:.1f} min"
                        )
                    # Checkpoint save
                    # NOTE(review): when a result fails, `processed` doesn't
                    # advance, so a checkpoint boundary can be hit repeatedly —
                    # harmless (re-saves same data) but worth knowing.
                    if not dry_run and processed % checkpoint_interval == 0:
                        checkpoint_count += 1
                        cache.save_cache(cache_df, metadata)
                        logger.info(f"Checkpoint {checkpoint_count}: Saved cache with {processed:,} cards")
        else:
            # Serial processing - skip already processed cards
            cards_to_process = [name for name in card_names if name not in already_processed]
            logger.info(f"Cards to process: {len(cards_to_process):,} (skipping {len(already_processed):,} already done)")
            for i, card_name in enumerate(cards_to_process, start=1):
                try:
                    similar_cards = similarity.find_similar(
                        card_name=card_name,
                        threshold=threshold,
                        min_results=min_results,
                        limit=limit,
                        adaptive=True,
                        use_cache=False,
                    )
                    cache_df = _add_results_to_cache(cache_df, card_name, similar_cards)
                    processed += 1
                except Exception as e:
                    logger.error(f"Failed to process '{card_name}': {e}")
                    failed += 1
                # Progress reporting (i counts this session only, unlike the
                # parallel branch which reports the cumulative `processed`)
                if i % 100 == 0:
                    elapsed = time.time() - start_time
                    rate = i / elapsed if elapsed > 0 else 0
                    cards_remaining = len(card_names) - i
                    eta = cards_remaining / rate if rate > 0 else 0
                    logger.info(
                        f"Progress: {i}/{len(card_names)} "
                        f"({i/len(card_names)*100:.1f}%) - "
                        f"Rate: {rate:.1f} cards/sec - "
                        f"ETA: {eta/60:.1f} min"
                    )
                # Checkpoint save
                if not dry_run and i % checkpoint_interval == 0:
                    checkpoint_count += 1
                    cache.save_cache(cache_df, metadata)
                    logger.info(f"Checkpoint {checkpoint_count}: Saved cache with {i:,} cards")
        # Final save — mark build complete so future runs can fast-exit
        if not dry_run:
            metadata["last_updated"] = datetime.now().isoformat()
            metadata["build_complete"] = True
            cache.save_cache(cache_df, metadata)
        # Summary
        elapsed = time.time() - start_time
        logger.info("")
        logger.info("=" * 80)
        logger.info("Build Complete")
        logger.info("=" * 80)
        logger.info(f"Total time: {elapsed/60:.2f} minutes")
        logger.info(f"Cards processed: {processed:,}")
        logger.info(f"Failed: {failed}")
        logger.info(f"Checkpoints saved: {checkpoint_count}")
        if processed > 0:
            logger.info(f"Average rate: {processed/elapsed:.2f} cards/sec")
        if not dry_run:
            stats = cache.get_stats()
            logger.info(f"Cache file size: {stats.get('file_size_mb', 0):.2f} MB")
            logger.info(f"Cache location: {cache.cache_path}")
    except KeyboardInterrupt:
        logger.warning("\nBuild interrupted by user")
        # Save partial cache (build_complete deliberately left unset so the
        # next run resumes instead of fast-exiting)
        if not dry_run and len(cache_df) > 0:
            metadata["last_updated"] = datetime.now().isoformat()
            cache.save_cache(cache_df, metadata)
            logger.info(f"Saved partial cache with {processed:,} cards")
def main():
    """CLI entry point: parse command-line flags and run the cache build."""
    arg_parser = argparse.ArgumentParser(
        description="Build similarity cache for all cards (Parquet format)"
    )
    arg_parser.add_argument("--parallel", action="store_true",
                            help="Enable parallel processing")
    arg_parser.add_argument("--workers", type=int, default=None,
                            help="Number of parallel workers (default: auto-detect)")
    arg_parser.add_argument("--checkpoint-interval", type=int, default=100,
                            help="Save cache every N cards (default: 100)")
    arg_parser.add_argument("--force", action="store_true",
                            help="Rebuild cache even if it exists")
    arg_parser.add_argument("--dry-run", action="store_true",
                            help="Calculate without saving (for testing)")
    options = arg_parser.parse_args()
    # Hand every flag through to the builder unchanged.
    build_cache(
        parallel=options.parallel,
        workers=options.workers,
        checkpoint_interval=options.checkpoint_interval,
        force=options.force,
        dry_run=options.dry_run,
    )


if __name__ == "__main__":
    main()

View file

@ -124,4 +124,25 @@ TAG_PROTECTION_GRANTS = os.getenv('TAG_PROTECTION_GRANTS', '1').lower() not in (
# M?: Split tag metadata handling (flag naming follows the file's milestone convention)
TAG_METADATA_SPLIT = os.getenv('TAG_METADATA_SPLIT', '1').lower() not in ('0', 'false', 'off', 'disabled')
# M5: Enable protection scope filtering in deck builder (completed - Phase 1-3, in progress Phase 4+)
# (Fixed: this assignment was accidentally duplicated on two consecutive lines;
# the duplicate was redundant last-wins and has been removed.)
TAG_PROTECTION_SCOPE = os.getenv('TAG_PROTECTION_SCOPE', '1').lower() not in ('0', 'false', 'off', 'disabled')

# ----------------------------------------------------------------------------------
# CARD BROWSER FEATURE FLAGS
# ----------------------------------------------------------------------------------

# Enable card detail pages (default: OFF)
# Set to '1' or 'true' to enable card detail pages in card browser
ENABLE_CARD_DETAILS = os.getenv('ENABLE_CARD_DETAILS', '0').lower() not in ('0', 'false', 'off', 'disabled')

# Enable similarity/synergy features (default: OFF)
# Requires ENABLE_CARD_DETAILS=1 and manual cache build via Setup/Tag page
# Shows similar cards based on theme tag overlap using containment scoring
ENABLE_CARD_SIMILARITIES = os.getenv('ENABLE_CARD_SIMILARITIES', '0').lower() not in ('0', 'false', 'off', 'disabled')

# Similarity cache configuration
# NOTE(review): default path is a .json file while the CI workflow builds
# card_files/similarity_cache.parquet — confirm the cache loader resolves both.
SIMILARITY_CACHE_PATH = os.getenv('SIMILARITY_CACHE_PATH', 'card_files/similarity_cache.json')
SIMILARITY_CACHE_MAX_AGE_DAYS = int(os.getenv('SIMILARITY_CACHE_MAX_AGE_DAYS', '7'))

# Allow downloading pre-built cache from GitHub (saves 15-20 min build time)
# Set to '0' to always build locally (useful for custom seeds or offline environments)
SIMILARITY_CACHE_DOWNLOAD = os.getenv('SIMILARITY_CACHE_DOWNLOAD', '1').lower() not in ('0', 'false', 'off', 'disabled')

View file

@ -69,6 +69,14 @@ async def _lifespan(app: FastAPI): # pragma: no cover - simple infra glue
get_theme_index() # Slower: parses cards for theme-to-card mapping
except Exception:
pass
# Warm CardSimilarity singleton (if card details enabled) - runs after theme index loads cards
try:
from code.settings import ENABLE_CARD_DETAILS
if ENABLE_CARD_DETAILS:
from .routes.card_browser import get_similarity # type: ignore
get_similarity() # Pre-initialize singleton (one-time cost: ~2-3s)
except Exception:
pass
yield # (no shutdown tasks currently)
@ -2202,6 +2210,7 @@ async def setup_status():
except Exception:
return JSONResponse({"running": False, "phase": "error"})
# Routers
from .routes import build as build_routes # noqa: E402
from .routes import configs as config_routes # noqa: E402
@ -2233,6 +2242,8 @@ except Exception:
pass
## (Additional startup warmers consolidated into lifespan handler)
## Note: CardSimilarity uses lazy initialization pattern like AllCardsLoader
## First card detail page loads in ~200ms (singleton init), subsequent in ~60ms
# --- Exception handling ---
def _wants_html(request: Request) -> bool:

View file

@ -9,6 +9,7 @@ from __future__ import annotations
import logging
from difflib import SequenceMatcher
from typing import TYPE_CHECKING
import pandas as pd
from fastapi import APIRouter, Request, Query
@ -19,9 +20,14 @@ from ..app import templates
try:
from code.services.all_cards_loader import AllCardsLoader
from code.deck_builder.builder_utils import parse_theme_tags
from code.settings import ENABLE_CARD_DETAILS
except ImportError:
from services.all_cards_loader import AllCardsLoader
from deck_builder.builder_utils import parse_theme_tags
from settings import ENABLE_CARD_DETAILS
if TYPE_CHECKING:
from code.web.services.card_similarity import CardSimilarity
logger = logging.getLogger(__name__)
@ -31,6 +37,7 @@ router = APIRouter(prefix="/cards", tags=["card-browser"])
_loader: AllCardsLoader | None = None
_theme_index: dict[str, set[int]] | None = None # theme_lower -> set of card indices
_theme_catalog: list[str] | None = None # cached list of all theme names from catalog
_similarity: "CardSimilarity | None" = None # cached CardSimilarity instance
def get_loader() -> AllCardsLoader:
@ -41,6 +48,28 @@ def get_loader() -> AllCardsLoader:
return _loader
def get_similarity() -> "CardSimilarity":
    """
    Return the process-wide CardSimilarity instance, creating it on first use.

    Construction is expensive (pre-computes tags for ~29k cards and loads a
    large cache), so the instance is memoized in a module-level global rather
    than rebuilt on every card detail page load.

    Returns:
        Cached CardSimilarity instance
    """
    global _similarity
    if _similarity is not None:
        return _similarity
    # Deferred import keeps the similarity service optional at module load time.
    from code.web.services.card_similarity import CardSimilarity

    df = get_loader().load()
    logger.info("Initializing CardSimilarity singleton (one-time cost)...")
    _similarity = CardSimilarity(df)
    logger.info("CardSimilarity singleton ready")
    return _similarity
def get_theme_catalog() -> list[str]:
"""
Get cached list of all theme names from theme_catalog.csv.
@ -497,6 +526,7 @@ async def card_browser_index(
"per_page": per_page,
"current_page": current_page,
"total_pages": total_pages,
"enable_card_details": ENABLE_CARD_DETAILS,
},
)
@ -519,6 +549,7 @@ async def card_browser_index(
"all_rarities": [],
"per_page": 20,
"error": "Card data not available. Please run setup to generate all_cards.parquet.",
"enable_card_details": ENABLE_CARD_DETAILS,
},
)
except Exception as e:
@ -540,6 +571,7 @@ async def card_browser_index(
"all_rarities": [],
"per_page": 20,
"error": f"Error loading cards: {str(e)}",
"enable_card_details": ENABLE_CARD_DETAILS,
},
)
@ -757,8 +789,19 @@ async def card_browser_grid(
filtered_df = filtered_df.drop('_sort_key', axis=1)
# Cursor-based pagination
# Cursor is the card name - skip all cards until we find it, then take next batch
if cursor:
filtered_df = filtered_df[filtered_df['name'] > cursor]
try:
# Find the position of the cursor card in the sorted dataframe
cursor_position = filtered_df[filtered_df['name'] == cursor].index
if len(cursor_position) > 0:
# Get the iloc position (row number, not index label)
cursor_iloc = filtered_df.index.get_loc(cursor_position[0])
# Skip past the cursor card (take everything after it)
filtered_df = filtered_df.iloc[cursor_iloc + 1:]
except (KeyError, IndexError):
# Cursor card not found - might have been filtered out, just proceed
pass
per_page = 20
cards_page = filtered_df.head(per_page)
@ -815,6 +858,7 @@ async def card_browser_grid(
"power_max": power_max,
"tough_min": tough_min,
"tough_max": tough_max,
"enable_card_details": ENABLE_CARD_DETAILS,
},
)
@ -1120,3 +1164,110 @@ async def card_theme_autocomplete(
logger.error(f"Error in theme autocomplete: {e}", exc_info=True)
return HTMLResponse(content=f'<div class="autocomplete-error">Error: {str(e)}</div>')
@router.get("/{card_name}", response_class=HTMLResponse)
async def card_detail(request: Request, card_name: str):
    """
    Display detailed information about a single card with similar cards.

    Args:
        card_name: URL-encoded card name

    Returns:
        HTML page with card details and similar cards section
        (404 error page if the card does not exist, 500 on failure)
    """
    try:
        from urllib.parse import unquote

        # The path segment arrives URL-encoded; decode before lookup.
        card_name = unquote(card_name)

        df = get_loader().load()
        matches = df[df['name'] == card_name]
        if matches.empty:
            # Unknown card - render the shared error page as a 404.
            return templates.TemplateResponse(
                "error.html",
                {
                    "request": request,
                    "error_code": 404,
                    "error_message": f"Card not found: {card_name}",
                    "back_link": "/cards",
                    "back_text": "Back to Card Browser"
                },
                status_code=404
            )

        # Flatten the matched row into a plain dict for the template and
        # attach the parsed theme-tag list alongside the raw value.
        card = matches.iloc[0].to_dict()
        card['themeTags_parsed'] = parse_theme_tags(card.get('themeTags', ''))

        # Ask the cached singleton for similar cards (adaptive 80% -> 60%).
        similar_cards = get_similarity().find_similar(
            card_name,
            threshold=0.8,
            limit=5,
            min_results=3,
            adaptive=True
        )

        # Merge the full card row into each similarity hit, making sure the
        # themeTags the template sees is the parsed list, not the raw string
        # stored in the dataframe.
        for entry in similar_cards:
            row = df[df['name'] == entry['name']]
            if row.empty:
                continue
            row_data = row.iloc[0].to_dict()
            parsed_tags = parse_theme_tags(row_data.get('themeTags', ''))
            entry.update(row_data)
            entry['themeTags'] = parsed_tags

        # Access log: record how many similar cards were found and at which threshold.
        if similar_cards:
            threshold_pct = similar_cards[0].get('threshold_used', 0) * 100
            logger.info(
                f"Card detail page for '{card_name}': found {len(similar_cards)} similar cards "
                f"(threshold: {threshold_pct:.0f}%)"
            )
        else:
            logger.info(f"Card detail page for '{card_name}': no similar cards found")

        return templates.TemplateResponse(
            "browse/cards/detail.html",
            {
                "request": request,
                "card": card,
                "similar_cards": similar_cards,
                # Main card's tags, used by the template to highlight overlap.
                "main_card_tags": card.get('themeTags_parsed', []),
            }
        )
    except Exception as e:
        logger.error(f"Error loading card detail for '{card_name}': {e}", exc_info=True)
        return templates.TemplateResponse(
            "error.html",
            {
                "request": request,
                "error_code": 500,
                "error_message": f"Error loading card details: {str(e)}",
                "back_link": "/cards",
                "back_text": "Back to Card Browser"
            },
            status_code=500
        )

View file

@ -157,4 +157,8 @@ async def rebuild_cards():
@router.get("/", response_class=HTMLResponse)
async def setup_index(request: Request) -> HTMLResponse:
    """Render the setup landing page.

    Passes ``similarity_enabled`` so the template can surface the
    similarity-cache controls only when ENABLE_CARD_SIMILARITIES is set.

    Fix: an earlier bare ``return templates.TemplateResponse(...)`` left the
    context-passing version below it unreachable; the dead early return is
    removed so the flag actually reaches the template.
    """
    import code.settings as settings
    return templates.TemplateResponse("setup/index.html", {
        "request": request,
        "similarity_enabled": settings.ENABLE_CARD_SIMILARITIES,
    })

View file

@ -0,0 +1,483 @@
"""
Card similarity service using Jaccard index on theme tags.
Provides similarity scoring between cards based on theme tag overlap.
Used for "Similar Cards" feature in card browser.
Supports persistent caching for improved performance (2-6s <500ms).
Uses "signature tags" approach: compares top 5 most frequent tags instead
of all tags, significantly improving performance and quality.
"""
import ast
import logging
import random
from pathlib import Path
from typing import Optional
import pandas as pd
from code.web.services.similarity_cache import SimilarityCache, get_cache
logger = logging.getLogger(__name__)
class CardSimilarity:
    """Calculate card similarity using theme tag overlap (Jaccard index) with caching."""

    def __init__(self, cards_df: Optional[pd.DataFrame] = None, cache: Optional[SimilarityCache] = None):
        """
        Initialize similarity calculator.

        Args:
            cards_df: DataFrame with card data. If None, loads from all_cards.parquet
            cache: SimilarityCache instance. If None, uses global singleton
        """
        if cards_df is None:
            # Load from default location
            parquet_path = Path(__file__).parents[3] / "card_files" / "all_cards.parquet"
            logger.info(f"Loading cards from {parquet_path}")
            self.cards_df = pd.read_parquet(parquet_path)
        else:
            self.cards_df = cards_df
        # Initialize cache
        self.cache = cache if cache is not None else get_cache()
        # Load theme frequencies from catalog
        self.theme_frequencies = self._load_theme_frequencies()
        # Pre-compute cleaned tags (with exclusions) for all cards (one-time cost, huge speedup)
        # This removes "Historics Matter" and "Legends Matter" from all cards
        self.cleaned_tags_cache = self._precompute_cleaned_tags()
        # Pre-compute card metadata (EDHREC rank) for fast lookups
        self._card_metadata = self._precompute_card_metadata()
        # Inverted index (tag -> set of card names) - built lazily on first use
        self._tag_to_cards_index: Optional[dict[str, set[str]]] = None
        logger.info(
            f"Initialized CardSimilarity with {len(self.cards_df)} cards "
            f"and {len(self.theme_frequencies)} theme frequencies "
            f"(cache: {'enabled' if self.cache.enabled else 'disabled'})"
        )

    def _load_theme_frequencies(self) -> dict[str, int]:
        """
        Load theme frequencies from theme_catalog.csv.

        Returns:
            Dict mapping theme name to card_count (higher = more common).
            Empty dict if the catalog cannot be read.
        """
        catalog_path = Path(__file__).parents[3] / "config" / "themes" / "theme_catalog.csv"
        try:
            # Read CSV, skipping comment line
            df = pd.read_csv(catalog_path, comment="#")
            # Create dict mapping theme -> card_count
            # Higher card_count = more common/frequent theme
            frequencies = dict(zip(df["theme"], df["card_count"]))
            logger.info(f"Loaded {len(frequencies)} theme frequencies from catalog")
            return frequencies
        except Exception as e:
            # Missing/corrupt catalog is non-fatal: similarity still works,
            # signature-tag ordering just degrades to arbitrary.
            logger.warning(f"Failed to load theme frequencies: {e}, using empty dict")
            return {}

    def _precompute_cleaned_tags(self) -> dict[str, set[str]]:
        """
        Pre-compute cleaned tags for all cards.

        Removes overly common tags like "Historics Matter" and "Legends Matter"
        that don't provide meaningful similarity. This is done once during
        initialization to avoid recalculating for every comparison.

        Returns:
            Dict mapping card name -> cleaned tags (full set minus exclusions).
            Cards with no remaining tags are omitted.
        """
        logger.info("Pre-computing cleaned tags for all cards...")
        excluded_tags = {"Historics Matter", "Legends Matter"}
        cleaned = {}
        for _, row in self.cards_df.iterrows():
            card_name = row["name"]
            tags = self.parse_theme_tags(row["themeTags"])
            if tags:
                # Remove excluded tags
                cleaned_tags = tags - excluded_tags
                if cleaned_tags:  # Only store if card has tags after exclusion
                    cleaned[card_name] = cleaned_tags
        logger.info(f"Pre-computed {len(cleaned)} card tag sets")
        return cleaned

    def _precompute_card_metadata(self) -> dict[str, dict]:
        """
        Pre-compute card metadata (EDHREC rank, etc.) for fast lookups.

        Returns:
            Dict mapping card name -> metadata dict with key "edhrecRank"
            (float; inf for cards without a rank so they sort last)
        """
        logger.info("Pre-computing card metadata...")
        metadata = {}
        for _, row in self.cards_df.iterrows():
            card_name = row["name"]
            edhrec_rank = row.get("edhrecRank")
            # Convert to float, use inf for NaN/None
            edhrec_rank = float(edhrec_rank) if pd.notna(edhrec_rank) else float('inf')
            metadata[card_name] = {
                "edhrecRank": edhrec_rank,
            }
        logger.info(f"Pre-computed metadata for {len(metadata)} cards")
        return metadata

    def _build_tag_index(self) -> None:
        """
        Build inverted index: tag -> set of card names that have this tag.

        This allows fast candidate filtering - instead of checking all 29k cards,
        we only check cards that share at least one tag with the target.

        Performance impact: Reduces 29k comparisons to typically 100-2000 comparisons.
        """
        logger.info("Building inverted tag index...")
        index: dict[str, set[str]] = {}
        for card_name, tags in self.cleaned_tags_cache.items():
            for tag in tags:
                if tag not in index:
                    index[tag] = set()
                index[tag].add(card_name)
        self._tag_to_cards_index = index
        # Log statistics
        avg_cards_per_tag = sum(len(cards) for cards in index.values()) / len(index) if index else 0
        logger.info(
            f"Built tag index: {len(index)} unique tags, "
            f"avg {avg_cards_per_tag:.1f} cards per tag"
        )

    def get_signature_tags(
        self,
        card_tags: set[str],
        top_n: int = 5,
        random_n: Optional[int] = None,
        seed: Optional[int] = None,
    ) -> set[str]:
        """
        Get signature tags for similarity comparison.

        Takes the most frequent (popular) tags PLUS random tags for diversity.
        This balances defining characteristics with discovery of niche synergies.
        Excludes overly common tags like "Historics Matter" and "Legends Matter"
        that appear on most legendary cards and don't provide meaningful similarity.

        Args:
            card_tags: Full set of card theme tags
            top_n: Number of most frequent tags to use (default 5)
            random_n: Number of random tags to add. If None, auto-scales:
                - 6-10 tags: 1 random
                - 11-15 tags: 2 random
                - 16+ tags: 3 random
            seed: Random seed for reproducibility (default: None)

        Returns:
            Set of signature tags (top_n most frequent + random_n random)
        """
        # Exclude overly common tags that don't provide meaningful similarity
        excluded_tags = {"Historics Matter", "Legends Matter"}
        card_tags = card_tags - excluded_tags
        if len(card_tags) <= top_n:
            return card_tags  # Use all if card has few tags
        # Auto-scale random_n based on total tag count if not specified
        if random_n is None:
            tag_count = len(card_tags)
            if tag_count >= 16:
                random_n = 3
            elif tag_count >= 11:
                random_n = 2
            elif tag_count >= 6:
                random_n = 1
            else:
                random_n = 0  # Very few tags, no random needed
        # Sort tags by frequency (higher card_count = more common = higher priority)
        sorted_tags = sorted(
            card_tags,
            key=lambda t: -self.theme_frequencies.get(t, 0),  # Negate for descending order
        )
        # Take top N most frequent tags
        signature = set(sorted_tags[:top_n])
        # Add random tags from remaining tags
        remaining_tags = card_tags - signature
        if remaining_tags and random_n > 0:
            # FIX: use a dedicated RNG instance so seeding never clobbers the
            # global random module state, and sample from a *sorted* list so a
            # given seed is reproducible regardless of set iteration order.
            rng = random.Random(seed)
            # Sample min(random_n, len(remaining_tags)) to avoid errors
            sample_size = min(random_n, len(remaining_tags))
            random_tags = set(rng.sample(sorted(remaining_tags), sample_size))
            signature = signature | random_tags
        return signature

    @staticmethod
    def parse_theme_tags(tags: str | list) -> set[str]:
        """
        Parse theme tags from string, list, or array format.

        Handles the raw values that appear in card dataframes: an actual list,
        a numpy array (parquet-backed list columns), a string repr of a list
        ("['tag1', 'tag2']"), or a missing value (None/NaN).

        Args:
            tags: Theme tags as string representation of list or actual list

        Returns:
            Set of theme tag strings (empty set for missing/unparseable input)
        """
        if tags is None:
            return set()
        if isinstance(tags, str):
            if not tags:
                return set()
            # Handle string representation of list: "['tag1', 'tag2']"
            try:
                parsed = ast.literal_eval(tags)
            except (ValueError, SyntaxError):
                # If parsing fails, return empty set
                logger.warning(f"Failed to parse theme tags: {tags[:100]}")
                return set()
            return set(parsed) if isinstance(parsed, list) else set()
        if isinstance(tags, (list, tuple, set, frozenset)):
            # FIX: sequences must be handled BEFORE pd.isna() - pd.isna(list)
            # returns an element-wise array whose truth test raises ValueError.
            return set(tags)
        if hasattr(tags, "tolist") and getattr(tags, "ndim", 0):
            # numpy array (e.g. from a parquet list column); ndim check skips
            # numpy scalars such as a float64 NaN.
            return set(tags.tolist())
        try:
            # Scalar missing value (NaN/NaT/None-like)
            if pd.isna(tags):
                return set()
        except (TypeError, ValueError):
            pass
        return set()

    @staticmethod
    def calculate_similarity(tags_a: set[str], tags_b: set[str]) -> float:
        """
        Calculate Jaccard similarity between two sets of theme tags.

        Jaccard index = intersection / union

        Args:
            tags_a: First set of theme tags
            tags_b: Second set of theme tags

        Returns:
            Similarity score from 0.0 (no overlap) to 1.0 (identical)
        """
        if not tags_a or not tags_b:
            return 0.0
        intersection = len(tags_a & tags_b)
        union = len(tags_a | tags_b)
        if union == 0:
            return 0.0
        return intersection / union

    def get_card_tags(self, card_name: str) -> Optional[set[str]]:
        """
        Get theme tags for a specific card.

        Args:
            card_name: Name of the card

        Returns:
            Set of theme tags, or None if card not found
        """
        card_row = self.cards_df[self.cards_df["name"] == card_name]
        if card_row.empty:
            return None
        tags = card_row.iloc[0]["themeTags"]
        return self.parse_theme_tags(tags)

    def find_similar(
        self,
        card_name: str,
        threshold: float = 0.8,
        limit: int = 10,
        min_results: int = 3,
        adaptive: bool = True,
        use_cache: bool = True,
    ) -> list[dict]:
        """
        Find cards with similar theme tags.

        Uses adaptive threshold scaling to ensure minimum number of results.
        Tries 80% -> 60% -> 50% thresholds until min_results is met (skips 70%
        for performance). Checks cache first for pre-computed results, falls
        back to real-time calculation.

        Args:
            card_name: Name of the target card
            threshold: Starting similarity threshold (0.0-1.0), default 0.8 (80%)
            limit: Maximum number of results, default 10
            min_results: Minimum desired results for adaptive scaling, default 3
            adaptive: Enable adaptive threshold scaling, default True
            use_cache: Check cache first before calculating, default True

        Returns:
            List of dicts with keys: name, similarity, themeTags, edhrecRank, threshold_used
            Sorted by similarity descending, then by EDHREC rank ascending (more popular first)
            Returns empty list if card not found or has no tags
        """
        # Check cache first
        if use_cache and self.cache.enabled:
            cached_results = self.cache.get_similar(card_name, limit=limit, randomize=True)
            if cached_results is not None:
                logger.info(f"Cache HIT for '{card_name}' ({len(cached_results)} results, randomized)")
                return cached_results
            else:
                logger.info(f"Cache MISS for '{card_name}', calculating...")
        # Get target card tags
        target_tags = self.get_card_tags(card_name)
        if target_tags is None:
            logger.warning(f"Card not found: {card_name}")
            return []
        if not target_tags:
            logger.info(f"Card has no theme tags: {card_name}")
            return []
        # Get signature tags for TARGET card only (top 5 most frequent + 1-3 random)
        # This focuses the search on the target's defining characteristics
        # with some diversity from random tags.
        # NOTE: hash() is salted per process (PYTHONHASHSEED), so this seed is
        # reproducible within a process run, not across runs.
        card_seed = hash(card_name) % (2**31)
        target_signature = self.get_signature_tags(
            target_tags,
            top_n=5,
            seed=card_seed
        )
        logger.debug(
            f"Target '{card_name}': {len(target_tags)} tags -> "
            f"{len(target_signature)} signature tags"
        )
        # Build descending list of thresholds to try: 80% -> 60% -> 50%
        # (70% is skipped intentionally for performance)
        thresholds_to_try = [threshold]
        if adaptive:
            thresholds_to_try = [t for t in (0.8, 0.6, 0.5) if threshold >= t]
            if not thresholds_to_try:
                # FIX: a starting threshold below 0.5 used to yield an empty
                # list here, so the loop never ran and the method always
                # returned []. Fall back to the caller's threshold instead.
                thresholds_to_try = [threshold]
        results = []
        threshold_used = threshold
        for current_threshold in thresholds_to_try:
            # Use inverted index for fast candidate filtering
            # Instead of checking all 29k cards, only check cards that share at least one signature tag
            results = []
            # Build inverted index on first use (lazily)
            if self._tag_to_cards_index is None:
                self._build_tag_index()
            # Get candidate cards that share at least one signature tag
            # This drastically reduces the number of cards we need to check
            candidate_cards = set()
            for tag in target_signature:
                if tag in self._tag_to_cards_index:
                    candidate_cards.update(self._tag_to_cards_index[tag])
            # Remove the target card itself
            candidate_cards.discard(card_name)
            if not candidate_cards:
                continue  # No candidates at all, try lower threshold
            # Now calculate scores only for candidates
            # Pre-filter candidates by checking if they meet minimum overlap requirement
            min_overlap = int(len(target_signature) * current_threshold)
            for candidate_name in candidate_cards:
                candidate_tags = self.cleaned_tags_cache.get(candidate_name)
                if not candidate_tags:
                    continue
                # Fast overlap check using set intersection
                overlap = target_signature & candidate_tags
                overlap_count = len(overlap)
                # Quick filter: skip if overlap too small
                if overlap_count < min_overlap:
                    continue
                # Calculate exact containment score
                containment_score = overlap_count / len(target_signature)
                if containment_score >= current_threshold:
                    # Get EDHREC rank efficiently from card metadata
                    edhrec_rank = self._card_metadata.get(candidate_name, {}).get('edhrecRank', float('inf'))
                    results.append({
                        "name": candidate_name,
                        "similarity": containment_score,
                        "themeTags": list(candidate_tags),
                        "edhrecRank": edhrec_rank,
                    })
            # Sort by similarity descending, then by EDHREC rank ascending (lower is better)
            # Unranked cards (inf) will appear last
            results.sort(key=lambda x: (-x["similarity"], x["edhrecRank"]))
            # Check if we have enough results
            if len(results) >= min_results or not adaptive:
                threshold_used = current_threshold
                break
            # Log that we're trying a lower threshold
            logger.info(
                f"Found {len(results)} results at {current_threshold:.0%} "
                f"for '{card_name}', trying lower threshold..."
            )
        # Add threshold_used to results
        for result in results:
            result["threshold_used"] = threshold_used
        logger.info(
            f"Found {len(results)} similar cards for '{card_name}' "
            f"at {threshold_used:.0%} threshold"
        )
        final_results = results[:limit]
        # Cache the results for future lookups
        if use_cache and self.cache.enabled and final_results:
            self.cache.set_similar(card_name, final_results)
            logger.debug(f"Cached {len(final_results)} results for '{card_name}'")
        return final_results

View file

@ -0,0 +1,386 @@
"""
Similarity cache manager for card similarity calculations.
Provides persistent caching of pre-computed card similarity scores to improve
card detail page load times from 2-6s down to <500ms.
Cache format: Parquet file with columnar structure:
- card_name: str (source card)
- similar_name: str (similar card name)
- similarity: float (similarity score)
- edhrecRank: float (EDHREC rank of similar card)
- rank: int (ranking position, 0-19 for top 20)
Metadata stored in separate JSON sidecar file.
Benefits vs JSON:
- 5-10x faster load times
- 50-70% smaller file size
- Better compression for large datasets
- Consistent with other card data storage
"""
import json
import logging
import os
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from datetime import datetime
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
# Default cache settings
CACHE_VERSION = "2.0"  # Bumped for Parquet format
# Both defaults live alongside the other card data under card_files/.
DEFAULT_CACHE_PATH = Path(__file__).parents[3] / "card_files" / "similarity_cache.parquet"
DEFAULT_METADATA_PATH = Path(__file__).parents[3] / "card_files" / "similarity_cache_metadata.json"


class SimilarityCache:
    """Manages persistent cache for card similarity calculations using Parquet."""

    def __init__(self, cache_path: Optional[Path] = None, enabled: bool = True):
        """
        Initialize similarity cache manager.

        Args:
            cache_path: Path to cache file. If None, uses DEFAULT_CACHE_PATH
            enabled: Whether cache is enabled (can be disabled via env var)
        """
        self.cache_path = cache_path or DEFAULT_CACHE_PATH
        # Metadata lives in a JSON sidecar next to the Parquet file,
        # named "<stem>_metadata.json".
        self.metadata_path = self.cache_path.with_name(
            self.cache_path.stem + "_metadata.json"
        )
        # Only the exact string "1" (the default) keeps the cache enabled;
        # any other SIMILARITY_CACHE_ENABLED value disables it.
        self.enabled = enabled and os.getenv("SIMILARITY_CACHE_ENABLED", "1") == "1"
        # In-memory memoization of the loaded cache/metadata.
        self._cache_df: Optional[pd.DataFrame] = None
        self._metadata: Optional[dict] = None
        # Ensure cache directory exists
        self.cache_path.parent.mkdir(parents=True, exist_ok=True)
        if self.enabled:
            logger.info(f"SimilarityCache initialized at {self.cache_path}")
        else:
            logger.info("SimilarityCache disabled")

    def load_cache(self) -> pd.DataFrame:
        """
        Load cache from disk.

        The result is memoized in self._cache_df, so the Parquet file is read
        at most once per instance.

        Returns:
            DataFrame with columns: card_name, similar_name, similarity, edhrecRank, rank
            Returns empty DataFrame if file doesn't exist or loading fails
        """
        if not self.enabled:
            return self._empty_cache_df()
        if self._cache_df is not None:
            return self._cache_df
        if not self.cache_path.exists():
            logger.info("Cache file not found, returning empty cache")
            self._cache_df = self._empty_cache_df()
            return self._cache_df
        try:
            # Load Parquet file
            self._cache_df = pq.read_table(self.cache_path).to_pandas()
            # Load metadata (sidecar JSON); fall back to an empty structure
            if self.metadata_path.exists():
                with open(self.metadata_path, "r", encoding="utf-8") as f:
                    self._metadata = json.load(f)
            else:
                self._metadata = self._empty_metadata()
            # Validate cache structure
            if not self._validate_cache(self._cache_df):
                logger.warning("Cache validation failed, returning empty cache")
                self._cache_df = self._empty_cache_df()
                return self._cache_df
            total_cards = len(self._cache_df["card_name"].unique()) if len(self._cache_df) > 0 else 0
            logger.info(
                f"Loaded similarity cache v{self._metadata.get('version', 'unknown')} with {total_cards:,} cards ({len(self._cache_df):,} entries)"
            )
            return self._cache_df
        except Exception as e:
            # Any load failure degrades to an empty cache rather than raising.
            logger.error(f"Failed to load cache: {e}")
            self._cache_df = self._empty_cache_df()
            return self._cache_df

    def save_cache(self, cache_df: pd.DataFrame, metadata: Optional[dict] = None) -> bool:
        """
        Save cache to disk.

        Uses a write-to-temp-then-replace pattern for both the Parquet file
        and the metadata sidecar so readers never see a partially written file.

        Args:
            cache_df: DataFrame with similarity data
            metadata: Optional metadata dict. If None, uses current metadata with updates.

        Returns:
            True if save successful, False otherwise
        """
        if not self.enabled:
            logger.debug("Cache disabled, skipping save")
            return False
        try:
            # Ensure directory exists
            self.cache_path.parent.mkdir(parents=True, exist_ok=True)
            # Update metadata
            if metadata is None:
                metadata = self._metadata or self._empty_metadata()
            total_cards = len(cache_df["card_name"].unique()) if len(cache_df) > 0 else 0
            metadata["total_cards"] = total_cards
            metadata["last_updated"] = datetime.now().isoformat()
            metadata["total_entries"] = len(cache_df)
            # Write Parquet file (with compression)
            temp_cache = self.cache_path.with_suffix(".tmp")
            pq.write_table(
                pa.table(cache_df),
                temp_cache,
                compression="snappy",
                version="2.6",
            )
            temp_cache.replace(self.cache_path)
            # Write metadata file
            temp_meta = self.metadata_path.with_suffix(".tmp")
            with open(temp_meta, "w", encoding="utf-8") as f:
                json.dump(metadata, f, indent=2, ensure_ascii=False)
            temp_meta.replace(self.metadata_path)
            # Refresh the in-memory memoized copies to match what was written.
            self._cache_df = cache_df
            self._metadata = metadata
            logger.info(f"Saved similarity cache with {total_cards:,} cards ({len(cache_df):,} entries)")
            return True
        except Exception as e:
            logger.error(f"Failed to save cache: {e}")
            return False

    def get_similar(self, card_name: str, limit: int = 5, randomize: bool = True) -> Optional[list[dict]]:
        """
        Get cached similar cards for a given card.

        Args:
            card_name: Name of the card to look up
            limit: Maximum number of results to return
            randomize: If True, randomly sample from cached results; if False, return top by rank

        Returns:
            List of similar cards with similarity scores, or None if not in cache
        """
        if not self.enabled:
            return None
        cache_df = self.load_cache()
        if len(cache_df) == 0:
            return None
        # Filter to this card
        card_data = cache_df[cache_df["card_name"] == card_name]
        if len(card_data) == 0:
            return None
        # Randomly sample if requested and we have more results than limit
        # (random_state=None means unseeded, so repeat visits vary).
        if randomize and len(card_data) > limit:
            card_data = card_data.sample(n=limit, random_state=None)
        else:
            # Sort by rank and take top N
            card_data = card_data.sort_values("rank").head(limit)
        # Convert to list of dicts
        results = []
        for _, row in card_data.iterrows():
            results.append({
                "name": row["similar_name"],
                "similarity": row["similarity"],
                "edhrecRank": row["edhrecRank"],
            })
        return results

    def set_similar(self, card_name: str, similar_cards: list[dict]) -> bool:
        """
        Cache similar cards for a given card.

        Replaces any existing rows for the card, then persists the whole
        cache to disk via save_cache.

        Args:
            card_name: Name of the card
            similar_cards: List of similar cards with similarity scores

        Returns:
            True if successful, False otherwise
        """
        if not self.enabled:
            return False
        cache_df = self.load_cache()
        # Remove existing entries for this card
        cache_df = cache_df[cache_df["card_name"] != card_name]
        # Add new entries; list position becomes the stored "rank"
        new_rows = []
        for rank, card in enumerate(similar_cards):
            new_rows.append({
                "card_name": card_name,
                "similar_name": card["name"],
                "similarity": card["similarity"],
                "edhrecRank": card.get("edhrecRank", float("inf")),
                "rank": rank,
            })
        if new_rows:
            new_df = pd.DataFrame(new_rows)
            cache_df = pd.concat([cache_df, new_df], ignore_index=True)
        return self.save_cache(cache_df)

    def invalidate(self, card_name: Optional[str] = None) -> bool:
        """
        Invalidate cache entries.

        Args:
            card_name: If provided, invalidate only this card. If None, clear entire cache.

        Returns:
            True if successful, False otherwise
        """
        if not self.enabled:
            return False
        if card_name is None:
            # Clear entire cache
            logger.info("Clearing entire similarity cache")
            self._cache_df = self._empty_cache_df()
            self._metadata = self._empty_metadata()
            return self.save_cache(self._cache_df, self._metadata)
        # Clear specific card
        cache_df = self.load_cache()
        initial_len = len(cache_df)
        cache_df = cache_df[cache_df["card_name"] != card_name]
        if len(cache_df) < initial_len:
            logger.info(f"Invalidated cache for card: {card_name}")
            return self.save_cache(cache_df)
        # Card had no cached entries; nothing written.
        return False

    def get_stats(self) -> dict:
        """
        Get cache statistics.

        Returns:
            Dictionary with cache stats (version, total_cards, build_date, file_size, etc.)
        """
        if not self.enabled:
            return {"enabled": False}
        cache_df = self.load_cache()
        metadata = self._metadata or self._empty_metadata()
        stats = {
            "enabled": True,
            "version": metadata.get("version", "unknown"),
            "total_cards": len(cache_df["card_name"].unique()) if len(cache_df) > 0 else 0,
            "total_entries": len(cache_df),
            "build_date": metadata.get("build_date"),
            "last_updated": metadata.get("last_updated"),
            "file_exists": self.cache_path.exists(),
            "file_path": str(self.cache_path),
            "format": "parquet",
        }
        if self.cache_path.exists():
            stats["file_size_mb"] = round(
                self.cache_path.stat().st_size / (1024 * 1024), 2
            )
        return stats

    @staticmethod
    def _empty_cache_df() -> pd.DataFrame:
        """
        Create empty cache DataFrame.

        Returns:
            Empty DataFrame with correct schema
        """
        return pd.DataFrame(columns=["card_name", "similar_name", "similarity", "edhrecRank", "rank"])

    @staticmethod
    def _empty_metadata() -> dict:
        """
        Create empty metadata structure.

        Returns:
            Empty metadata dictionary
        """
        return {
            "version": CACHE_VERSION,
            "total_cards": 0,
            "total_entries": 0,
            "build_date": None,
            "last_updated": None,
            "threshold": 0.6,
            "min_results": 3,
        }

    @staticmethod
    def _validate_cache(cache_df: pd.DataFrame) -> bool:
        """
        Validate cache DataFrame structure.

        Args:
            cache_df: DataFrame to validate

        Returns:
            True if valid, False otherwise
        """
        if not isinstance(cache_df, pd.DataFrame):
            return False
        # Check required columns
        required_cols = {"card_name", "similar_name", "similarity", "edhrecRank", "rank"}
        if not required_cols.issubset(cache_df.columns):
            logger.warning(f"Cache missing required columns. Expected: {required_cols}, Got: {set(cache_df.columns)}")
            return False
        return True
# Singleton instance for global access
_cache_instance: Optional[SimilarityCache] = None


def get_cache() -> SimilarityCache:
    """
    Return the process-wide SimilarityCache, creating it on first call.

    Honors the SIMILARITY_CACHE_PATH environment variable as a cache-file
    override; when unset, the default path is used.

    Returns:
        Global SimilarityCache instance
    """
    global _cache_instance
    if _cache_instance is not None:
        return _cache_instance
    override = os.getenv("SIMILARITY_CACHE_PATH")
    _cache_instance = SimilarityCache(
        cache_path=Path(override) if override else None
    )
    return _cache_instance

View file

@ -906,6 +906,90 @@ img.lqip.loaded { filter: blur(0); opacity: 1; }
white-space: nowrap;
}
/* Card Details button on tiles */
.card-details-btn {
display: inline-flex;
align-items: center;
justify-content: center;
gap: 0.35rem;
padding: 0.5rem 0.75rem;
background: var(--primary);
color: white;
text-decoration: none;
border-radius: 6px;
font-weight: 500;
font-size: 0.85rem;
transition: all 0.2s;
margin-top: 0.5rem;
border: none;
cursor: pointer;
}
.card-details-btn:hover {
background: var(--primary-hover);
transform: translateY(-1px);
box-shadow: 0 2px 8px rgba(59, 130, 246, 0.4);
}
.card-details-btn svg {
flex-shrink: 0;
}
/* Card Preview Modal */
.preview-modal {
display: none;
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background: rgba(0, 0, 0, 0.85);
z-index: 9999;
align-items: center;
justify-content: center;
}
.preview-modal.active {
display: flex;
}
.preview-content {
position: relative;
max-width: 90%;
max-height: 90%;
}
.preview-content img {
max-width: 100%;
max-height: 90vh;
border-radius: 12px;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
}
.preview-close {
position: absolute;
top: -40px;
right: 0;
background: rgba(255, 255, 255, 0.9);
color: #000;
border: none;
border-radius: 50%;
width: 36px;
height: 36px;
font-size: 24px;
font-weight: bold;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
transition: all 0.2s;
}
.preview-close:hover {
background: #fff;
transform: scale(1.1);
}
/* Pagination controls */
.card-browser-pagination {
display: flex;
@ -1063,3 +1147,55 @@ img.lqip.loaded { filter: blur(0); opacity: 1; }
.theme-chip button:hover {
color: var(--error-color);
}
/* Card Detail Page Styles */
.card-tags {
display: flex;
flex-wrap: wrap;
gap: 0.5rem;
margin-top: 1rem;
margin-bottom: 1rem;
}
.card-tag {
background: var(--ring);
color: white;
padding: 0.35rem 0.75rem;
border-radius: 16px;
font-size: 0.85rem;
font-weight: 500;
}
.back-button {
display: inline-flex;
align-items: center;
gap: 0.5rem;
padding: 0.75rem 1.5rem;
background: var(--panel);
color: var(--text);
text-decoration: none;
border-radius: 8px;
border: 1px solid var(--border);
font-weight: 500;
transition: all 0.2s;
margin-bottom: 2rem;
}
.back-button:hover {
background: var(--ring);
color: white;
border-color: var(--ring);
}
/* Card Detail Page - Main Card Image */
.card-image-large {
flex: 0 0 auto;
max-width: 360px !important;
width: 100%;
}
.card-image-large img {
width: 100%;
height: auto;
border-radius: 12px;
}

View file

@ -1,6 +1,6 @@
{# Single card tile for grid display #}
<div class="card-browser-tile card-tile" data-card-name="{{ card.name }}" data-tags="{{ card.themeTags_parsed|join(', ') if card.themeTags_parsed else '' }}">
{# Card image #}
{# Card image (uses hover system for preview) #}
<div class="card-browser-tile-image">
<img
loading="lazy"
@ -55,6 +55,16 @@
{% endif %}
</div>
{# Card Details button (only show if feature enabled) #}
{% if enable_card_details %}
<a href="/cards/{{ card.name }}" class="card-details-btn" onclick="event.stopPropagation()">
Card Details
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor">
<path d="M8.707 3.293a1 1 0 010 1.414L5.414 8l3.293 3.293a1 1 0 01-1.414 1.414l-4-4a1 1 0 010-1.414l4-4a1 1 0 011.414 0z" transform="rotate(180 8 8)"/>
</svg>
</a>
{% endif %}
{# Theme tags (show all tags, not truncated) #}
{% if card.themeTags_parsed and card.themeTags_parsed|length > 0 %}
<div class="card-browser-tile-tags">

View file

@ -0,0 +1,250 @@
<style>
.similar-cards-header {
display: flex;
align-items: center;
justify-content: space-between;
margin-bottom: 1.5rem;
}
.similar-cards-title {
font-size: 1.5rem;
font-weight: bold;
color: var(--text);
}
.similar-cards-grid {
display: grid;
grid-template-columns: repeat(auto-fill, 280px);
gap: 1.25rem;
margin-bottom: 2rem;
justify-content: start;
}
.similar-card-tile {
background: var(--panel);
border: 1px solid var(--border);
border-radius: 12px;
padding: 0.85rem;
transition: all 0.2s;
display: flex;
flex-direction: column;
gap: 0.6rem;
width: 280px;
}
.similar-card-tile:hover {
transform: translateY(-2px);
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
border-color: var(--ring);
}
.similar-card-image {
width: 100%;
cursor: pointer;
border-radius: 8px;
transition: transform 0.2s;
}
.similar-card-image:hover {
transform: scale(1.02);
}
.similar-card-image img {
width: 100%;
height: auto;
border-radius: 8px;
}
.similar-card-info {
display: flex;
flex-direction: column;
gap: 0.5rem;
}
.similar-card-name {
font-size: 1rem;
font-weight: 600;
color: var(--text);
}
.similarity-score {
display: inline-flex;
align-items: center;
gap: 0.5rem;
padding: 0.25rem 0.75rem;
background: var(--ring);
color: white;
border-radius: 16px;
font-size: 0.85rem;
font-weight: 600;
width: fit-content;
}
.similarity-score-high {
background: #28a745;
}
.similarity-score-medium {
background: #ffc107;
color: #000;
}
.similarity-score-low {
background: #6c757d;
}
.similar-card-details-btn {
display: inline-flex;
align-items: center;
justify-content: center;
gap: 0.5rem;
padding: 0.5rem 1rem;
background: var(--ring);
color: white;
text-decoration: none;
border-radius: 6px;
font-weight: 500;
transition: all 0.2s;
margin-top: auto;
}
.similar-card-details-btn:hover {
opacity: 0.9;
transform: translateY(-1px);
}
.no-similar-cards {
text-align: center;
padding: 3rem 1rem;
color: var(--muted);
background: var(--panel);
border: 1px solid var(--border);
border-radius: 12px;
}
.no-similar-cards-icon {
font-size: 3rem;
margin-bottom: 1rem;
opacity: 0.5;
}
.no-similar-cards-text {
font-size: 1.1rem;
font-weight: 500;
}
.similar-card-tags {
display: flex;
flex-wrap: wrap;
gap: 0.35rem;
margin-top: 0.25rem;
}
.similar-tag {
font-size: 0.75rem;
padding: 0.2rem 0.5rem;
background: rgba(148, 163, 184, 0.15);
color: var(--muted);
border-radius: 4px;
white-space: nowrap;
transition: all 0.2s;
}
.similar-tag-overlap {
background: var(--accent, #38bdf8);
color: white;
font-weight: 600;
border: 1px solid rgba(56, 189, 248, 0.3);
box-shadow: 0 0 0 1px rgba(56, 189, 248, 0.2);
}
@media (max-width: 768px) {
.similar-cards-grid {
grid-template-columns: 1fr;
}
}
</style>
<div class="similar-cards-section">
<div class="similar-cards-header">
<h2 class="similar-cards-title">Similar Cards</h2>
</div>
{% if similar_cards and similar_cards|length > 0 %}
<div class="similar-cards-grid">
{% for card in similar_cards %}
<div class="similar-card-tile card-tile" data-card-name="{{ card.name }}">
<!-- Card Image (uses hover system for preview) -->
<div class="similar-card-image">
<img src="https://api.scryfall.com/cards/named?fuzzy={{ card.name|urlencode }}&format=image&version=normal"
alt="{{ card.name }}"
loading="lazy"
data-card-name="{{ card.name }}"
onerror="this.style.display='none'; this.nextElementSibling.style.display='flex';">
{# Fallback for missing images #}
<div style="display:none; width:100%; aspect-ratio:488/680; align-items:center; justify-content:center; background:#1a1d24; color:#9ca3af; font-size:14px; padding:1rem; text-align:center; border-radius:8px;">
{{ card.name }}
</div>
</div>
<!-- Card Info -->
<div class="similar-card-info">
<div class="similar-card-name">{{ card.name }}</div>
<!-- Matching Themes Summary -->
{% if card.themeTags and card.themeTags|length > 0 %}
{% set main_card_tags = main_card_tags|default([]) %}
{% set matching_tags = [] %}
{% for tag in card.themeTags %}
{% if tag in main_card_tags %}
{% set _ = matching_tags.append(tag) %}
{% endif %}
{% endfor %}
{% if matching_tags|length > 0 %}
<div style="font-size: 0.8rem; color: var(--accent, #38bdf8); font-weight: 600; margin-top: 0.25rem;">
✓ {{ matching_tags|length }} matching theme{{ 's' if matching_tags|length > 1 else '' }}
</div>
{% endif %}
{% endif %}
<!-- EDHREC Rank -->
{% if card.edhrecRank %}
<div class="card-stat" style="font-size: 0.85rem; color: var(--muted);">
EDHREC Rank: #{{ card.edhrecRank }}
</div>
{% endif %}
<!-- Theme Tags with Overlap Highlighting -->
{% if card.themeTags and card.themeTags|length > 0 %}
<div class="similar-card-tags">
{% set main_card_tags = main_card_tags|default([]) %}
{% for tag in card.themeTags %}
{% set is_overlap = tag in main_card_tags %}
<span class="similar-tag {% if is_overlap %}similar-tag-overlap{% endif %}" title="{% if is_overlap %}Matches main card{% endif %}">
{{ tag }}
</span>
{% endfor %}
</div>
{% endif %}
</div>
<!-- Card Details Button -->
<a href="/cards/{{ card.name }}" class="similar-card-details-btn" onclick="event.stopPropagation()">
Card Details
<svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor">
<path d="M8.707 3.293a1 1 0 010 1.414L5.414 8l3.293 3.293a1 1 0 01-1.414 1.414l-4-4a1 1 0 010-1.414l4-4a1 1 0 011.414 0z" transform="rotate(180 8 8)"/>
</svg>
</a>
</div>
{% endfor %}
</div>
{% else %}
<div class="no-similar-cards">
<div class="no-similar-cards-icon">🔍</div>
<div class="no-similar-cards-text">No similar cards found</div>
<p style="margin-top: 0.5rem; font-size: 0.9rem;">
This card's theme tags are unique, or no other cards share enough characteristics to be considered similar.
</p>
</div>
{% endif %}
</div>

View file

@ -0,0 +1,273 @@
{% extends "base.html" %}
{% block title %}{{ card.name }} - Card Details{% endblock %}
{% block head %}
<style>
.card-detail-container {
max-width: 1400px;
margin: 0 auto;
padding: 2rem 1rem;
}
.card-detail-header {
display: flex;
gap: 2rem;
margin-bottom: 3rem;
flex-wrap: wrap;
}
.card-image-large {
flex: 0 0 auto;
max-width: 360px;
cursor: pointer;
transition: transform 0.2s;
}
.card-image-large:hover {
transform: scale(1.02);
}
.card-image-large img {
width: 100%;
height: auto;
border-radius: 12px;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
}
.card-info {
flex: 1;
min-width: 300px;
}
.card-title {
font-size: 2rem;
font-weight: bold;
margin-bottom: 0.5rem;
color: var(--text);
}
.card-type {
font-size: 1.1rem;
color: var(--muted);
margin-bottom: 1rem;
}
.card-stats {
display: flex;
gap: 2rem;
margin-bottom: 1.5rem;
flex-wrap: wrap;
}
.card-stat {
display: flex;
flex-direction: column;
}
.card-stat-label {
font-size: 0.85rem;
color: var(--muted);
text-transform: uppercase;
letter-spacing: 0.5px;
margin-bottom: 0.25rem;
}
.card-stat-value {
font-size: 1.25rem;
font-weight: 600;
color: var(--text);
}
.card-text {
background: var(--panel);
padding: 1.5rem;
border-radius: 8px;
margin-bottom: 1.5rem;
line-height: 1.6;
white-space: pre-wrap;
border: 1px solid var(--border);
}
.card-colors {
display: flex;
gap: 0.5rem;
margin-bottom: 1rem;
}
.color-symbol {
width: 24px;
height: 24px;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
font-weight: bold;
font-size: 14px;
border: 2px solid currentColor;
}
.color-W { background: #F0E68C; color: #000; }
.color-U { background: #0E68AB; color: #fff; }
.color-B { background: #150B00; color: #fff; }
.color-R { background: #D32029; color: #fff; }
.color-G { background: #00733E; color: #fff; }
.color-C { background: #ccc; color: #000; }
.card-tags {
display: flex;
flex-wrap: wrap;
gap: 0.5rem;
margin-bottom: 1rem;
}
.card-tag {
background: var(--ring);
color: white;
padding: 0.35rem 0.75rem;
border-radius: 16px;
font-size: 0.85rem;
font-weight: 500;
}
.back-button {
display: inline-flex;
align-items: center;
gap: 0.5rem;
padding: 0.75rem 1.5rem;
background: var(--panel);
color: var(--text);
text-decoration: none;
border-radius: 8px;
border: 1px solid var(--border);
font-weight: 500;
transition: all 0.2s;
margin-bottom: 2rem;
}
.back-button:hover {
background: var(--ring);
color: white;
border-color: var(--ring);
}
.similar-section {
margin-top: 3rem;
padding-top: 2rem;
border-top: 2px solid var(--border);
}
/* Responsive adjustments */
@media (max-width: 768px) {
.card-detail-header {
flex-direction: column;
align-items: center;
}
.card-image-large {
max-width: 100%;
}
.card-stats {
gap: 1rem;
}
.card-title {
font-size: 1.5rem;
}
}
</style>
{% endblock %}
{% block content %}
<div class="card-detail-container">
<!-- Back Button -->
<a href="/cards" class="back-button">
<svg width="20" height="20" viewBox="0 0 20 20" fill="currentColor">
<path d="M12.707 5.293a1 1 0 010 1.414L9.414 10l3.293 3.293a1 1 0 01-1.414 1.414l-4-4a1 1 0 010-1.414l4-4a1 1 0 011.414 0z"/>
</svg>
Back to Card Browser
</a>
<!-- Card Header -->
<div class="card-detail-header">
<!-- Card Image (no hover on detail page) -->
<div class="card-image-large">
<img src="https://api.scryfall.com/cards/named?fuzzy={{ card.name|urlencode }}&format=image&version=normal"
alt="{{ card.name }}"
loading="lazy"
onerror="this.style.display='none'; this.nextElementSibling.style.display='flex';">
{# Fallback for missing images #}
<div style="display:none; width:100%; height:680px; align-items:center; justify-content:center; background:#1a1d24; color:#9ca3af; font-size:18px; padding:2rem; text-align:center; border-radius:12px;">
{{ card.name }}
</div>
</div>
<!-- Card Info -->
<div class="card-info">
<h1 class="card-title">{{ card.name }}</h1>
<div class="card-type">{{ card.type }}</div>
<!-- Color Identity -->
{% if card.colors %}
<div class="card-colors">
{% for color in card.colors %}
<span class="color-symbol color-{{ color }}">{{ color }}</span>
{% endfor %}
</div>
{% endif %}
<!-- Stats -->
<div class="card-stats">
{% if card.manaValue is not none %}
<div class="card-stat">
<span class="card-stat-label">Mana Value</span>
<span class="card-stat-value">{{ card.manaValue }}</span>
</div>
{% endif %}
{% if card.power is not none and card.power != 'NaN' and card.power|string != 'nan' %}
<div class="card-stat">
<span class="card-stat-label">Power / Toughness</span>
<span class="card-stat-value">{{ card.power }} / {{ card.toughness }}</span>
</div>
{% endif %}
{% if card.edhrecRank %}
<div class="card-stat">
<span class="card-stat-label">EDHREC Rank</span>
<span class="card-stat-value">#{{ card.edhrecRank }}</span>
</div>
{% endif %}
{% if card.rarity %}
<div class="card-stat">
<span class="card-stat-label">Rarity</span>
<span class="card-stat-value">{{ card.rarity | capitalize }}</span>
</div>
{% endif %}
</div>
<!-- Oracle Text -->
{% if card.text %}
<div class="card-text" style="white-space: pre-line;">{{ card.text | replace('\\n', '\n') }}</div>
{% endif %}
<!-- Theme Tags -->
{% if card.themeTags_parsed and card.themeTags_parsed|length > 0 %}
<div class="card-tags">
{% for tag in card.themeTags_parsed %}
<span class="card-tag">{{ tag }}</span>
{% endfor %}
</div>
{% endif %}
</div>
</div>
<!-- Similar Cards Section -->
<div class="similar-section">
{% include "browse/cards/_similar_cards.html" %}
</div>
</div>
{% endblock %}

View file

@ -345,7 +345,7 @@
<button
type="button"
class="btn"
hx-get="/cards/grid?cursor={{ last_card|urlencode }}{% if search %}&search={{ search|urlencode }}{% endif %}{% if theme %}&theme={{ theme|urlencode }}{% endif %}{% if color %}&color={{ color|urlencode }}{% endif %}{% if card_type %}&card_type={{ card_type|urlencode }}{% endif %}{% if rarity %}&rarity={{ rarity|urlencode }}{% endif %}{% if cmc_min %}&cmc_min={{ cmc_min }}{% endif %}{% if cmc_max %}&cmc_max={{ cmc_max }}{% endif %}"
hx-get="/cards/grid?cursor={{ last_card|urlencode }}{% if search %}&search={{ search|urlencode }}{% endif %}{% for theme in themes %}&themes={{ theme|urlencode }}{% endfor %}{% if color %}&color={{ color|urlencode }}{% endif %}{% if card_type %}&card_type={{ card_type|urlencode }}{% endif %}{% if rarity %}&rarity={{ rarity|urlencode }}{% endif %}{% if sort and sort != 'name_asc' %}&sort={{ sort|urlencode }}{% endif %}{% if cmc_min %}&cmc_min={{ cmc_min }}{% endif %}{% if cmc_max %}&cmc_max={{ cmc_max }}{% endif %}{% if power_min %}&power_min={{ power_min }}{% endif %}{% if power_max %}&power_max={{ power_max }}{% endif %}{% if tough_min %}&tough_min={{ tough_min }}{% endif %}{% if tough_max %}&tough_max={{ tough_max }}{% endif %}"
hx-target="#card-grid"
hx-swap="beforeend"
hx-indicator="#load-indicator">

View file

@ -0,0 +1,88 @@
{% extends "base.html" %}
{% block title %}{{ error_code }} Error{% endblock %}
{% block content %}
{# Generic error page: renders error_code and error_message supplied by the route.
   Optional context: back_link (href for a contextual back button) and back_text
   (its label; defaults to "Go Back"). A "Go Home" button is always shown. #}
<style>
/* Centered column holding the code, message, and action buttons. */
.error-container {
max-width: 600px;
margin: 4rem auto;
padding: 2rem;
text-align: center;
}
/* Large status code (e.g. 404) in the theme accent color. */
.error-code {
font-size: 6rem;
font-weight: bold;
color: var(--ring);
margin-bottom: 1rem;
line-height: 1;
}
.error-message {
font-size: 1.5rem;
font-weight: 500;
color: var(--text);
margin-bottom: 2rem;
}
.error-actions {
display: flex;
gap: 1rem;
justify-content: center;
flex-wrap: wrap;
}
/* Primary action button (accent background). */
.error-btn {
display: inline-flex;
align-items: center;
gap: 0.5rem;
padding: 0.75rem 1.5rem;
background: var(--ring);
color: white;
text-decoration: none;
border-radius: 8px;
font-weight: 500;
transition: all 0.2s;
}
.error-btn:hover {
opacity: 0.9;
transform: translateY(-1px);
}
/* Secondary variant (panel background) used for the Home button. */
.error-btn-secondary {
background: var(--panel);
color: var(--text);
border: 1px solid var(--border);
}
.error-btn-secondary:hover {
background: var(--border);
}
</style>
<div class="error-container">
<div class="error-code">{{ error_code }}</div>
<div class="error-message">{{ error_message }}</div>
<div class="error-actions">
{# Contextual back button — rendered only when the route supplies back_link #}
{% if back_link %}
<a href="{{ back_link }}" class="error-btn">
<svg width="20" height="20" viewBox="0 0 20 20" fill="currentColor">
<path d="M12.707 5.293a1 1 0 010 1.414L9.414 10l3.293 3.293a1 1 0 01-1.414 1.414l-4-4a1 1 0 010-1.414l4-4a1 1 0 011.414 0z"/>
</svg>
{{ back_text if back_text else "Go Back" }}
</a>
{% endif %}
{# Always-available escape hatch back to the site root #}
<a href="/" class="error-btn error-btn-secondary">
<svg width="20" height="20" viewBox="0 0 20 20" fill="currentColor">
<path d="M10.707 2.293a1 1 0 00-1.414 0l-7 7a1 1 0 001.414 1.414L4 10.414V17a1 1 0 001 1h2a1 1 0 001-1v-2a1 1 0 011-1h2a1 1 0 011 1v2a1 1 0 001 1h2a1 1 0 001-1v-6.586l.293.293a1 1 0 001.414-1.414l-7-7z"/>
</svg>
Go Home
</a>
</div>
</div>
{% endblock %}

View file

@ -47,6 +47,25 @@
<button type="button" id="btn-refresh-themes" class="action-btn" onclick="refreshThemes()">Refresh Themes Only</button>
<button type="button" id="btn-rebuild-cards" class="action-btn" onclick="rebuildCards()">Rebuild Card Files</button>
</div>
{% if similarity_enabled %}
<details style="margin-top:1.25rem;" open>
<summary>Similarity Cache Status</summary>
<div id="similarity-status" style="margin-top:.5rem; padding:1rem; border:1px solid var(--border); background:#0f1115; border-radius:8px;">
<div class="muted">Status:</div>
<div id="similarity-status-line" style="margin-top:.25rem;">Checking…</div>
<div class="muted" id="similarity-meta-line" style="margin-top:.25rem; display:none;"></div>
<div class="muted" id="similarity-warning-line" style="margin-top:.25rem; display:none; color:#f59e0b;"></div>
</div>
</details>
<div style="margin-top:.75rem; display:flex; gap:.5rem; flex-wrap:wrap;">
<button type="button" id="btn-build-similarity" class="action-btn" onclick="buildSimilarityCache()">Build Similarity Cache</button>
<label class="muted" style="align-self:center; font-size:.85rem;">
<input type="checkbox" id="chk-skip-download" /> Skip GitHub download (build locally)
</label>
<span class="muted" style="align-self:center; font-size:.85rem;">(~15-20 min local, instant if cached on GitHub)</span>
</div>
{% endif %}
</section>
<script>
(function(){
@ -239,6 +258,123 @@
}, 2000);
});
};
// Similarity cache status polling
{% if similarity_enabled %}
function pollSimilarityStatus(){
fetch('/status/similarity', { cache: 'no-store' })
.then(function(r){ return r.json(); })
.then(function(data){
var line = document.getElementById('similarity-status-line');
var metaLine = document.getElementById('similarity-meta-line');
var warnLine = document.getElementById('similarity-warning-line');
if (!line) return;
if (data.exists && data.valid) {
var cardCount = data.card_count ? data.card_count.toLocaleString() : '?';
var sizeMB = data.size_mb ? data.size_mb.toFixed(1) : '?';
var ageDays = data.age_days !== null ? data.age_days.toFixed(1) : '?';
line.textContent = 'Cache exists and is valid';
line.style.color = '#34d399';
if (metaLine) {
metaLine.style.display = '';
metaLine.textContent = cardCount + ' cards cached • ' + sizeMB + ' MB • ' + ageDays + ' days old';
}
if (warnLine && data.needs_refresh) {
warnLine.style.display = '';
warnLine.textContent = '⚠ Cache is ' + ageDays + ' days old. Consider rebuilding for fresher data.';
} else if (warnLine) {
warnLine.style.display = 'none';
}
} else if (data.exists && !data.valid) {
line.textContent = 'Cache file is invalid or corrupted';
line.style.color = '#f87171';
if (metaLine) metaLine.style.display = 'none';
if (warnLine) {
warnLine.style.display = '';
warnLine.textContent = '⚠ Rebuild cache to fix.';
}
} else {
line.textContent = 'No cache found';
line.style.color = '#94a3b8';
if (metaLine) metaLine.style.display = 'none';
if (warnLine) {
warnLine.style.display = '';
warnLine.textContent = '⚠ Build cache to enable similar card features.';
}
}
})
.catch(function(){});
}
window.buildSimilarityCache = function(){
var btn = document.getElementById('btn-build-similarity');
var skipDownloadCheckbox = document.getElementById('chk-skip-download');
if (!btn) return;
var skipDownload = skipDownloadCheckbox && skipDownloadCheckbox.checked;
var confirmMsg = skipDownload
? 'Build similarity cache locally for ~30k cards? This will take approximately 15-20 minutes and uses parallel processing.'
: 'Build similarity cache? This will first try to download a pre-built cache from GitHub (instant), or build locally if unavailable (~15-20 minutes).';
if (!confirm(confirmMsg)) {
return;
}
btn.disabled = true;
btn.textContent = 'Building... (check terminal for progress)';
var body = skipDownload ? JSON.stringify({ skip_download: true }) : '{}';
fetch('/similarity/build', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: body
})
.then(function(r){
if (!r.ok) throw new Error('Build failed');
return r.json();
})
.then(function(data){
if (data.success) {
btn.textContent = 'Build Started! Check terminal for progress...';
// Poll status more frequently while building
var pollCount = 0;
var buildPoll = setInterval(function(){
pollSimilarityStatus();
pollCount++;
// Stop intensive polling after 2 minutes, rely on normal polling
if (pollCount > 40) clearInterval(buildPoll);
}, 3000);
setTimeout(function(){
btn.textContent = 'Build Similarity Cache';
btn.disabled = false;
}, 8000);
} else {
btn.textContent = 'Build Failed: ' + (data.error || 'Unknown error');
setTimeout(function(){
btn.textContent = 'Build Similarity Cache';
btn.disabled = false;
}, 3000);
}
})
.catch(function(err){
btn.textContent = 'Build Failed';
setTimeout(function(){
btn.textContent = 'Build Similarity Cache';
btn.disabled = false;
}, 3000);
});
};
pollSimilarityStatus();
setInterval(pollSimilarityStatus, 10000); // Poll every 10s
{% endif %}
setInterval(poll, 3000);
poll();
pollThemes();

File diff suppressed because it is too large Load diff

View file

@ -29,6 +29,12 @@ services:
SHOW_MUST_HAVE_BUTTONS: "0" # 1=show must include/exclude controls in the UI (default hidden)
SHOW_MISC_POOL: "0"
WEB_THEME_PICKER_DIAGNOSTICS: "1" # 1=enable extra theme catalog diagnostics fields, uncapped view & /themes/metrics
ENABLE_CARD_DETAILS: "1" # 1=show Card Details button in card browser (with similarity cache)
# Similarity Cache (Phase 2 - M5 Performance Optimization)
SIMILARITY_CACHE_ENABLED: "1" # 1=use pre-computed similarity cache; 0=real-time calculation
SIMILARITY_CACHE_PATH: "card_files/similarity_cache.parquet" # Path to Parquet cache file
# Partner / Background mechanics (feature flag)
ENABLE_PARTNER_MECHANICS: "1" # 1=unlock partner/background commander inputs
ENABLE_PARTNER_SUGGESTIONS: "1" # 1=enable partner suggestion API/UI (requires dataset)

View file

@ -31,6 +31,12 @@ services:
SHOW_MUST_HAVE_BUTTONS: "0" # 1=show must include/exclude controls in the UI (default hidden)
SHOW_MISC_POOL: "0"
WEB_THEME_PICKER_DIAGNOSTICS: "1" # 1=enable extra theme catalog diagnostics fields, uncapped view & /themes/metrics
ENABLE_CARD_DETAILS: "1" # 1=show Card Details button in card browser (with similarity cache)
# Similarity Cache (Phase 2 - M5 Performance Optimization)
SIMILARITY_CACHE_ENABLED: "1" # 1=use pre-computed similarity cache; 0=real-time calculation
SIMILARITY_CACHE_PATH: "card_files/similarity_cache.parquet" # Path to Parquet cache file
# HEADLESS_EXPORT_JSON: "1" # 1=export resolved run config JSON
ENABLE_PARTNER_MECHANICS: "1" # 1=unlock partner/background commander inputs
ENABLE_PARTNER_SUGGESTIONS: "1" # 1=enable partner suggestion API/UI (requires dataset)

View file

@ -6,6 +6,13 @@ seed_defaults() {
# Ensure base config and data directories exist
mkdir -p /app/config /app/config/card_lists /app/config/themes /app/card_files
# Download pre-built similarity cache from GitHub if not present
if [ ! -f /app/card_files/similarity_cache.parquet ]; then
echo "Downloading similarity cache from GitHub..."
wget -q https://raw.githubusercontent.com/mwisnowski/mtg_python_deckbuilder/similarity-cache-data/card_files/similarity_cache.parquet -O /app/card_files/similarity_cache.parquet 2>/dev/null || echo "Warning: Could not download similarity cache"
wget -q https://raw.githubusercontent.com/mwisnowski/mtg_python_deckbuilder/similarity-cache-data/card_files/similarity_cache_metadata.json -O /app/card_files/similarity_cache_metadata.json 2>/dev/null || true
fi
# Copy from baked-in defaults if targets are missing
if [ -d "/.defaults/config" ]; then
# deck.json