mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-16 07:30:13 +01:00
perf: improve commander selection speed and fix color identity display
This commit is contained in:
parent
454269daab
commit
345dfb3e01
12 changed files with 321 additions and 106 deletions
26
.github/workflows/build-similarity-cache.yml
vendored
26
.github/workflows/build-similarity-cache.yml
vendored
|
|
@ -198,29 +198,19 @@ jobs:
|
||||||
if: steps.check_cache.outputs.needs_build == 'true'
|
if: steps.check_cache.outputs.needs_build == 'true'
|
||||||
run: |
|
run: |
|
||||||
if [ ! -f "card_files/similarity_cache.parquet" ]; then
|
if [ ! -f "card_files/similarity_cache.parquet" ]; then
|
||||||
echo "ERROR: Cache Parquet file was not created"
|
echo "ERROR: Similarity cache not created"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if [ ! -f "card_files/similarity_cache_metadata.json" ]; then
|
if [ ! -f "card_files/similarity_cache_metadata.json" ]; then
|
||||||
echo "ERROR: Cache metadata file was not created"
|
echo "ERROR: Similarity cache metadata not created"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if [ ! -f "card_files/processed/commander_cards.parquet" ]; then
|
||||||
|
echo "ERROR: Commander cache not created"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check cache validity
|
echo "✓ All cache files created successfully"
|
||||||
python -c "
|
|
||||||
import json
|
|
||||||
from pathlib import Path
|
|
||||||
from code.web.services.similarity_cache import get_cache
|
|
||||||
|
|
||||||
cache = get_cache()
|
|
||||||
stats = cache.get_stats()
|
|
||||||
|
|
||||||
if stats['total_cards'] < 20000:
|
|
||||||
raise ValueError(f\"Cache only has {stats['total_cards']} cards, expected ~30k\")
|
|
||||||
|
|
||||||
print(f\"✓ Cache is valid with {stats['total_cards']:,} cards, {stats['total_entries']:,} entries\")
|
|
||||||
print(f\" File size: {stats['file_size_mb']:.2f} MB\")
|
|
||||||
"
|
|
||||||
|
|
||||||
- name: Get cache metadata for commit message
|
- name: Get cache metadata for commit message
|
||||||
if: steps.check_cache.outputs.needs_build == 'true'
|
if: steps.check_cache.outputs.needs_build == 'true'
|
||||||
|
|
@ -266,6 +256,7 @@ jobs:
|
||||||
echo "- \`card_files/similarity_cache.parquet\` - Pre-computed card similarity cache" >> README.md
|
echo "- \`card_files/similarity_cache.parquet\` - Pre-computed card similarity cache" >> README.md
|
||||||
echo "- \`card_files/similarity_cache_metadata.json\` - Cache metadata" >> README.md
|
echo "- \`card_files/similarity_cache_metadata.json\` - Cache metadata" >> README.md
|
||||||
echo "- \`card_files/processed/all_cards.parquet\` - Tagged card database" >> README.md
|
echo "- \`card_files/processed/all_cards.parquet\` - Tagged card database" >> README.md
|
||||||
|
echo "- \`card_files/processed/commander_cards.parquet\` - Commander-only cache (fast lookups)" >> README.md
|
||||||
echo "- \`card_files/processed/.tagging_complete.json\` - Tagging status" >> README.md
|
echo "- \`card_files/processed/.tagging_complete.json\` - Tagging status" >> README.md
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -278,6 +269,7 @@ jobs:
|
||||||
|
|
||||||
# Add processed Parquet and status file
|
# Add processed Parquet and status file
|
||||||
git add -f card_files/processed/all_cards.parquet
|
git add -f card_files/processed/all_cards.parquet
|
||||||
|
git add -f card_files/processed/commander_cards.parquet
|
||||||
git add -f card_files/processed/.tagging_complete.json
|
git add -f card_files/processed/.tagging_complete.json
|
||||||
|
|
||||||
git add README.md 2>/dev/null || true
|
git add README.md 2>/dev/null || true
|
||||||
|
|
|
||||||
23
CHANGELOG.md
23
CHANGELOG.md
|
|
@ -8,7 +8,28 @@ This format follows Keep a Changelog principles and aims for Semantic Versioning
|
||||||
- Link PRs/issues inline when helpful, e.g., (#123) or [#123]. Reference-style links at the bottom are encouraged for readability.
|
- Link PRs/issues inline when helpful, e.g., (#123) or [#123]. Reference-style links at the bottom are encouraged for readability.
|
||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
_No unreleased changes yet_
|
### Added
|
||||||
|
_None_
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
_None_
|
||||||
|
|
||||||
|
### Removed
|
||||||
|
_None_
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- **Color Identity Display**: Fixed commander color identity showing incorrectly as "Colorless (C)" for non-partner commanders in the summary panel
|
||||||
|
|
||||||
|
### Performance
|
||||||
|
- **Commander Selection Speed**: Dramatically improved response time from 4+ seconds to under 1 second
|
||||||
|
- Implemented intelligent caching for card data to eliminate redundant file loading
|
||||||
|
- Both commander data and full card database now cached with automatic refresh when data updates
|
||||||
|
|
||||||
|
### Deprecated
|
||||||
|
_None_
|
||||||
|
|
||||||
|
### Security
|
||||||
|
_None_
|
||||||
|
|
||||||
## [3.0.0] - 2025-10-19
|
## [3.0.0] - 2025-10-19
|
||||||
### Summary
|
### Summary
|
||||||
|
|
|
||||||
|
|
@ -1,36 +1,33 @@
|
||||||
# MTG Python Deckbuilder ${VERSION}
|
# MTG Python Deckbuilder ${VERSION}
|
||||||
|
|
||||||
|
## [Unreleased]
|
||||||
|
|
||||||
### Summary
|
### Summary
|
||||||
Major infrastructure upgrade: migrated to Parquet data format with comprehensive performance improvements, combo tag support, simplified data management, and instant setup via GitHub downloads.
|
Performance improvements and bug fixes for commander selection and display.
|
||||||
|
|
||||||
### What's New
|
### Added
|
||||||
- **Instant Setup** - Download pre-tagged card database from GitHub instead of 15-20 minute initial build
|
_None_
|
||||||
- **Parquet Migration** - Unified `all_cards.parquet` replaces multiple CSV files for faster, more efficient card storage
|
|
||||||
- **Combo Tags** - 226 cards now tagged with combo-enabling abilities for better synergy detection
|
|
||||||
- **Parallel Tagging** - Optional 4.2x speedup for card tagging (22s → 5.2s)
|
|
||||||
- **Automatic Deduplication** - No more duplicate card printings cluttering your deck options
|
|
||||||
- **Built-in Commander Filtering** - Instant identification of 2,751 commanders and 31 backgrounds
|
|
||||||
|
|
||||||
### Improvements
|
### Changed
|
||||||
- **First-Run Experience** - Auto-downloads pre-tagged data on first run (seconds vs. 15-20 minutes)
|
_None_
|
||||||
- **Faster Startup** - Binary columnar format loads significantly faster than text parsing
|
|
||||||
- **Smaller File Sizes** - Single Parquet file is more compact than multiple CSVs
|
### Removed
|
||||||
- **Better Data Quality** - Automatic validation, deduplication, and type checking
|
_None_
|
||||||
- **Cleaner Organization** - Single source of truth for all 29,857 cards
|
|
||||||
- **Web Performance** - Card browser, commander catalog, and owned cards all benefit from faster data access
|
### Fixed
|
||||||
- **Weekly Updates** - Pre-tagged data refreshed weekly via GitHub Actions
|
- **Color Identity Display**: Fixed commander color identity showing incorrectly as "Colorless (C)" for non-partner commanders in the summary panel
|
||||||
|
|
||||||
|
### Performance
|
||||||
|
- **Commander Selection Speed**: Dramatically improved response time from 4+ seconds to under 1 second
|
||||||
|
- Implemented intelligent caching for card data to eliminate redundant file loading
|
||||||
|
- Both commander data and full card database now cached with automatic refresh when data updates
|
||||||
|
|
||||||
### For Users
|
### For Users
|
||||||
Everything works the same or better! Main visible differences:
|
- Commander selection is now **much faster** - expect sub-second response times
|
||||||
- **First-time users**: Setup completes in seconds (auto-downloads pre-tagged data)
|
- Color identity labels in deck summaries now display correctly for all commanders
|
||||||
- Faster load times and data operations
|
|
||||||
- Better card recommendations with combo tag support
|
|
||||||
- More reliable data handling
|
|
||||||
- Web UI includes manual "Download from GitHub" button for instant refresh
|
|
||||||
|
|
||||||
### Technical Details
|
### Deprecated
|
||||||
- Data stored in `card_files/processed/all_cards.parquet`
|
_None_
|
||||||
- Boolean flags (`isCommander`, `isBackground`) replace separate CSV files
|
|
||||||
- CLI execution: `python -m code.main`
|
### Security
|
||||||
- Headless execution: `python -m code.headless_runner --config <path>`
|
_None_
|
||||||
- GitHub Actions and Docker builds updated for Parquet workflow
|
|
||||||
|
|
@ -838,7 +838,29 @@ class DeckBuilder(
|
||||||
if self._commander_df is not None:
|
if self._commander_df is not None:
|
||||||
return self._commander_df
|
return self._commander_df
|
||||||
|
|
||||||
# M4: Load commanders from Parquet instead of CSV
|
# M7: Try loading from dedicated commander cache first (fast path)
|
||||||
|
from path_util import get_commander_cards_path
|
||||||
|
from file_setup.data_loader import DataLoader
|
||||||
|
|
||||||
|
commander_path = get_commander_cards_path()
|
||||||
|
if os.path.exists(commander_path):
|
||||||
|
try:
|
||||||
|
loader = DataLoader()
|
||||||
|
df = loader.read_cards(commander_path, format="parquet")
|
||||||
|
|
||||||
|
# Ensure required columns exist with proper defaults
|
||||||
|
if "themeTags" not in df.columns:
|
||||||
|
df["themeTags"] = [[] for _ in range(len(df))]
|
||||||
|
if "creatureTypes" not in df.columns:
|
||||||
|
df["creatureTypes"] = [[] for _ in range(len(df))]
|
||||||
|
|
||||||
|
self._commander_df = df
|
||||||
|
return df
|
||||||
|
except Exception:
|
||||||
|
# Fall through to legacy path if cache read fails
|
||||||
|
pass
|
||||||
|
|
||||||
|
# M4: Fallback - Load commanders from full Parquet file (slower)
|
||||||
from deck_builder import builder_utils as bu
|
from deck_builder import builder_utils as bu
|
||||||
from deck_builder import builder_constants as bc
|
from deck_builder import builder_constants as bc
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -71,22 +71,43 @@ def _resolved_csv_dir(base_dir: str | None = None) -> str:
|
||||||
return base_dir or csv_dir()
|
return base_dir or csv_dir()
|
||||||
|
|
||||||
|
|
||||||
|
# M7: Cache for all cards Parquet DataFrame to avoid repeated loads
|
||||||
|
_ALL_CARDS_CACHE: Dict[str, Any] = {"df": None, "mtime": None}
|
||||||
|
|
||||||
|
|
||||||
def _load_all_cards_parquet() -> pd.DataFrame:
|
def _load_all_cards_parquet() -> pd.DataFrame:
|
||||||
"""Load all cards from the unified Parquet file.
|
"""Load all cards from the unified Parquet file with caching.
|
||||||
|
|
||||||
M4: Centralized Parquet loading for deck builder.
|
M4: Centralized Parquet loading for deck builder.
|
||||||
|
M7: Added module-level caching to avoid repeated file loads.
|
||||||
Returns empty DataFrame on error (defensive).
|
Returns empty DataFrame on error (defensive).
|
||||||
Converts numpy arrays to Python lists for compatibility with existing code.
|
Converts numpy arrays to Python lists for compatibility with existing code.
|
||||||
"""
|
"""
|
||||||
|
global _ALL_CARDS_CACHE
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from code.path_util import get_processed_cards_path
|
from code.path_util import get_processed_cards_path
|
||||||
from code.file_setup.data_loader import DataLoader
|
from code.file_setup.data_loader import DataLoader
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import os
|
||||||
|
|
||||||
parquet_path = get_processed_cards_path()
|
parquet_path = get_processed_cards_path()
|
||||||
if not Path(parquet_path).exists():
|
if not Path(parquet_path).exists():
|
||||||
return pd.DataFrame()
|
return pd.DataFrame()
|
||||||
|
|
||||||
|
# M7: Check cache and mtime
|
||||||
|
need_reload = _ALL_CARDS_CACHE["df"] is None
|
||||||
|
if not need_reload:
|
||||||
|
try:
|
||||||
|
current_mtime = os.path.getmtime(parquet_path)
|
||||||
|
cached_mtime = _ALL_CARDS_CACHE.get("mtime")
|
||||||
|
if cached_mtime is None or current_mtime > cached_mtime:
|
||||||
|
need_reload = True
|
||||||
|
except Exception:
|
||||||
|
# If mtime check fails, use cached version if available
|
||||||
|
pass
|
||||||
|
|
||||||
|
if need_reload:
|
||||||
data_loader = DataLoader()
|
data_loader = DataLoader()
|
||||||
df = data_loader.read_cards(parquet_path, format="parquet")
|
df = data_loader.read_cards(parquet_path, format="parquet")
|
||||||
|
|
||||||
|
|
@ -97,7 +118,14 @@ def _load_all_cards_parquet() -> pd.DataFrame:
|
||||||
if col in df.columns:
|
if col in df.columns:
|
||||||
df[col] = df[col].apply(lambda x: x.tolist() if isinstance(x, np.ndarray) else x)
|
df[col] = df[col].apply(lambda x: x.tolist() if isinstance(x, np.ndarray) else x)
|
||||||
|
|
||||||
return df
|
# M7: Cache the result
|
||||||
|
_ALL_CARDS_CACHE["df"] = df
|
||||||
|
try:
|
||||||
|
_ALL_CARDS_CACHE["mtime"] = os.path.getmtime(parquet_path)
|
||||||
|
except Exception:
|
||||||
|
_ALL_CARDS_CACHE["mtime"] = None
|
||||||
|
|
||||||
|
return _ALL_CARDS_CACHE["df"]
|
||||||
except Exception:
|
except Exception:
|
||||||
return pd.DataFrame()
|
return pd.DataFrame()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -129,7 +129,8 @@ class CommanderSelectionMixin:
|
||||||
def _apply_commander_selection(self, row: pd.Series): # type: ignore[override]
|
def _apply_commander_selection(self, row: pd.Series): # type: ignore[override]
|
||||||
self.commander_name = row["name"]
|
self.commander_name = row["name"]
|
||||||
self.commander_row = row
|
self.commander_row = row
|
||||||
self.commander_tags = list(row.get("themeTags", []) or [])
|
tags_value = row.get("themeTags", [])
|
||||||
|
self.commander_tags = list(tags_value) if tags_value is not None else []
|
||||||
self._initialize_commander_dict(row)
|
self._initialize_commander_dict(row)
|
||||||
|
|
||||||
# ---------------------------
|
# ---------------------------
|
||||||
|
|
|
||||||
|
|
@ -77,6 +77,15 @@ def get_processed_cards_path() -> str:
|
||||||
return os.path.join(card_files_processed_dir(), "all_cards.parquet")
|
return os.path.join(card_files_processed_dir(), "all_cards.parquet")
|
||||||
|
|
||||||
|
|
||||||
|
def get_commander_cards_path() -> str:
|
||||||
|
"""Get the path to the pre-filtered commander-only Parquet file.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path to card_files/processed/commander_cards.parquet
|
||||||
|
"""
|
||||||
|
return os.path.join(card_files_processed_dir(), "commander_cards.parquet")
|
||||||
|
|
||||||
|
|
||||||
def get_batch_path(batch_id: int) -> str:
|
def get_batch_path(batch_id: int) -> str:
|
||||||
"""Get the path to a batch Parquet file.
|
"""Get the path to a batch Parquet file.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -424,6 +424,16 @@ def load_and_tag_all_cards(parallel: bool = False, max_workers: int | None = Non
|
||||||
_data_loader.write_cards(df_final, output_path, format="parquet")
|
_data_loader.write_cards(df_final, output_path, format="parquet")
|
||||||
logger.info(f'✓ Wrote {len(df_final)} tagged cards to {output_path}')
|
logger.info(f'✓ Wrote {len(df_final)} tagged cards to {output_path}')
|
||||||
|
|
||||||
|
# M7: Write commander-only cache file for fast lookups
|
||||||
|
try:
|
||||||
|
if 'isCommander' in df_final.columns:
|
||||||
|
commander_df = df_final[df_final['isCommander'] == True].copy() # noqa: E712
|
||||||
|
commander_path = os.path.join(os.path.dirname(output_path), 'commander_cards.parquet')
|
||||||
|
_data_loader.write_cards(commander_df, commander_path, format="parquet")
|
||||||
|
logger.info(f'✓ Wrote {len(commander_df)} commanders to {commander_path}')
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f'Failed to write commander cache: {e}')
|
||||||
|
|
||||||
except FileNotFoundError as e:
|
except FileNotFoundError as e:
|
||||||
logger.error(f'Error: {e}')
|
logger.error(f'Error: {e}')
|
||||||
raise
|
raise
|
||||||
|
|
|
||||||
|
|
@ -316,6 +316,15 @@ def commander_hover_context(
|
||||||
if token:
|
if token:
|
||||||
commander_color_identity.append(token)
|
commander_color_identity.append(token)
|
||||||
|
|
||||||
|
# M7: For non-partner commanders, also check summary.colors for color identity
|
||||||
|
if not commander_color_identity and not has_combined and isinstance(summary, dict):
|
||||||
|
summary_colors = summary.get("colors")
|
||||||
|
if isinstance(summary_colors, (list, tuple, set)):
|
||||||
|
for item in summary_colors:
|
||||||
|
token = str(item).strip().upper()
|
||||||
|
if token:
|
||||||
|
commander_color_identity.append(token)
|
||||||
|
|
||||||
commander_color_label = ""
|
commander_color_label = ""
|
||||||
if has_combined:
|
if has_combined:
|
||||||
commander_color_label = str(combined_info.get("color_label") or "").strip()
|
commander_color_label = str(combined_info.get("color_label") or "").strip()
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,12 @@ from pathlib import Path
|
||||||
from deck_builder.partner_selection import apply_partner_inputs
|
from deck_builder.partner_selection import apply_partner_inputs
|
||||||
from exceptions import CommanderPartnerError
|
from exceptions import CommanderPartnerError
|
||||||
|
|
||||||
|
# M7: Cache for commander DataFrame to avoid repeated Parquet loads
|
||||||
|
_COMMANDER_DF_CACHE: Dict[str, Any] = {"df": None, "mtime": None}
|
||||||
|
|
||||||
|
# M7: Cache for past builds summary to avoid repeated file scans
|
||||||
|
_PAST_BUILDS_CACHE: Dict[str, Any] = {"index": None, "mtime": None}
|
||||||
|
|
||||||
_TAG_ACRONYM_KEEP = {"EDH", "ETB", "ETBs", "CMC", "ET", "OTK"}
|
_TAG_ACRONYM_KEEP = {"EDH", "ETB", "ETBs", "CMC", "ET", "OTK"}
|
||||||
_REASON_SOURCE_OVERRIDES = {
|
_REASON_SOURCE_OVERRIDES = {
|
||||||
"creature_all_theme": "Theme Match",
|
"creature_all_theme": "Theme Match",
|
||||||
|
|
@ -447,8 +453,9 @@ def _attach_enforcement_plan(b: DeckBuilder, comp: Dict[str, Any] | None) -> Dic
|
||||||
|
|
||||||
|
|
||||||
def commander_names() -> List[str]:
|
def commander_names() -> List[str]:
|
||||||
tmp = DeckBuilder()
|
df = _get_cached_commander_df()
|
||||||
df = tmp.load_commander_data()
|
if df is None:
|
||||||
|
return []
|
||||||
return df["name"].astype(str).tolist()
|
return df["name"].astype(str).tolist()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -479,7 +486,9 @@ def commander_candidates(query: str, limit: int = 10) -> List[Tuple[str, int, Li
|
||||||
query = ' '.join([w[:1].upper() + w[1:].lower() if w else w for w in str(query).split(' ')])
|
query = ' '.join([w[:1].upper() + w[1:].lower() if w else w for w in str(query).split(' ')])
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
df = tmp.load_commander_data()
|
df = _get_cached_commander_df()
|
||||||
|
if df is None:
|
||||||
|
return []
|
||||||
# Filter to plausible commanders: Legendary Creature, or text explicitly allows being a commander.
|
# Filter to plausible commanders: Legendary Creature, or text explicitly allows being a commander.
|
||||||
try:
|
try:
|
||||||
cols = set(df.columns.astype(str))
|
cols = set(df.columns.astype(str))
|
||||||
|
|
@ -533,10 +542,7 @@ def commander_candidates(query: str, limit: int = 10) -> List[Tuple[str, int, Li
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
# Attach color identity for each candidate
|
# Attach color identity for each candidate
|
||||||
try:
|
df = _get_cached_commander_df()
|
||||||
df = tmp.load_commander_data()
|
|
||||||
except Exception:
|
|
||||||
df = None
|
|
||||||
q = (query or "").strip().lower()
|
q = (query or "").strip().lower()
|
||||||
qn = _simplify(query)
|
qn = _simplify(query)
|
||||||
tokens = [t for t in re.split(r"[\s,]+", q) if t]
|
tokens = [t for t in re.split(r"[\s,]+", q) if t]
|
||||||
|
|
@ -627,7 +633,9 @@ def commander_candidates(query: str, limit: int = 10) -> List[Tuple[str, int, Li
|
||||||
|
|
||||||
def commander_inspect(name: str) -> Dict[str, Any]:
|
def commander_inspect(name: str) -> Dict[str, Any]:
|
||||||
tmp = DeckBuilder()
|
tmp = DeckBuilder()
|
||||||
df = tmp.load_commander_data()
|
df = _get_cached_commander_df()
|
||||||
|
if df is None:
|
||||||
|
return {"ok": False, "error": "Commander data not available"}
|
||||||
row = df[df["name"] == name]
|
row = df[df["name"] == name]
|
||||||
if row.empty:
|
if row.empty:
|
||||||
return {"ok": False, "error": "Commander not found"}
|
return {"ok": False, "error": "Commander not found"}
|
||||||
|
|
@ -637,7 +645,9 @@ def commander_inspect(name: str) -> Dict[str, Any]:
|
||||||
|
|
||||||
def commander_select(name: str) -> Dict[str, Any]:
|
def commander_select(name: str) -> Dict[str, Any]:
|
||||||
tmp = DeckBuilder()
|
tmp = DeckBuilder()
|
||||||
df = tmp.load_commander_data()
|
df = _get_cached_commander_df()
|
||||||
|
if df is None:
|
||||||
|
return {"ok": False, "error": "Commander data not available"}
|
||||||
# Try exact match, then normalized match
|
# Try exact match, then normalized match
|
||||||
row = df[df["name"] == name]
|
row = df[df["name"] == name]
|
||||||
if row.empty:
|
if row.empty:
|
||||||
|
|
@ -661,15 +671,125 @@ def commander_select(name: str) -> Dict[str, Any]:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def tags_for_commander(name: str) -> List[str]:
|
def _get_cached_commander_df():
|
||||||
|
"""M7: Return cached commander DataFrame, loading only if needed or stale."""
|
||||||
|
global _COMMANDER_DF_CACHE
|
||||||
|
|
||||||
|
# Check if we need to reload (cache miss or file changed)
|
||||||
|
need_reload = _COMMANDER_DF_CACHE["df"] is None
|
||||||
|
|
||||||
|
if not need_reload:
|
||||||
|
# Check if the commander Parquet file has been modified since we cached it
|
||||||
|
try:
|
||||||
|
from path_util import get_commander_cards_path
|
||||||
|
commander_path = get_commander_cards_path()
|
||||||
|
if os.path.exists(commander_path):
|
||||||
|
current_mtime = os.path.getmtime(commander_path)
|
||||||
|
cached_mtime = _COMMANDER_DF_CACHE.get("mtime")
|
||||||
|
if cached_mtime is None or current_mtime > cached_mtime:
|
||||||
|
need_reload = True
|
||||||
|
else:
|
||||||
|
# If dedicated file doesn't exist, force reload to use fallback
|
||||||
|
need_reload = True
|
||||||
|
except Exception:
|
||||||
|
# If we can't check mtime, just use the cache if we have it
|
||||||
|
pass
|
||||||
|
|
||||||
|
if need_reload:
|
||||||
|
try:
|
||||||
tmp = DeckBuilder()
|
tmp = DeckBuilder()
|
||||||
df = tmp.load_commander_data()
|
df = tmp.load_commander_data()
|
||||||
|
from path_util import get_commander_cards_path
|
||||||
|
commander_path = get_commander_cards_path()
|
||||||
|
_COMMANDER_DF_CACHE["df"] = df
|
||||||
|
if os.path.exists(commander_path):
|
||||||
|
_COMMANDER_DF_CACHE["mtime"] = os.path.getmtime(commander_path)
|
||||||
|
else:
|
||||||
|
# No dedicated file - set mtime to None so we don't cache stale data
|
||||||
|
_COMMANDER_DF_CACHE["mtime"] = None
|
||||||
|
except Exception:
|
||||||
|
# Fall back to empty cache on error
|
||||||
|
_COMMANDER_DF_CACHE["df"] = None
|
||||||
|
_COMMANDER_DF_CACHE["mtime"] = None
|
||||||
|
|
||||||
|
return _COMMANDER_DF_CACHE["df"]
|
||||||
|
|
||||||
|
|
||||||
|
def _get_past_builds_index() -> Dict[str, List[Dict[str, Any]]]:
|
||||||
|
"""M7: Return cached index of past builds: commander_name -> list of {tags, age_days}."""
|
||||||
|
global _PAST_BUILDS_CACHE
|
||||||
|
|
||||||
|
deck_files_dir = 'deck_files'
|
||||||
|
need_rebuild = _PAST_BUILDS_CACHE["index"] is None
|
||||||
|
|
||||||
|
if not need_rebuild:
|
||||||
|
# Check if deck_files directory has changed
|
||||||
|
try:
|
||||||
|
if os.path.exists(deck_files_dir):
|
||||||
|
current_mtime = os.path.getmtime(deck_files_dir)
|
||||||
|
cached_mtime = _PAST_BUILDS_CACHE.get("mtime")
|
||||||
|
if cached_mtime is None or current_mtime > cached_mtime:
|
||||||
|
need_rebuild = True
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if need_rebuild:
|
||||||
|
index: Dict[str, List[Dict[str, Any]]] = {}
|
||||||
|
try:
|
||||||
|
for path in glob(os.path.join(deck_files_dir, '*.summary.json')):
|
||||||
|
try:
|
||||||
|
st = os.stat(path)
|
||||||
|
age_days = max(0, (time.time() - st.st_mtime) / 86400.0)
|
||||||
|
with open(path, 'r', encoding='utf-8') as f:
|
||||||
|
data = json.load(f) or {}
|
||||||
|
meta = data.get('meta') or {}
|
||||||
|
commander = str(meta.get('commander', '')).strip()
|
||||||
|
if not commander:
|
||||||
|
continue
|
||||||
|
tags_list = meta.get('tags') or []
|
||||||
|
if not tags_list:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if commander not in index:
|
||||||
|
index[commander] = []
|
||||||
|
index[commander].append({
|
||||||
|
'tags': tags_list,
|
||||||
|
'age_days': age_days
|
||||||
|
})
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
_PAST_BUILDS_CACHE["index"] = index
|
||||||
|
if os.path.exists(deck_files_dir):
|
||||||
|
_PAST_BUILDS_CACHE["mtime"] = os.path.getmtime(deck_files_dir)
|
||||||
|
except Exception:
|
||||||
|
_PAST_BUILDS_CACHE["index"] = {}
|
||||||
|
_PAST_BUILDS_CACHE["mtime"] = None
|
||||||
|
|
||||||
|
return _PAST_BUILDS_CACHE["index"] or {}
|
||||||
|
|
||||||
|
|
||||||
|
def invalidate_past_builds_cache():
|
||||||
|
"""M7: Force rebuild of past builds cache on next access (call after saving new builds)."""
|
||||||
|
global _PAST_BUILDS_CACHE
|
||||||
|
_PAST_BUILDS_CACHE["index"] = None
|
||||||
|
_PAST_BUILDS_CACHE["mtime"] = None
|
||||||
|
|
||||||
|
|
||||||
|
def tags_for_commander(name: str) -> List[str]:
|
||||||
|
df = _get_cached_commander_df()
|
||||||
|
if df is None:
|
||||||
|
return []
|
||||||
row = df[df["name"] == name]
|
row = df[df["name"] == name]
|
||||||
if row.empty:
|
if row.empty:
|
||||||
return []
|
return []
|
||||||
raw = row.iloc[0].get("themeTags", [])
|
raw = row.iloc[0].get("themeTags", [])
|
||||||
if isinstance(raw, list):
|
# Handle both list and NumPy array types from Parquet
|
||||||
|
if isinstance(raw, (list, tuple)) or hasattr(raw, '__iter__') and not isinstance(raw, str):
|
||||||
|
try:
|
||||||
return list(dict.fromkeys([str(t).strip() for t in raw if str(t).strip()]))
|
return list(dict.fromkeys([str(t).strip() for t in raw if str(t).strip()]))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
if isinstance(raw, str) and raw.strip():
|
if isinstance(raw, str) and raw.strip():
|
||||||
parts = [p.strip().strip("'\"") for p in raw.split(',')]
|
parts = [p.strip().strip("'\"") for p in raw.split(',')]
|
||||||
return [p for p in parts if p]
|
return [p for p in parts if p]
|
||||||
|
|
@ -707,11 +827,8 @@ def _recommended_scored(name: str, max_items: int = 5) -> List[Tuple[str, int, L
|
||||||
except Exception:
|
except Exception:
|
||||||
return None
|
return None
|
||||||
return None
|
return None
|
||||||
try:
|
# M7: Use cached DataFrame instead of loading again
|
||||||
tmp = DeckBuilder()
|
df = _get_cached_commander_df()
|
||||||
df = tmp.load_commander_data()
|
|
||||||
except Exception:
|
|
||||||
df = None
|
|
||||||
# Gather commander text and colors
|
# Gather commander text and colors
|
||||||
text = ""
|
text = ""
|
||||||
colors: List[str] = []
|
colors: List[str] = []
|
||||||
|
|
@ -815,18 +932,13 @@ def _recommended_scored(name: str, max_items: int = 5) -> List[Tuple[str, int, L
|
||||||
if len(reasons[orig]) < 3 and cr not in reasons[orig]:
|
if len(reasons[orig]) < 3 and cr not in reasons[orig]:
|
||||||
reasons[orig].append(cr)
|
reasons[orig].append(cr)
|
||||||
|
|
||||||
# Past builds history
|
# Past builds history - M7: Use cached index instead of scanning files
|
||||||
try:
|
try:
|
||||||
for path in glob(os.path.join('deck_files', '*.summary.json')):
|
past_builds_index = _get_past_builds_index()
|
||||||
try:
|
builds_for_commander = past_builds_index.get(str(name).strip(), [])
|
||||||
st = os.stat(path)
|
for build in builds_for_commander:
|
||||||
age_days = max(0, (time.time() - st.st_mtime) / 86400.0)
|
age_days = build.get('age_days', 999)
|
||||||
with open(path, 'r', encoding='utf-8') as f:
|
tags_list = build.get('tags', [])
|
||||||
data = json.load(f) or {}
|
|
||||||
meta = data.get('meta') or {}
|
|
||||||
if str(meta.get('commander', '')).strip() != str(name).strip():
|
|
||||||
continue
|
|
||||||
tags_list = meta.get('tags') or []
|
|
||||||
for tg in tags_list:
|
for tg in tags_list:
|
||||||
tn = _norm(str(tg))
|
tn = _norm(str(tg))
|
||||||
if tn in available_norm:
|
if tn in available_norm:
|
||||||
|
|
@ -842,8 +954,6 @@ def _recommended_scored(name: str, max_items: int = 5) -> List[Tuple[str, int, L
|
||||||
lbl = "Popular in your past builds" + (" (recent)" if recent else "")
|
lbl = "Popular in your past builds" + (" (recent)" if recent else "")
|
||||||
if len(reasons[orig]) < 3 and lbl not in reasons[orig]:
|
if len(reasons[orig]) < 3 and lbl not in reasons[orig]:
|
||||||
reasons[orig].append(lbl)
|
reasons[orig].append(lbl)
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
@ -1920,6 +2030,8 @@ def run_build(commander: str, tags: List[str], bracket: int, ideals: Dict[str, i
|
||||||
payload = {"meta": meta, "summary": summary}
|
payload = {"meta": meta, "summary": summary}
|
||||||
with open(sidecar, 'w', encoding='utf-8') as f:
|
with open(sidecar, 'w', encoding='utf-8') as f:
|
||||||
_json.dump(payload, f, ensure_ascii=False, indent=2)
|
_json.dump(payload, f, ensure_ascii=False, indent=2)
|
||||||
|
# M7: Invalidate past builds cache so new build appears in recommendations
|
||||||
|
invalidate_past_builds_cache()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
# Success return
|
# Success return
|
||||||
|
|
@ -2748,6 +2860,8 @@ def run_stage(ctx: Dict[str, Any], rerun: bool = False, show_skipped: bool = Fal
|
||||||
payload = {"meta": meta, "summary": summary}
|
payload = {"meta": meta, "summary": summary}
|
||||||
with open(sidecar, 'w', encoding='utf-8') as f:
|
with open(sidecar, 'w', encoding='utf-8') as f:
|
||||||
_json.dump(payload, f, ensure_ascii=False, indent=2)
|
_json.dump(payload, f, ensure_ascii=False, indent=2)
|
||||||
|
# M7: Invalidate past builds cache so new build appears in recommendations
|
||||||
|
invalidate_past_builds_cache()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
return {
|
return {
|
||||||
|
|
@ -3597,6 +3711,8 @@ def run_stage(ctx: Dict[str, Any], rerun: bool = False, show_skipped: bool = Fal
|
||||||
payload = {"meta": meta, "summary": summary}
|
payload = {"meta": meta, "summary": summary}
|
||||||
with open(sidecar, 'w', encoding='utf-8') as f:
|
with open(sidecar, 'w', encoding='utf-8') as f:
|
||||||
_json.dump(payload, f, ensure_ascii=False, indent=2)
|
_json.dump(payload, f, ensure_ascii=False, indent=2)
|
||||||
|
# M7: Invalidate past builds cache so new build appears in recommendations
|
||||||
|
invalidate_past_builds_cache()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
# Final progress
|
# Final progress
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,6 @@
|
||||||
<button type="button" id="cand-{{ loop.index0 }}" class="chip candidate-btn" role="option" data-idx="{{ loop.index0 }}" data-name="{{ cand.value|e }}" data-display="{{ cand.display|e }}"
|
<button type="button" id="cand-{{ loop.index0 }}" class="chip candidate-btn" role="option" data-idx="{{ loop.index0 }}" data-name="{{ cand.value|e }}" data-display="{{ cand.display|e }}"
|
||||||
hx-get="/build/new/inspect?name={{ cand.display|urlencode }}"
|
hx-get="/build/new/inspect?name={{ cand.display|urlencode }}"
|
||||||
hx-target="#newdeck-tags-slot" hx-swap="innerHTML"
|
hx-target="#newdeck-tags-slot" hx-swap="innerHTML"
|
||||||
data-hx-cache="1" data-hx-cache-key="newdeck:inspect:{{ cand.display|lower }}" data-hx-cache-ttl="45000"
|
|
||||||
data-hx-prefetch="/build/new/inspect?name={{ cand.display|urlencode }}"
|
|
||||||
hx-on="htmx:afterOnLoad: (function(){ try{ var preferred=this.getAttribute('data-name')||''; var displayed=this.getAttribute('data-display')||preferred; var ci = document.querySelector('input[name=commander]'); if(ci){ ci.value=preferred; try{ ci.selectionStart = ci.selectionEnd = ci.value.length; }catch(_){} try{ ci.dispatchEvent(new Event('input', { bubbles: true })); }catch(_){ } } var nm = document.querySelector('input[name=name]'); if(nm && (!nm.value || !nm.value.trim())){ nm.value=displayed; } }catch(_){ } }).call(this)">
|
hx-on="htmx:afterOnLoad: (function(){ try{ var preferred=this.getAttribute('data-name')||''; var displayed=this.getAttribute('data-display')||preferred; var ci = document.querySelector('input[name=commander]'); if(ci){ ci.value=preferred; try{ ci.selectionStart = ci.selectionEnd = ci.value.length; }catch(_){} try{ ci.dispatchEvent(new Event('input', { bubbles: true })); }catch(_){ } } var nm = document.querySelector('input[name=name]'); if(nm && (!nm.value || !nm.value.trim())){ nm.value=displayed; } }catch(_){ } }).call(this)">
|
||||||
{{ cand.display }}
|
{{ cand.display }}
|
||||||
{% if cand.warning %}
|
{% if cand.warning %}
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,18 @@ seed_defaults() {
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Copy/download commander cache (new in M7 for fast commander lookups)
|
||||||
|
mkdir -p /app/card_files/processed
|
||||||
|
if [ ! -f /app/card_files/processed/commander_cards.parquet ]; then
|
||||||
|
if [ -f /.defaults/card_files/processed/commander_cards.parquet ]; then
|
||||||
|
echo "Copying pre-built commander cache from image..."
|
||||||
|
cp /.defaults/card_files/processed/commander_cards.parquet /app/card_files/processed/ 2>/dev/null || true
|
||||||
|
else
|
||||||
|
echo "Downloading commander cache from GitHub..."
|
||||||
|
wget -q https://raw.githubusercontent.com/mwisnowski/mtg_python_deckbuilder/similarity-cache-data/card_files/processed/commander_cards.parquet -O /app/card_files/processed/commander_cards.parquet 2>/dev/null || echo "Warning: Could not download commander cache (will be generated during setup)"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
# Copy from baked-in defaults if targets are missing
|
# Copy from baked-in defaults if targets are missing
|
||||||
if [ -d "/.defaults/config" ]; then
|
if [ -d "/.defaults/config" ]; then
|
||||||
# deck.json
|
# deck.json
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue