mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2026-03-24 22:16:31 +01:00
feat: add theme quality, pool size, and popularity badges with filtering (#56)
This commit is contained in:
parent
03e2846882
commit
8efdc77c08
21 changed files with 1165 additions and 64 deletions
|
|
@ -8,6 +8,7 @@ Responsibilities:
|
|||
- Provide summary & detail projections (with synergy segmentation).
|
||||
- NEW (Phase F perf): precompute summary dicts & lowercase haystacks, and
|
||||
add fast filtering / result caching to accelerate list & API endpoints.
|
||||
- R20: Calculate theme quality scores and pool sizes for UI display.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -69,6 +70,10 @@ class SlugThemeIndex(BaseModel):
|
|||
haystack_by_slug: Dict[str, str]
|
||||
primary_color_by_slug: Dict[str, Optional[str]]
|
||||
secondary_color_by_slug: Dict[str, Optional[str]]
|
||||
quality_tier_by_slug: Dict[str, Optional[str]] # R20: Excellent/Good/Fair/Poor
|
||||
quality_score_by_slug: Dict[str, Optional[float]] # R20: 0.0-1.0 normalized quality
|
||||
pool_size_by_slug: Dict[str, int] # R20: Number of cards with this theme tag
|
||||
pool_tier_by_slug: Dict[str, str] # R20: Vast/Large/Moderate/Small/Tiny
|
||||
mtime: float
|
||||
yaml_mtime_max: float
|
||||
etag: str
|
||||
|
|
@ -162,6 +167,80 @@ def _compute_etag(size: int, mtime: float, yaml_mtime: float) -> str:
|
|||
return f"{int(size)}-{int(mtime)}-{int(yaml_mtime)}"
|
||||
|
||||
|
||||
def _calculate_theme_pool_sizes() -> Dict[str, int]:
|
||||
"""Calculate card pool sizes for each theme from parquet data (R20).
|
||||
|
||||
Returns:
|
||||
Dict mapping theme name to count of cards with that theme tag.
|
||||
"""
|
||||
pool_sizes: Dict[str, int] = {}
|
||||
|
||||
try:
|
||||
# Import pandas and path_util inside function to avoid circular imports
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from path_util import get_processed_cards_path
|
||||
|
||||
parquet_path = get_processed_cards_path()
|
||||
if not Path(parquet_path).exists():
|
||||
return pool_sizes
|
||||
|
||||
# Read parquet file
|
||||
df = pd.read_parquet(parquet_path)
|
||||
|
||||
if 'themeTags' not in df.columns:
|
||||
return pool_sizes
|
||||
|
||||
# Count cards for each theme
|
||||
for tags in df['themeTags']:
|
||||
# Check for null/NA (works for scalars only, arrays need different check)
|
||||
if tags is None or (isinstance(tags, str) and tags == ''):
|
||||
continue
|
||||
|
||||
# Handle different formats: numpy array, list, string
|
||||
if isinstance(tags, np.ndarray):
|
||||
theme_list = [str(t).strip() for t in tags if t]
|
||||
elif isinstance(tags, list):
|
||||
theme_list = [str(t).strip() for t in tags if t]
|
||||
elif isinstance(tags, str):
|
||||
theme_list = [t.strip() for t in tags.split(',') if t.strip()]
|
||||
else:
|
||||
continue
|
||||
|
||||
for theme in theme_list:
|
||||
if theme:
|
||||
# Normalize theme name: Strip leading/trailing spaces
|
||||
normalized = theme.strip()
|
||||
pool_sizes[normalized] = pool_sizes.get(normalized, 0) + 1
|
||||
|
||||
except Exception:
|
||||
# Silently fail if parquet unavailable (e.g., during initial setup)
|
||||
pass
|
||||
|
||||
return pool_sizes
|
||||
|
||||
|
||||
def _categorize_pool_size(count: int) -> str:
|
||||
"""Categorize pool size into tier (R20).
|
||||
|
||||
Args:
|
||||
count: Number of cards with this theme tag
|
||||
|
||||
Returns:
|
||||
Tier: Vast/Large/Moderate/Small/Tiny
|
||||
"""
|
||||
if count >= 500:
|
||||
return 'Vast'
|
||||
elif count >= 200:
|
||||
return 'Large'
|
||||
elif count >= 50:
|
||||
return 'Moderate'
|
||||
elif count >= 15:
|
||||
return 'Small'
|
||||
else:
|
||||
return 'Tiny'
|
||||
|
||||
|
||||
def load_index() -> SlugThemeIndex:
|
||||
if not _needs_reload():
|
||||
return _CACHE["index"]
|
||||
|
|
@ -174,14 +253,92 @@ def load_index() -> SlugThemeIndex:
|
|||
haystack_by_slug: Dict[str, str] = {}
|
||||
primary_color_by_slug: Dict[str, Optional[str]] = {}
|
||||
secondary_color_by_slug: Dict[str, Optional[str]] = {}
|
||||
quality_tier_by_slug: Dict[str, Optional[str]] = {} # R20
|
||||
quality_score_by_slug: Dict[str, Optional[float]] = {} # R20
|
||||
pool_size_by_slug: Dict[str, int] = {} # R20
|
||||
pool_tier_by_slug: Dict[str, str] = {} # R20
|
||||
|
||||
# R20: Calculate pool sizes from parquet data
|
||||
theme_pool_sizes = _calculate_theme_pool_sizes()
|
||||
|
||||
# R20: Calculate global card frequency directly to avoid circular dependency
|
||||
global_card_freq: Dict[str, int] = {}
|
||||
for t in catalog.themes:
|
||||
if t.example_cards:
|
||||
for card in t.example_cards:
|
||||
global_card_freq[card] = global_card_freq.get(card, 0) + 1
|
||||
|
||||
total_themes = len(catalog.themes)
|
||||
|
||||
for t in catalog.themes:
|
||||
slug = slugify(t.theme)
|
||||
slug_to_entry[slug] = t
|
||||
summary = project_summary(t)
|
||||
summary_by_slug[slug] = summary
|
||||
# Will populate quality after calculation below
|
||||
summary_by_slug[slug] = {} # Populated after quality calculation
|
||||
haystack_by_slug[slug] = "|".join([t.theme] + t.synergies).lower()
|
||||
primary_color_by_slug[slug] = t.primary_color
|
||||
secondary_color_by_slug[slug] = t.secondary_color
|
||||
|
||||
# R20: Calculate quality metrics inline (avoids ThemeEditorialService circular dependency)
|
||||
# Enhanced scoring: Card count (0-30) + Uniqueness (0-40) + Description (0-20) + Curation (0-10) = 100 max
|
||||
total_points = 0.0
|
||||
max_points = 100.0
|
||||
|
||||
# 1. Example card count (0-30 points, 8+ cards = max)
|
||||
card_count = len(t.example_cards) if t.example_cards else 0
|
||||
card_points = min(30.0, (card_count / 8) * 30.0)
|
||||
total_points += card_points
|
||||
|
||||
# 2. Uniqueness ratio (0-40 points, cards in <25% of themes)
|
||||
if t.example_cards and total_themes > 0:
|
||||
unique_count = sum(
|
||||
1 for card in t.example_cards
|
||||
if (global_card_freq.get(card, 0) / total_themes) < 0.25
|
||||
)
|
||||
uniqueness_ratio = unique_count / len(t.example_cards)
|
||||
uniqueness_points = uniqueness_ratio * 40.0
|
||||
total_points += uniqueness_points
|
||||
|
||||
# 3. Description quality (0-20 points: manual=10, rule=5, generic=0)
|
||||
if t.description_source:
|
||||
desc_bonus = {'manual': 10, 'rule': 5, 'generic': 0}.get(t.description_source, 0)
|
||||
total_points += desc_bonus
|
||||
|
||||
# 4. Manual curation bonus (0-10 points if has curated_synergies)
|
||||
if hasattr(t, 'curated_synergies') and t.curated_synergies:
|
||||
total_points += 10.0
|
||||
|
||||
# Normalize to 0.0-1.0 and determine tier
|
||||
normalized_score = total_points / max_points
|
||||
if normalized_score >= 0.75:
|
||||
tier = 'Excellent'
|
||||
elif normalized_score >= 0.60:
|
||||
tier = 'Good'
|
||||
elif normalized_score >= 0.40:
|
||||
tier = 'Fair'
|
||||
else:
|
||||
tier = 'Poor'
|
||||
|
||||
quality_tier_by_slug[slug] = tier
|
||||
quality_score_by_slug[slug] = normalized_score
|
||||
|
||||
# R20: Calculate pool size from parquet data (normalize theme name for lookup)
|
||||
normalized_theme_name = t.theme.strip()
|
||||
pool_size = theme_pool_sizes.get(normalized_theme_name, 0)
|
||||
pool_tier = _categorize_pool_size(pool_size)
|
||||
pool_size_by_slug[slug] = pool_size
|
||||
pool_tier_by_slug[slug] = pool_tier
|
||||
|
||||
# R20: Now create summary with quality and pool size data
|
||||
summary = project_summary(
|
||||
t,
|
||||
quality_tier=tier,
|
||||
quality_score=normalized_score,
|
||||
pool_size=pool_size,
|
||||
pool_tier=pool_tier
|
||||
)
|
||||
summary_by_slug[slug] = summary
|
||||
|
||||
yaml_map, yaml_mtime_max = _load_yaml_map()
|
||||
idx = SlugThemeIndex(
|
||||
catalog=catalog,
|
||||
|
|
@ -191,6 +348,10 @@ def load_index() -> SlugThemeIndex:
|
|||
haystack_by_slug=haystack_by_slug,
|
||||
primary_color_by_slug=primary_color_by_slug,
|
||||
secondary_color_by_slug=secondary_color_by_slug,
|
||||
quality_tier_by_slug=quality_tier_by_slug, # R20
|
||||
quality_score_by_slug=quality_score_by_slug, # R20
|
||||
pool_size_by_slug=pool_size_by_slug, # R20
|
||||
pool_tier_by_slug=pool_tier_by_slug, # R20
|
||||
mtime=CATALOG_JSON.stat().st_mtime,
|
||||
yaml_mtime_max=yaml_mtime_max,
|
||||
etag=_compute_etag(CATALOG_JSON.stat().st_size, CATALOG_JSON.stat().st_mtime, yaml_mtime_max),
|
||||
|
|
@ -284,7 +445,13 @@ def has_fallback_description(entry: ThemeEntry) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def project_summary(entry: ThemeEntry) -> Dict[str, Any]:
|
||||
def project_summary(
|
||||
entry: ThemeEntry,
|
||||
quality_tier: Optional[str] = None,
|
||||
quality_score: Optional[float] = None,
|
||||
pool_size: Optional[int] = None,
|
||||
pool_tier: Optional[str] = None
|
||||
) -> Dict[str, Any]:
|
||||
# Short description (snippet) for list hover / condensed display
|
||||
desc = entry.description or ""
|
||||
short_desc = desc.strip()
|
||||
|
|
@ -298,6 +465,10 @@ def project_summary(entry: ThemeEntry) -> Dict[str, Any]:
|
|||
"popularity_bucket": entry.popularity_bucket,
|
||||
"deck_archetype": entry.deck_archetype,
|
||||
"editorial_quality": entry.editorial_quality,
|
||||
"quality_tier": quality_tier, # R20: Quality tier (Excellent/Good/Fair/Poor)
|
||||
"quality_score": quality_score, # R20: Normalized quality score (0.0-1.0)
|
||||
"pool_size": pool_size, # R20: Number of cards with this theme tag
|
||||
"pool_tier": pool_tier, # R20: Pool size tier (Vast/Large/Moderate/Small/Tiny)
|
||||
"description": entry.description,
|
||||
"short_description": short_desc,
|
||||
"synergies": entry.synergies,
|
||||
|
|
@ -317,7 +488,7 @@ def _split_synergies(slug: str, entry: ThemeEntry, yaml_map: Dict[str, Dict[str,
|
|||
}
|
||||
|
||||
|
||||
def project_detail(slug: str, entry: ThemeEntry, yaml_map: Dict[str, Dict[str, Any]], uncapped: bool = False) -> Dict[str, Any]:
|
||||
def project_detail(slug: str, entry: ThemeEntry, yaml_map: Dict[str, Dict[str, Any]], idx: 'SlugThemeIndex', uncapped: bool = False) -> Dict[str, Any]:
|
||||
seg = _split_synergies(slug, entry, yaml_map)
|
||||
uncapped_synergies: Optional[List[str]] = None
|
||||
if uncapped:
|
||||
|
|
@ -330,7 +501,18 @@ def project_detail(slug: str, entry: ThemeEntry, yaml_map: Dict[str, Dict[str, A
|
|||
full.append(s)
|
||||
seen.add(s)
|
||||
uncapped_synergies = full
|
||||
d = project_summary(entry)
|
||||
# R20: Look up quality and pool metrics from index (not stored on ThemeEntry)
|
||||
quality_tier = idx.quality_tier_by_slug.get(slug)
|
||||
quality_score = idx.quality_score_by_slug.get(slug)
|
||||
pool_size = idx.pool_size_by_slug.get(slug, 0)
|
||||
pool_tier = idx.pool_tier_by_slug.get(slug, "Tiny")
|
||||
d = project_summary(
|
||||
entry,
|
||||
quality_tier=quality_tier,
|
||||
quality_score=quality_score,
|
||||
pool_size=pool_size,
|
||||
pool_tier=pool_tier
|
||||
)
|
||||
d.update({
|
||||
"curated_synergies": seg["curated"],
|
||||
"enforced_synergies": seg["enforced"],
|
||||
|
|
@ -449,6 +631,7 @@ def summaries_for_slugs(idx: SlugThemeIndex, slugs: Iterable[str]) -> List[Dict[
|
|||
for s in slugs:
|
||||
summ = idx.summary_by_slug.get(s)
|
||||
if summ:
|
||||
# R20: Summary already contains quality_tier and quality_score from load_index
|
||||
out.append(summ.copy()) # shallow copy so route can pop diag-only fields
|
||||
return out
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue