mtg_python_deckbuilder/code/web/services/sampling.py

"""Sampling utilities extracted from theme_preview (Core Refactor Phase A - initial extraction).

This module contains the deterministic sampling pipeline used to build preview
role buckets. Card index access (index build, tag pools, commander lookup) is
imported from the sibling ``card_index`` module and tunables come from
``sampling_config``. Logic was moved with minimal changes to preserve
behavior; future refactor steps will decompose it further (e.g., separating
rarity calibration, introducing typed models).

Public (stable) surface for Phase A:
    sample_real_cards_for_theme(theme: str, limit: int, colors_filter: str | None,
                                *, synergies: list[str], commander: str | None) -> list[SampledCard]

Internal helpers intentionally start with an underscore to discourage external
use; they may change in subsequent refactor steps.
"""
from __future__ import annotations

import random
from typing import Any, Dict, List, Optional, TypedDict

from .card_index import maybe_build_index, get_tag_pool, lookup_commander
from .sampling_config import (
    COMMANDER_COLOR_FILTER_STRICT,
    COMMANDER_OVERLAP_BONUS,
    COMMANDER_THEME_MATCH_BONUS,
    SPLASH_OFF_COLOR_PENALTY,
    SPLASH_ADAPTIVE_ENABLED,
    parse_splash_adaptive_scale,
    ROLE_BASE_WEIGHTS,
    ROLE_SATURATION_PENALTY,
    rarity_weight_base,
    parse_rarity_diversity_targets,
    RARITY_DIVERSITY_OVER_PENALTY,
)


# Legacy module-level placeholder for the old in-module card index. It is not
# referenced anywhere else in the visible part of this module; the name is
# retained only so older tests that still import it keep working.
_CARD_INDEX_DEPRECATED: Dict[str, List[Dict[str, Any]]] = {}  # kept for back-compat in tests; will be removed


class SampledCard(TypedDict, total=False):
    """Typed shape for a sampled card entry emitted to preview layer.

    total=False because curated examples / synthetic placeholders may lack
    full DB-enriched fields (mana_cost, rarity, color_identity_list, etc.).

    The per-field notes below describe how ``sample_real_cards_for_theme``
    in this module populates each key; other producers may differ.
    """
    # Card name taken from the raw index record; also used as the de-dup key.
    name: str
    # ``list(...)`` of the raw record's "color_identity" value.
    colors: List[str]
    # Single-element list holding the role assigned by ``_classify_role``.
    roles: List[str]
    # Tags carried over from the raw index record.
    tags: List[str]
    # Accumulated score after all bonuses and penalties.
    score: float
    # Trace strings (e.g. "role:payoff") explaining each score adjustment.
    reasons: List[str]
    # NOTE(review): filled via ``raw.get("mana_cost")`` with no default, so the
    # value can be None at runtime even though it is annotated ``str``.
    mana_cost: str
    # Raw rarity string, or "" when the record has none.
    rarity: str
    # Copied from the raw record's "color_identity_list" (defaults to []).
    color_identity_list: List[str]
    # Copied from the raw record's "pip_colors" (defaults to []).
    pip_colors: List[str]


def _classify_role(theme: str, synergies: List[str], tags: List[str]) -> str:
    """Assign a preview role to a card based on its tags.

    A card tagged with ``theme`` itself is always a "payoff". Otherwise the
    role depends on how many distinct entries of ``synergies`` appear among
    ``tags``: two or more -> "enabler", exactly one -> "support",
    none -> "wildcard".
    """
    card_tags = set(tags)
    if theme in card_tags:
        return "payoff"
    shared_count = len(card_tags.intersection(synergies))
    if shared_count == 0:
        return "wildcard"
    return "support" if shared_count == 1 else "enabler"


def _seed_from(theme: str, commander: Optional[str]) -> int:
    """Derive a stable, non-zero 32-bit seed from theme and commander names.

    Both names are lower-cased (a missing commander counts as the empty
    string), joined with ``|``, UTF-8 encoded, and folded with a base-131
    polynomial hash masked to 32 bits. A hash of 0 is reported as 1 so the
    result is always truthy.
    """
    key = "|".join((theme.lower(), (commander or "").lower()))
    acc = 0
    for byte in key.encode("utf-8"):
        acc = (acc * 131 + byte) & 0xFFFFFFFF
    return acc if acc else 1


def _deterministic_shuffle(items: List[Any], seed: int) -> None:
    """Shuffle ``items`` in place with a private RNG seeded by ``seed``.

    A dedicated ``random.Random`` instance is used, so the module-level
    random state is left untouched and the same seed always yields the same
    permutation for a list of the same length.
    """
    random.Random(seed).shuffle(items)


def _score_card(theme: str, synergies: List[str], role: str, tags: List[str]) -> float:
    """Compute the base score for a card before commander/rarity adjustments.

    The score is the sum of three parts, added in this order:
      * 3.0 when ``theme`` is one of the card's tags (else 0.0),
      * 1.2 per distinct synergy found among the card's tags,
      * the ``ROLE_BASE_WEIGHTS`` entry for ``role`` (0.5 when the role has
        no configured weight).
    """
    card_tags = set(tags)
    theme_bonus = 3.0 if theme in card_tags else 0.0
    synergy_bonus = len(card_tags.intersection(synergies)) * 1.2
    return theme_bonus + synergy_bonus + ROLE_BASE_WEIGHTS.get(role, 0.5)


def _commander_overlap_scale(commander_tags: set[str], card_tags: List[str], synergy_set: set[str]) -> float:
    """Return the commander-synergy bonus for a card, with diminishing returns.

    Counts the tags present in all three of ``commander_tags``,
    ``synergy_set`` and ``card_tags``. With ``k`` such tags the bonus is
    ``COMMANDER_OVERLAP_BONUS * (1 - 0.5 ** k)``. Returns 0.0 when either
    the commander tags or the synergy set is empty, or when nothing overlaps.
    """
    if not (commander_tags and synergy_set):
        return 0.0
    hits = len(commander_tags.intersection(synergy_set, card_tags))
    if hits <= 0:
        return 0.0
    return COMMANDER_OVERLAP_BONUS * (1 - (0.5 ** hits))


def _lookup_commander(commander: Optional[str]) -> Optional[Dict[str, Any]]:  # thin wrapper for legacy name
    """Delegate to ``lookup_commander`` from ``card_index`` under the legacy name.

    The argument is forwarded unchanged and the callee's result is returned
    unchanged; this function adds no behavior of its own.
    """
    return lookup_commander(commander)


def _filter_pool_by_colors(pool: List[Dict[str, Any]], colors_filter: Optional[str]) -> List[Dict[str, Any]]:
    """Keep only cards whose color identity fits inside ``colors_filter``."""
    if not colors_filter:
        return pool
    allowed = {part.strip().upper() for part in colors_filter.split(',') if part.strip()}
    if not allowed:
        return pool
    # A missing/None identity is treated as colorless; the empty set is a
    # subset of anything, so colorless cards always pass.
    return [card for card in pool if set(card.get("color_identity") or "").issubset(allowed)]


def _restrict_pool_to_commander(pool: List[Dict[str, Any]], commander_colors: set[str]) -> List[Dict[str, Any]]:
    """Drop cards outside the commander's colors, allowing a one-color splash for 4+ color commanders."""
    allow_splash = len(commander_colors) >= 4
    kept: List[Dict[str, Any]] = []
    for card in pool:
        identity = set(card.get("color_identity") or "")
        if identity.issubset(commander_colors):
            kept.append(card)
        elif allow_splash and len(identity - commander_colors) == 1:
            # Mark a shallow copy rather than the pooled record: if pool
            # entries are shared index objects, writing the flag onto them
            # would leak the splash penalty into later calls made with a
            # different commander.
            kept.append({**card, "_splash_off_color": True})
    return kept


def _select_by_role_targets(payoff: List[SampledCard], enabler: List[SampledCard], support: List[SampledCard], wildcard: List[SampledCard], limit: int) -> List[SampledCard]:
    """Pick cards per role quota (40% payoff, 40% enabler+support, rest wildcard), then backfill up to ``limit``."""
    target_payoff = max(1, int(round(limit * 0.4)))
    target_enabler_support = max(1, int(round(limit * 0.4)))
    target_wild = max(0, limit - target_payoff - target_enabler_support)

    chosen: List[SampledCard] = payoff[:target_payoff]
    chosen.extend((enabler + support)[:target_enabler_support])
    chosen.extend(wildcard[:target_wild])

    if len(chosen) < limit:
        # Identity set gives O(1) "already picked?" checks; every item is a
        # distinct dict object, so identity matches the old equality test.
        picked = {id(it) for it in chosen}
        for bucket in (payoff, enabler, support, wildcard):
            for it in bucket:
                if len(chosen) >= limit:
                    break
                if id(it) not in picked:
                    picked.add(id(it))
                    chosen.append(it)
    return chosen


def _apply_role_saturation(chosen: List[SampledCard], limit: int) -> None:
    """Penalize (in place) cards beyond each role's soft cap, in selection order."""
    role_soft_caps = {
        "payoff": int(round(limit * 0.5)),
        "enabler": int(round(limit * 0.35)),
        "support": int(round(limit * 0.35)),
        "wildcard": int(round(limit * 0.25)),
    }
    role_seen: Dict[str, int] = dict.fromkeys(role_soft_caps, 0)
    for it in chosen:
        r = (it.get("roles") or [None])[0]
        if not r or r not in role_soft_caps:
            continue
        role_seen[r] += 1
        # At least one card per role is always penalty-free.
        if role_seen[r] > max(1, role_soft_caps[r]):
            it["score"] = it.get("score", 0) + ROLE_SATURATION_PENALTY  # negative value
            (it.setdefault("reasons", [])).append(f"role_saturation_penalty:{ROLE_SATURATION_PENALTY}")


def sample_real_cards_for_theme(theme: str, limit: int, colors_filter: Optional[str], *, synergies: List[str], commander: Optional[str]) -> List[SampledCard]:
    """Return scored, role-classified real cards for a theme.

    Mirrors prior `_sample_real_cards_for_theme` behavior for parity.

    Pipeline: fetch the theme's tag pool, filter by ``colors_filter`` and
    (when strict commander filtering is enabled) by the commander's color
    identity, score and classify every uniquely named card, order each role
    bucket by descending score then name, pick per-role quotas with
    backfill, and finally apply role-saturation penalties.

    Args:
        theme: Theme tag used to fetch the pool and to detect payoff cards.
        limit: Maximum number of cards to return; ``limit <= 0`` yields ``[]``.
        colors_filter: Optional comma-separated color codes (compared
            upper-cased); cards whose color identity is not a subset are
            dropped. Falsy means no filtering.
        synergies: Synergy tags used for role classification and scoring.
        commander: Optional commander name passed to the commander lookup;
            when a record is found it drives color filtering and bonuses.

    Returns:
        At most ``limit`` ``SampledCard`` dicts in selection order (payoff,
        then enabler/support, then wildcard, then backfill). Scores of cards
        hit by the saturation penalty are lowered after ordering, so the
        result is not re-sorted by final score.
    """
    maybe_build_index()
    if limit <= 0:
        # Without this guard a negative limit reached ``chosen[:limit]``,
        # which slices from the end and could return cards.
        return []
    pool = get_tag_pool(theme)
    if not pool:
        return []
    commander_card = _lookup_commander(commander)
    commander_colors: set[str] = set(commander_card.get("color_identity") or "") if commander_card else set()
    commander_tags: set[str] = set(commander_card.get("tags") or []) if commander_card else set()
    pool = _filter_pool_by_colors(pool, colors_filter)
    if commander_card and COMMANDER_COLOR_FILTER_STRICT and commander_colors:
        pool = _restrict_pool_to_commander(pool, commander_colors)

    seen_names: set[str] = set()
    buckets: Dict[str, List[SampledCard]] = {"payoff": [], "enabler": [], "support": [], "wildcard": []}
    rarity_counts: Dict[str, int] = {}
    rarity_diversity = parse_rarity_diversity_targets()
    synergy_set = set(synergies)
    rarity_weight_cfg = rarity_weight_base()
    splash_scale = parse_splash_adaptive_scale() if SPLASH_ADAPTIVE_ENABLED else None
    commander_color_count = len(commander_colors)
    # Loop-invariant: synergy tags the commander itself carries.
    commander_synergy_tags = commander_tags.intersection(synergy_set)

    for raw in pool:
        nm = raw.get("name")
        if not nm or nm in seen_names:
            continue
        seen_names.add(nm)
        tags = raw.get("tags") or []
        role = _classify_role(theme, synergies, tags)
        score = _score_card(theme, synergies, role, tags)
        reasons = [f"role:{role}", f"synergy_overlap:{len(synergy_set.intersection(tags))}"]
        if commander_card:
            if theme in tags:
                score += COMMANDER_THEME_MATCH_BONUS
                reasons.append("commander_theme_match")
            scaled = _commander_overlap_scale(commander_tags, tags, synergy_set)
            if scaled:
                score += scaled
                reasons.append(f"commander_synergy_overlap:{len(commander_synergy_tags.intersection(tags))}:{round(scaled,2)}")
            reasons.append("commander_bias")
        rarity = raw.get("rarity") or ""
        if rarity:
            # Each additional card of the same rarity earns a smaller bonus;
            # the count follows pool iteration order.
            base_rarity_weight = rarity_weight_cfg.get(rarity, 0.25)
            count_so_far = rarity_counts.get(rarity, 0)
            increment_weight = base_rarity_weight / (1 + 0.4 * count_so_far)
            score += increment_weight
            rarity_counts[rarity] = count_so_far + 1
            reasons.append(f"rarity_weight_calibrated:{rarity}:{round(increment_weight,2)}")
            if rarity_diversity and rarity in rarity_diversity:
                _lo, hi = rarity_diversity[rarity]
                # Only enforce upper bound (overflow penalty)
                if rarity_counts[rarity] > hi:
                    score += RARITY_DIVERSITY_OVER_PENALTY
                    reasons.append(f"rarity_diversity_overflow:{rarity}:{hi}:{RARITY_DIVERSITY_OVER_PENALTY}")
        if raw.get("_splash_off_color"):
            penalty = SPLASH_OFF_COLOR_PENALTY
            if splash_scale and commander_color_count:
                scale = splash_scale.get(commander_color_count, 1.0)
                adaptive_penalty = round(penalty * scale, 4)
                score += adaptive_penalty
                reasons.append(f"splash_off_color_penalty_adaptive:{commander_color_count}:{adaptive_penalty}")
            else:
                score += penalty  # negative value
                reasons.append(f"splash_off_color_penalty:{penalty}")
        item: SampledCard = {
            "name": nm,
            "colors": list(raw.get("color_identity") or ""),
            "roles": [role],
            "tags": tags,
            "score": score,
            "reasons": reasons,
            "mana_cost": raw.get("mana_cost"),
            "rarity": rarity,
            "color_identity_list": raw.get("color_identity_list", []),
            "pip_colors": raw.get("pip_colors", []),
        }
        buckets.get(role, buckets["wildcard"]).append(item)

    payoff = buckets["payoff"]
    enabler = buckets["enabler"]
    support = buckets["support"]
    wildcard = buckets["wildcard"]

    seed = _seed_from(theme, commander)
    for bucket in (payoff, enabler, support, wildcard):
        # Shuffle kept for parity with the legacy pipeline; names are unique
        # after de-duplication, so the sort below fully determines the order.
        _deterministic_shuffle(bucket, seed)
        bucket.sort(key=lambda x: (-x["score"], x["name"]))

    chosen = _select_by_role_targets(payoff, enabler, support, wildcard, limit)
    _apply_role_saturation(chosen, limit)
    # Quota minimums can overshoot when limit is 1, so cap the result.
    return chosen[:limit]

# Public alias of the private ``_commander_overlap_scale`` helper so unit tests
# can import the overlap-scale function without an underscore-prefixed name.
commander_overlap_scale = _commander_overlap_scale
feat(web): Core Refactor Phase A — extract sampling and cache modules; add adaptive TTL + eviction heuristics, Redis PoC, and metrics wiring. Tests added for TTL, eviction, exports, splash-adaptive, card index, and service worker. Docs+roadmap updated. 2025-09-24 13:57:23 -07:00			`"""Sampling utilities extracted from theme_preview (Core Refactor Phase A - initial extraction).`

			`This module contains card index construction and the deterministic sampling`
			`pipeline used to build preview role buckets. Logic moved with minimal changes`
			`to preserve behavior; future refactor steps will further decompose (e.g.,`
			`separating card index & rarity calibration, introducing typed models).`

			`Public (stable) surface for Phase A:`
			`sample_real_cards_for_theme(theme: str, limit: int, colors_filter: str \| None,`
			`*, synergies: list[str], commander: str \| None) -> list[dict]`

			`Internal helpers intentionally start with an underscore to discourage external`
			`use; they may change in subsequent refactor steps.`
			`"""`
			`from __future__ import annotations`

			`import random`
			`from typing import Any, Dict, List, Optional, TypedDict`

			`from .card_index import maybe_build_index, get_tag_pool, lookup_commander`
			`from .sampling_config import (`
			`COMMANDER_COLOR_FILTER_STRICT,`
			`COMMANDER_OVERLAP_BONUS,`
			`COMMANDER_THEME_MATCH_BONUS,`
			`SPLASH_OFF_COLOR_PENALTY,`
			`SPLASH_ADAPTIVE_ENABLED,`
			`parse_splash_adaptive_scale,`
			`ROLE_BASE_WEIGHTS,`
			`ROLE_SATURATION_PENALTY,`
			`rarity_weight_base,`
			`parse_rarity_diversity_targets,`
			`RARITY_DIVERSITY_OVER_PENALTY,`
			`)`


			`_CARD_INDEX_DEPRECATED: Dict[str, List[Dict[str, Any]]] = {} # kept for back-compat in tests; will be removed`


			`class SampledCard(TypedDict, total=False):`
			`"""Typed shape for a sampled card entry emitted to preview layer.`

			`total=False because curated examples / synthetic placeholders may lack`
			`full DB-enriched fields (mana_cost, rarity, color_identity_list, etc.).`
			`"""`
			`name: str`
			`colors: List[str]`
			`roles: List[str]`
			`tags: List[str]`
			`score: float`
			`reasons: List[str]`
			`mana_cost: str`
			`rarity: str`
			`color_identity_list: List[str]`
			`pip_colors: List[str]`


			`def _classify_role(theme: str, synergies: List[str], tags: List[str]) -> str:`
			`tag_set = set(tags)`
			`synergy_overlap = tag_set.intersection(synergies)`
			`if theme in tag_set:`
			`return "payoff"`
			`if len(synergy_overlap) >= 2:`
			`return "enabler"`
			`if len(synergy_overlap) == 1:`
			`return "support"`
			`return "wildcard"`


			`def _seed_from(theme: str, commander: Optional[str]) -> int:`
			`base = f"{theme.lower()}\|{(commander or '').lower()}".encode("utf-8")`
			`h = 0`
			`for b in base:`
			`h = (h * 131 + b) & 0xFFFFFFFF`
			`return h or 1`


			`def _deterministic_shuffle(items: List[Any], seed: int) -> None:`
			`rnd = random.Random(seed)`
			`rnd.shuffle(items)`


			`def _score_card(theme: str, synergies: List[str], role: str, tags: List[str]) -> float:`
			`tag_set = set(tags)`
			`synergy_overlap = len(tag_set.intersection(synergies))`
			`score = 0.0`
			`if theme in tag_set:`
			`score += 3.0`
			`score += synergy_overlap * 1.2`
			`score += ROLE_BASE_WEIGHTS.get(role, 0.5)`
			`return score`


			`def _commander_overlap_scale(commander_tags: set[str], card_tags: List[str], synergy_set: set[str]) -> float:`
			`if not commander_tags or not synergy_set:`
			`return 0.0`
			`overlap_synergy = len(commander_tags.intersection(synergy_set).intersection(card_tags))`
			`if overlap_synergy <= 0:`
			`return 0.0`
			`return COMMANDER_OVERLAP_BONUS * (1 - (0.5 ** overlap_synergy))`


			`def _lookup_commander(commander: Optional[str]) -> Optional[Dict[str, Any]]: # thin wrapper for legacy name`
			`return lookup_commander(commander)`


			`def sample_real_cards_for_theme(theme: str, limit: int, colors_filter: Optional[str], *, synergies: List[str], commander: Optional[str]) -> List[SampledCard]:`
			`"""Return scored, role-classified real cards for a theme.`

			Mirrors prior `_sample_real_cards_for_theme` behavior for parity.
			`"""`
			`maybe_build_index()`
			`pool = get_tag_pool(theme)`
			`if not pool:`
			`return []`
			`commander_card = _lookup_commander(commander)`
			`commander_colors: set[str] = set(commander_card.get("color_identity", "")) if commander_card else set()`
			`commander_tags: set[str] = set(commander_card.get("tags", [])) if commander_card else set()`
			`if colors_filter:`
			`allowed = {c.strip().upper() for c in colors_filter.split(',') if c.strip()}`
			`if allowed:`
			`pool = [c for c in pool if set(c.get("color_identity", "")).issubset(allowed) or not c.get("color_identity")]`
			`if commander_card and COMMANDER_COLOR_FILTER_STRICT and commander_colors:`
			`allow_splash = len(commander_colors) >= 4`
			`new_pool: List[Dict[str, Any]] = []`
			`for c in pool:`
			`ci = set(c.get("color_identity", ""))`
			`if not ci or ci.issubset(commander_colors):`
			`new_pool.append(c)`
			`continue`
			`if allow_splash:`
			`off = ci - commander_colors`
			`if len(off) == 1:`
fix(lint): improved type checking and code quality (77% error reduction) 2025-10-31 08:18:09 -07:00			`c["_splash_off_color"] = True`
feat(web): Core Refactor Phase A — extract sampling and cache modules; add adaptive TTL + eviction heuristics, Redis PoC, and metrics wiring. Tests added for TTL, eviction, exports, splash-adaptive, card index, and service worker. Docs+roadmap updated. 2025-09-24 13:57:23 -07:00			`new_pool.append(c)`
			`continue`
			`pool = new_pool`
			`seen_names: set[str] = set()`
			`payoff: List[SampledCard] = []`
			`enabler: List[SampledCard] = []`
			`support: List[SampledCard] = []`
			`wildcard: List[SampledCard] = []`
			`rarity_counts: Dict[str, int] = {}`
			`rarity_diversity = parse_rarity_diversity_targets()`
			`synergy_set = set(synergies)`
			`rarity_weight_cfg = rarity_weight_base()`
			`splash_scale = parse_splash_adaptive_scale() if SPLASH_ADAPTIVE_ENABLED else None`
			`commander_color_count = len(commander_colors) if commander_colors else 0`
			`for raw in pool:`
			`nm = raw.get("name")`
			`if not nm or nm in seen_names:`
			`continue`
			`seen_names.add(nm)`
			`tags = raw.get("tags", [])`
			`role = _classify_role(theme, synergies, tags)`
			`score = _score_card(theme, synergies, role, tags)`
			`reasons = [f"role:{role}", f"synergy_overlap:{len(set(tags).intersection(synergies))}"]`
			`if commander_card:`
			`if theme in tags:`
			`score += COMMANDER_THEME_MATCH_BONUS`
			`reasons.append("commander_theme_match")`
			`scaled = _commander_overlap_scale(commander_tags, tags, synergy_set)`
			`if scaled:`
			`score += scaled`
			`reasons.append(f"commander_synergy_overlap:{len(commander_tags.intersection(synergy_set).intersection(tags))}:{round(scaled,2)}")`
			`reasons.append("commander_bias")`
			`rarity = raw.get("rarity") or ""`
			`if rarity:`
			`base_rarity_weight = rarity_weight_cfg.get(rarity, 0.25)`
			`count_so_far = rarity_counts.get(rarity, 0)`
			`increment_weight = base_rarity_weight / (1 + 0.4 * count_so_far)`
			`score += increment_weight`
			`rarity_counts[rarity] = count_so_far + 1`
			`reasons.append(f"rarity_weight_calibrated:{rarity}:{round(increment_weight,2)}")`
			`if rarity_diversity and rarity in rarity_diversity:`
			`lo, hi = rarity_diversity[rarity]`
			`# Only enforce upper bound (overflow penalty)`
			`if rarity_counts[rarity] > hi:`
			`score += RARITY_DIVERSITY_OVER_PENALTY`
			`reasons.append(f"rarity_diversity_overflow:{rarity}:{hi}:{RARITY_DIVERSITY_OVER_PENALTY}")`
			`if raw.get("_splash_off_color"):`
			`penalty = SPLASH_OFF_COLOR_PENALTY`
			`if splash_scale and commander_color_count:`
			`scale = splash_scale.get(commander_color_count, 1.0)`
			`adaptive_penalty = round(penalty * scale, 4)`
			`score += adaptive_penalty`
			`reasons.append(f"splash_off_color_penalty_adaptive:{commander_color_count}:{adaptive_penalty}")`
			`else:`
			`score += penalty # negative value`
			`reasons.append(f"splash_off_color_penalty:{penalty}")`
			`item: SampledCard = {`
			`"name": nm,`
			`"colors": list(raw.get("color_identity", "")),`
			`"roles": [role],`
			`"tags": tags,`
			`"score": score,`
			`"reasons": reasons,`
			`"mana_cost": raw.get("mana_cost"),`
			`"rarity": rarity,`
			`"color_identity_list": raw.get("color_identity_list", []),`
			`"pip_colors": raw.get("pip_colors", []),`
			`}`
			`if role == "payoff":`
			`payoff.append(item)`
			`elif role == "enabler":`
			`enabler.append(item)`
			`elif role == "support":`
			`support.append(item)`
			`else:`
			`wildcard.append(item)`
			`seed = _seed_from(theme, commander)`
			`for bucket in (payoff, enabler, support, wildcard):`
			`_deterministic_shuffle(bucket, seed)`
			`bucket.sort(key=lambda x: (-x["score"], x["name"]))`
			`target_payoff = max(1, int(round(limit * 0.4)))`
			`target_enabler_support = max(1, int(round(limit * 0.4)))`
			`target_wild = max(0, limit - target_payoff - target_enabler_support)`

			`def take(n: int, source: List[SampledCard]):`
			`for i in range(min(n, len(source))):`
			`yield source[i]`

			`chosen: List[SampledCard] = []`
			`chosen.extend(take(target_payoff, payoff))`
			`es_combined = enabler + support`
			`chosen.extend(take(target_enabler_support, es_combined))`
			`chosen.extend(take(target_wild, wildcard))`

			`if len(chosen) < limit:`
			`def fill_from(src: List[SampledCard]):`
			`nonlocal chosen`
			`for it in src:`
			`if len(chosen) >= limit:`
			`break`
			`if it not in chosen:`
			`chosen.append(it)`
			`for bucket in (payoff, enabler, support, wildcard):`
			`fill_from(bucket)`

			`role_soft_caps = {`
			`"payoff": int(round(limit * 0.5)),`
			`"enabler": int(round(limit * 0.35)),`
			`"support": int(round(limit * 0.35)),`
			`"wildcard": int(round(limit * 0.25)),`
			`}`
			`role_seen: Dict[str, int] = {k: 0 for k in role_soft_caps}`
			`for it in chosen:`
			`r = (it.get("roles") or [None])[0]`
			`if not r or r not in role_soft_caps:`
			`continue`
			`role_seen[r] += 1`
			`if role_seen[r] > max(1, role_soft_caps[r]):`
			`it["score"] = it.get("score", 0) + ROLE_SATURATION_PENALTY # negative value`
			`(it.setdefault("reasons", [])).append(f"role_saturation_penalty:{ROLE_SATURATION_PENALTY}")`
			`if len(chosen) > limit:`
			`chosen = chosen[:limit]`
			`return chosen`

			`# Expose overlap scale for unit tests`
			`commander_overlap_scale = _commander_overlap_scale`