diff --git a/.env.example b/.env.example index 43dbd8c..2fcc200 100644 --- a/.env.example +++ b/.env.example @@ -92,6 +92,13 @@ WEB_AUTO_REFRESH_DAYS=7 # dockerhub: WEB_AUTO_REFRESH_DAYS="7" WEB_TAG_PARALLEL=1 # dockerhub: WEB_TAG_PARALLEL="1" WEB_TAG_WORKERS=2 # dockerhub: WEB_TAG_WORKERS="4" WEB_AUTO_ENFORCE=0 # dockerhub: WEB_AUTO_ENFORCE="0" + +# Tagging Refinement Feature Flags +TAG_NORMALIZE_KEYWORDS=1 # dockerhub: TAG_NORMALIZE_KEYWORDS="1" # Normalize keywords & filter specialty mechanics +TAG_PROTECTION_GRANTS=1 # dockerhub: TAG_PROTECTION_GRANTS="1" # Protection tag only for cards granting shields +TAG_METADATA_SPLIT=1 # dockerhub: TAG_METADATA_SPLIT="1" # Separate metadata tags from themes in CSVs +TAG_PROTECTION_SCOPE=1 # dockerhub: TAG_PROTECTION_SCOPE="1" # Deck builder filters the protection pool by scope metadata tags + # DFC_COMPAT_SNAPSHOT=0 # 1=write legacy unmerged MDFC snapshots alongside merged catalogs (deprecated compatibility workflow) # WEB_CUSTOM_EXPORT_BASE= # Custom basename for exports (optional). # THEME_CATALOG_YAML_SCAN_INTERVAL_SEC=2.0 # Poll for YAML changes (dev) diff --git a/CHANGELOG.md b/CHANGELOG.md index b5f4ce0..ba01974 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,27 +9,60 @@ This format follows Keep a Changelog principles and aims for Semantic Versioning ## [Unreleased] ### Summary +- Card tagging system improvements split metadata from gameplay themes for cleaner deck building experience - Keyword normalization reduces specialty keyword noise by 96% while maintaining theme catalog quality - Protection tag now focuses on cards that grant shields to others, not just those with inherent protection - Web UI improvements: faster polling, fixed progress display, and theme refresh stability +- **Protection System Overhaul**: Comprehensive enhancement to protection card detection, classification, and deck building + - Fine-grained scope metadata distinguishes self-protection from board-wide effects ("Your Permanents: Hexproof" vs "Self: Hexproof") + - Enhanced grant detection with Equipment/Aura patterns, phasing support, and complex trigger
handling + - Intelligent deck builder filtering includes board-relevant protection while excluding self-only and type-specific cards + - Tiered pool limiting focuses on high-quality staples while maintaining variety across builds + - Improved scope tagging for cards with keyword-only protection effects (no grant text, just inherent keywords) ### Added +- Metadata partition system separates diagnostic tags from gameplay themes in card data - Keyword normalization system with smart filtering of one-off specialty mechanics - Allowlist preserves important keywords like Flying, Myriad, and Transform - Protection grant detection identifies cards that give Hexproof, Ward, or Indestructible to other permanents - Automatic tagging for creature-type-specific protection (e.g., "Knights Gain Protection") +- New `metadataTags` column in card data for bracket annotations and internal diagnostics +- Static phasing keyword detection from keywords field (catches creatures like Breezekeeper) +- "Other X you control have Y" protection pattern for static ability grants +- "Enchanted creature has phasing" pattern detection +- Chosen type blanket phasing patterns +- Complex trigger phasing patterns (reactive, consequent, end-of-turn) +- Protection scope filtering in deck builder (feature flag: `TAG_PROTECTION_SCOPE`) intelligently selects board-relevant protection +- Phasing cards with "Your Permanents:" or "Targeted:" metadata now tagged as Protection and included in protection pool +- Metadata tags temporarily visible in card hover previews for debugging (shows scope like "Your Permanents: Hexproof") ### Changed +- Card tags now split between themes (for deck building) and metadata (for diagnostics) - Keywords now consolidate variants (e.g., "Commander ninjutsu" becomes "Ninjutsu") - Setup progress polling reduced from 3s to 5-10s intervals for better performance - Theme catalog streamlined from 753 to 736 themes (-2.3%) with improved quality - Protection tag refined to focus on 329 
cards that grant shields (down from 1,166 with inherent effects) +- Theme catalog automatically excludes metadata tags from theme suggestions +- Grant detection now strips reminder text before pattern matching to avoid false positives +- Deck builder protection phase now filters by scope metadata: includes "Your Permanents:", excludes "Self:" protection +- Protection card selection now randomized per build for variety (using seeded RNG when deterministic mode enabled) +- Protection pool now limited to ~40-50 high-quality cards (tiered selection: top 3x target + random 10-20 extras) ### Fixed - Setup progress now shows 100% completion instead of getting stuck at 99% - Theme catalog no longer continuously regenerates after setup completes - Health indicator polling optimized to reduce server load - Protection detection now correctly excludes creatures with only inherent keywords +- Dive Down, Glint no longer falsely identified as granting to opponents (reminder text fix) +- Drogskol Captain, Haytham Kenway now correctly get "Your Permanents" scope tags +- 7 cards with static Phasing keyword now properly detected (Breezekeeper, Teferi's Drake, etc.) +- Type-specific protection grants (e.g., "Knights Gain Indestructible") now correctly excluded from general protection pool +- Protection scope filter now properly prioritizes exclusions over inclusions (fixes Knight Exemplar in non-Knight decks) +- Inherent protection cards (Aysen Highway, Phantom Colossus, etc.) 
now correctly get "Self: Protection" metadata tags +- Scope tagging now applies to ALL cards with protection effects, not just grant cards +- Cloak of Invisibility, Teferi's Curse now get "Your Permanents: Phasing" tags +- Shimmer now gets "Blanket: Phasing" tag for chosen type effect +- King of the Oathbreakers now gets "Self: Phasing" tag for reactive trigger ## [2.5.2] - 2025-10-08 ### Summary diff --git a/RELEASE_NOTES_TEMPLATE.md b/RELEASE_NOTES_TEMPLATE.md index d35313a..c32861c 100644 --- a/RELEASE_NOTES_TEMPLATE.md +++ b/RELEASE_NOTES_TEMPLATE.md @@ -1,45 +1,61 @@ -# MTG Pyt### Added -- Keywo### Changed -- Keywords consolidate variants (e.g., "Commander ninjutsu" → "Ninjutsu") for consistent theme matching -- Protection tag refined to focus on shield-granting cards (329 cards vs 1,166 previously) -- Theme catalog streamlined with improved quality (736 themes, down 2.3%) -- Commander search and theme picker now share an intelligent debounce to prevent redundant requests while typing -- Card grids adopt modern containment rules to minimize layout recalculations on large decks -- Include/exclude buttons respond immediately with optimistic updates, reconciling gracefully if the server disagrees -- Frequently accessed views, like the commander catalog default, now pull from an in-memory cache for sub-200 ms reloads -- Deck review loads in focused chunks, keeping the initial page lean while analytics stream progressively -- Chart hover zones expand to full column width for easier interactionnup filters out one-off specialty mechanics (like set-specific ability words) while keeping evergreen abilities -- Protection grant detection identifies cards that give Hexproof, Ward, or other shields to your permanents -- Creature-type-specific protection automatically tagged (e.g., "Knights Gain Protection" for tribal strategies) -- Skeleton placeholders accept `data-skeleton-label` microcopy and only surface after ~400 ms across the build wizard, stage navigator, and 
alternatives panel -- Must-have toggle API (`/build/must-haves/toggle`), telemetry ingestion route (`/telemetry/events`), and structured logging helpers capture include/exclude beacons -- Commander catalog results wrap in a deferred skeleton list while commander art lazy-loads via a new `IntersectionObserver` helper in `code/web/static/app.js` -- Collapsible accordions for Mana Overview and Test Hand sections defer heavy analytics until they are expanded -- Click-to-pin chart tooltips keep comparisons anchored and add copy-friendly working buttons -- Virtualized card lists automatically render only visible items once 12+ cards are presentkbuilder ${VERSION} +# MTG Python Deckbuilder ${VERSION} + +## [Unreleased] ### Summary -- Smarter card tagging: Keywords are cleaner (96% noise reduction) and Protection now highlights cards that actually grant shields to your board -- Builder responsiveness upgrades: smarter HTMX caching, shared debounce helpers, and virtualization hints keep long card lists responsive -- Commander catalog now ships skeleton placeholders, lazy commander art loading, and cached default results for faster repeat visits -- Deck summary streams via an HTMX fragment while virtualization powers summary lists without loading every row up front -- Mana analytics load on demand with collapsible sections and interactive chart tooltips that support click-to-pin comparisons +- Card tagging improvements separate gameplay themes from internal metadata for cleaner deck building +- Keyword cleanup reduces specialty keyword noise by 96% while keeping important mechanics +- Protection tag now highlights cards that grant shields to your board, not just inherent protection +- **Protection System Overhaul**: Smarter card detection, scope-aware filtering, and focused pool selection deliver consistent, high-quality protection card recommendations + - Deck builder distinguishes between board-wide protection and self-only effects using fine-grained metadata + - 
Intelligent pool limiting focuses on high-quality staples while maintaining variety across builds + - Scope-aware filtering automatically excludes self-protection and type-specific cards that don't match your deck + - Enhanced detection handles Equipment, Auras, phasing effects, and complex triggers correctly +- Web UI responsiveness upgrades with smarter caching and streamlined loading ### Added -- Skeleton placeholders accept `data-skeleton-label` microcopy and only surface after ~400 ms across the build wizard, stage navigator, and alternatives panel. -- Must-have toggle API (`/build/must-haves/toggle`), telemetry ingestion route (`/telemetry/events`), and structured logging helpers capture include/exclude beacons. -- Commander catalog results wrap in a deferred skeleton list while commander art lazy-loads via a new `IntersectionObserver` helper in `code/web/static/app.js`. -- Collapsible accordions for Mana Overview and Test Hand sections defer heavy analytics until they are expanded. -- Click-to-pin chart tooltips keep comparisons anchored and add copy-friendly working buttons. -- Virtualized card lists automatically render only visible items once 12+ cards are present. 
+- Metadata partition keeps internal tags separate from gameplay themes +- Keyword normalization filters out one-off specialty mechanics while keeping evergreen abilities +- Protection grant detection identifies cards that give Hexproof, Ward, or other shields to your permanents +- Creature-type-specific protection automatically tagged (e.g., "Knights Gain Protection" for tribal strategies) +- Protection scope filtering (feature flag: `TAG_PROTECTION_SCOPE`) automatically excludes self-only protection like Svyelun +- Phasing cards with protective effects now included in protection pool (e.g., cards that phase out your permanents) +- Debug mode: Hover over cards to see metadata tags showing protection scope (e.g., "Your Permanents: Hexproof") +- Skeleton placeholders with smart timing across build wizard and commander catalog +- Must-have toggle API with telemetry tracking for include/exclude interactions +- Commander catalog lazy-loads art and caches frequently accessed views +- Collapsible sections for mana analytics defer loading until expanded +- Click-to-pin chart tooltips for easier card comparisons +- Virtualized card lists handle large decks smoothly ### Changed -- Commander search and theme picker now share an intelligent debounce to prevent redundant requests while typing. -- Card grids adopt modern containment rules to minimize layout recalculations on large decks. -- Include/exclude buttons respond immediately with optimistic updates, reconciling gracefully if the server disagrees. -- Frequently accessed views, like the commander catalog default, now pull from an in-memory cache for sub-200 ms reloads. -- Deck review loads in focused chunks, keeping the initial page lean while analytics stream progressively. -- Chart hover zones expand to full column width for easier interaction. 
+- Card tags now split between themes (for deck building) and metadata (for diagnostics) +- Keywords consolidate variants (e.g., "Commander ninjutsu" → "Ninjutsu") for consistent theme matching +- Protection tag refined to focus on shield-granting cards (329 cards vs 1,166 previously) +- Deck builder protection phase filters by scope: includes "Your Permanents:", excludes "Self:" protection +- Protection card selection randomized for variety across builds (deterministic when using seeded mode) +- Theme catalog streamlined with improved quality (736 themes, down 2.3%) +- Theme catalog automatically excludes metadata tags from suggestions +- Commander search and theme picker share intelligent debounce to prevent redundant requests +- Include/exclude buttons respond immediately with optimistic updates +- Commander catalog default view loads from cache for sub-200ms response times +- Deck review loads in focused chunks for faster initial page loads +- Chart hover zones expanded for easier interaction ### Fixed -- _None_ + +- Setup progress correctly displays 100% upon completion +- Theme catalog refresh stability improved after initial setup +- Server polling optimized for reduced load +- Protection detection accurately filters inherent vs granted effects +- Protection scope detection improvements for 11+ cards: + - Dive Down, Glint no longer falsely marked as opponent grants (reminder text now stripped) + - Drogskol Captain and similar cards with "Other X you control have Y" patterns now tagged correctly + - 7 cards with static Phasing keyword now detected (Breezekeeper, Teferi's Drake, etc.)
+ - Cloak of Invisibility and Teferi's Curse now get "Your Permanents: Phasing" tags + - Shimmer now gets "Blanket: Phasing" for chosen type effect + - King of the Oathbreakers reactive trigger now properly detected +- Type-specific protection (Knight Exemplar, Timber Protector) no longer added to non-matching decks +- Deck builder correctly excludes "Self:" protection cards (e.g., Svyelun) from protection pool +- Inherent protection cards (Aysen Highway, Phantom Colossus) now correctly receive scope metadata tags +- Protection pool now intelligently limited to focus on high-quality, relevant cards for your deck diff --git a/_tmp_check_metrics.py b/_tmp_check_metrics.py deleted file mode 100644 index 8bf5e40..0000000 --- a/_tmp_check_metrics.py +++ /dev/null @@ -1,5 +0,0 @@ -import urllib.request, json -raw = urllib.request.urlopen("http://localhost:8000/themes/metrics").read().decode() -js=json.loads(raw) -print('example_enforcement_active=', js.get('preview',{}).get('example_enforcement_active')) -print('example_enforce_threshold_pct=', js.get('preview',{}).get('example_enforce_threshold_pct')) diff --git a/_tmp_run_orchestrator.py b/_tmp_run_orchestrator.py deleted file mode 100644 index 854aa1d..0000000 --- a/_tmp_run_orchestrator.py +++ /dev/null @@ -1,3 +0,0 @@ -from code.web.services import orchestrator -orchestrator._ensure_setup_ready(print, force=False) -print('DONE') \ No newline at end of file diff --git a/code/deck_builder/builder.py b/code/deck_builder/builder.py index a7a5d53..b08a718 100644 --- a/code/deck_builder/builder.py +++ b/code/deck_builder/builder.py @@ -1759,6 +1759,7 @@ class DeckBuilder( entry['Synergy'] = synergy else: # If no tags passed attempt enrichment from filtered pool first, then full snapshot + metadata_tags: list[str] = [] if not tags: # Use filtered pool (_combined_cards_df) instead of unfiltered (_full_cards_df) # This ensures exclude filtering is respected during card enrichment @@ -1774,6 +1775,13 @@ class DeckBuilder( # 
tolerate comma separated parts = [p.strip().strip("'\"") for p in raw_tags.split(',')] tags = [p for p in parts if p] + # M5: Extract metadata tags for web UI display + raw_meta = row_match.iloc[0].get('metadataTags', []) + if isinstance(raw_meta, list): + metadata_tags = [str(t).strip() for t in raw_meta if str(t).strip()] + elif isinstance(raw_meta, str) and raw_meta.strip(): + parts = [p.strip().strip("'\"") for p in raw_meta.split(',')] + metadata_tags = [p for p in parts if p] except Exception: pass # Enrich missing type and mana_cost for accurate categorization @@ -1811,6 +1819,7 @@ class DeckBuilder( 'Mana Value': mana_value, 'Creature Types': creature_types, 'Tags': tags, + 'MetadataTags': metadata_tags, # M5: Store metadata tags for web UI 'Commander': is_commander, 'Count': 1, 'Role': (role or ('commander' if is_commander else None)), diff --git a/code/deck_builder/phases/phase4_spells.py b/code/deck_builder/phases/phase4_spells.py index 76ff0c9..3ec39fb 100644 --- a/code/deck_builder/phases/phase4_spells.py +++ b/code/deck_builder/phases/phase4_spells.py @@ -539,6 +539,10 @@ class SpellAdditionMixin: """Add protection spells to the deck. Selects cards tagged as 'protection', prioritizing by EDHREC rank and mana value. Avoids duplicates and commander card. + + M5: When TAG_PROTECTION_SCOPE is enabled, filters to include only cards that + protect your board (Your Permanents:, Blanket:, Targeted:) and excludes self-only, + opponent, and type-specific ({Type} Gain) protection cards.
""" target = self.ideal_counts.get('protection', 0) if target <= 0 or self._combined_cards_df is None: @@ -546,14 +550,88 @@ class SpellAdditionMixin: already = {n.lower() for n in self.card_library.keys()} df = self._combined_cards_df.copy() df['_ltags'] = df.get('themeTags', []).apply(bu.normalize_tag_cell) - pool = df[df['_ltags'].apply(lambda tags: any('protection' in t for t in tags))] + + # M5: Apply scope-based filtering if enabled + import settings as s + if getattr(s, 'TAG_PROTECTION_SCOPE', True): + # Check metadata tags for scope information (column may be absent in CSVs without the metadata split) + df['_meta_tags'] = df['metadataTags'].apply(bu.normalize_tag_cell) if 'metadataTags' in df.columns else [[] for _ in range(len(df))] + + def is_board_relevant_protection(row): + """Check if protection card helps protect your board. + + Includes: + - Cards with "Your Permanents:" metadata (board-wide protection) + - Cards with "Blanket:" metadata (affects all permanents) + - Cards with "Targeted:" metadata (can target your stuff) + - Legacy cards without metadata tags + + Excludes: + - "Self:" protection (only protects itself) + - "Opponent Permanents:" protection (helps opponents) + - Type-specific grants like "Knights Gain" (too narrow, handled by kindred synergies) + """ + theme_tags = row.get('_ltags', []) + meta_tags = row.get('_meta_tags', []) + + # First check if it has general protection tag + has_protection = any('protection' in t for t in theme_tags) + if not has_protection: + return False + + # INCLUDE: Board-relevant scopes + # "Your Permanents:", "Blanket:", "Targeted:" + has_board_scope = any( + 'your permanents:' in t or 'blanket:' in t or 'targeted:' in t + for t in meta_tags + ) + + # EXCLUDE: Self-only, opponent protection, or type-specific grants + # Check for type-specific grants FIRST (highest priority exclusion) + has_type_specific = any( + ' gain ' in t.lower() # "Knights Gain", "Treefolk Gain", etc.
+ for t in meta_tags + ) + + has_excluded_scope = any( + 'self:' in t or + 'opponent permanents:' in t + for t in meta_tags + ) + + # Include if board-relevant, or if no scope tags (legacy cards) + # ALWAYS exclude type-specific grants (too narrow for general protection) + if has_board_scope or has_type_specific or has_excluded_scope: + # Has scope metadata - use it for filtering + # Exclude if type-specific OR self/opponent + if has_type_specific or has_excluded_scope: + return False + # Otherwise include if board-relevant + return has_board_scope + else: + # No scope metadata (legacy card, or only non-scope tags) - include by default + return True + + pool = df[df.apply(is_board_relevant_protection, axis=1)] + + # Log scope filtering stats + original_count = len(df[df['_ltags'].apply(lambda tags: any('protection' in t for t in tags))]) + filtered_count = len(pool) + if original_count > filtered_count: + self.output_func(f"Protection scope filter: {filtered_count}/{original_count} cards (excluded {original_count - filtered_count} self-only/opponent cards)") + else: + # Legacy behavior: include all cards with 'protection' tag + pool = df[df['_ltags'].apply(lambda tags: any('protection' in t for t in tags))] + pool = pool[~pool['type'].fillna('').str.contains('Land', case=False, na=False)] commander_name = getattr(self, 'commander', None) if commander_name: pool = pool[pool['name'] != commander_name] pool = self._apply_bracket_pre_filters(pool) pool = bu.sort_by_priority(pool, ['edhrecRank','manaValue']) + self._debug_dump_pool(pool, 'protection') + try: if str(os.getenv('DEBUG_SPELL_POOLS', '')).strip().lower() in {"1","true","yes","on"}: names = pool['name'].astype(str).head(30).tolist() @@ -580,6 +658,48 @@ class SpellAdditionMixin: if existing >= target and to_add == 0: return target = to_add if existing < target else to_add + + # M5: Limit pool size to manageable tier-based selection + # Strategy: Top tier (3x target) + random deeper selection + # This keeps the pool focused on high-quality options (at most 3x target + 20 cards) + original_pool_size =
len(pool) + import pandas as pd # method-level import so pd is always bound before pd.concat below + if len(pool) > 0 and target > 0: + try: + # Tier 1: Top quality cards (3x target count) + tier1_size = min(3 * target, len(pool)) + tier1 = pool.head(tier1_size).copy() + + # Tier 2: Random additional cards from remaining pool (10-20 cards) + if len(pool) > tier1_size: + remaining_pool = pool.iloc[tier1_size:].copy() + tier2_size = min( + self.rng.randint(10, 20) if hasattr(self, 'rng') and self.rng else 15, + len(remaining_pool) + ) + if hasattr(self, 'rng') and self.rng and len(remaining_pool) > tier2_size: + # Use random.sample() to select random indices from the remaining pool + tier2_indices = self.rng.sample(range(len(remaining_pool)), tier2_size) + tier2 = remaining_pool.iloc[tier2_indices] + else: + tier2 = remaining_pool.head(tier2_size) + pool = pd.concat([tier1, tier2], ignore_index=True) + else: + pool = tier1 + + if len(pool) != original_pool_size: + self.output_func(f"Protection pool limited: {len(pool)}/{original_pool_size} cards (tier1: {tier1_size}, tier2: {len(pool) - tier1_size})") + except Exception as e: + self.output_func(f"Warning: Pool limiting failed, using full pool: {e}") + + # Shuffle pool for variety across builds (using seeded RNG for determinism) + try: + if hasattr(self, 'rng') and self.rng is not None: + shuffled_positions = list(range(len(pool))) + self.rng.shuffle(shuffled_positions) + pool = pool.iloc[shuffled_positions].reset_index(drop=True) + except Exception: + pass added = 0 added_names: List[str] = [] for _, r in pool.iterrows(): diff --git a/code/deck_builder/phases/phase6_reporting.py b/code/deck_builder/phases/phase6_reporting.py index c1fa136..b71fcc0 100644 --- a/code/deck_builder/phases/phase6_reporting.py +++ b/code/deck_builder/phases/phase6_reporting.py @@ -878,7 +878,7 @@ class ReportingMixin: headers = [ "Name","Count","Type","ManaCost","ManaValue","Colors","Power","Toughness", - "Role","SubRole","AddedBy","TriggerTag","Synergy","Tags","Text","DFCNote","Owned" +
"Role","SubRole","AddedBy","TriggerTag","Synergy","Tags","MetadataTags","Text","DFCNote","Owned" ] header_suffix: List[str] = [] @@ -946,6 +946,9 @@ class ReportingMixin: role = info.get('Role', '') or '' tags = info.get('Tags', []) or [] tags_join = '; '.join(tags) + # M5: Include metadata tags in export + metadata_tags = info.get('MetadataTags', []) or [] + metadata_tags_join = '; '.join(metadata_tags) text_field = '' colors = '' power = '' @@ -1014,6 +1017,7 @@ class ReportingMixin: info.get('TriggerTag') or '', info.get('Synergy') if info.get('Synergy') is not None else '', tags_join, + metadata_tags_join, # M5: Include metadata tags text_field[:800] if isinstance(text_field, str) else str(text_field)[:800], dfc_note, owned_flag diff --git a/code/file_setup/setup.py b/code/file_setup/setup.py index db6ad82..b377017 100644 --- a/code/file_setup/setup.py +++ b/code/file_setup/setup.py
@@ -197,7 +197,17 @@ def regenerate_csvs_all() -> None: download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv') logger.info('Loading and processing card data') - df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False) + try: + df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False) + except pd.errors.ParserError as e: + logger.warning(f'CSV parsing error encountered: {e}. Retrying with error handling...') + df = pd.read_csv( + f'{CSV_DIRECTORY}/cards.csv', + low_memory=False, + on_bad_lines='warn', # Warn about malformed rows but continue + encoding_errors='replace' # Replace bad encoding chars + ) + logger.info('Successfully loaded card data with error handling (some rows may have been skipped)') logger.info('Regenerating color identity sorted files') save_color_filtered_csvs(df, CSV_DIRECTORY) @@ -234,7 +244,12 @@ def regenerate_csv_by_color(color: str) -> None: download_cards_csv(MTGJSON_API_URL, f'{CSV_DIRECTORY}/cards.csv') logger.info('Loading and processing card data') - df = pd.read_csv(f'{CSV_DIRECTORY}/cards.csv', low_memory=False) + df = pd.read_csv( + f'{CSV_DIRECTORY}/cards.csv', + low_memory=False, + on_bad_lines='skip', # Skip malformed rows (MTGJSON CSV has escaping issues) + encoding_errors='replace' # Replace bad encoding chars + ) logger.info(f'Regenerating {color} cards CSV') # Use shared utilities to base-filter once then slice color, honoring bans diff --git a/code/settings.py b/code/settings.py index 5731031..101b4d5 100644 --- a/code/settings.py +++ b/code/settings.py @@ -102,14 +102,17 @@ FILL_NA_COLUMNS: Dict[str, Optional[str]] = { } # ---------------------------------------------------------------------------------- -# TAGGING REFINEMENT FEATURE FLAGS (M1-M3) +# TAGGING REFINEMENT FEATURE FLAGS (M1-M5) # ---------------------------------------------------------------------------------- -# M1: Enable
keyword normalization and singleton pruning (completed) TAG_NORMALIZE_KEYWORDS = os.getenv('TAG_NORMALIZE_KEYWORDS', '1').lower() not in ('0', 'false', 'off', 'disabled') -# M2: Enable protection grant detection (planned) -TAG_PROTECTION_GRANTS = os.getenv('TAG_PROTECT ION_GRANTS', '0').lower() not in ('0', 'false', 'off', 'disabled') +# M2: Enable protection grant detection (completed) +TAG_PROTECTION_GRANTS = os.getenv('TAG_PROTECTION_GRANTS', '1').lower() not in ('0', 'false', 'off', 'disabled') -# M3: Enable metadata/theme partition (planned) -TAG_METADATA_SPLIT = os.getenv('TAG_METADATA_SPLIT', '0').lower() not in ('0', 'false', 'off', 'disabled') \ No newline at end of file +# M3: Enable metadata/theme partition (completed) +TAG_METADATA_SPLIT = os.getenv('TAG_METADATA_SPLIT', '1').lower() not in ('0', 'false', 'off', 'disabled') + +# M5: Enable protection scope filtering in deck builder (completed - Phase 1-3, in progress Phase 4+) +TAG_PROTECTION_SCOPE = os.getenv('TAG_PROTECTION_SCOPE', '1').lower() not in ('0', 'false', 'off', 'disabled') \ No newline at end of file diff --git a/code/tagging/phasing_scope_detection.py b/code/tagging/phasing_scope_detection.py new file mode 100644 index 0000000..b16a3d8 --- /dev/null +++ b/code/tagging/phasing_scope_detection.py @@ -0,0 +1,206 @@ +""" +Phasing Scope Detection Module + +Detects the scope of phasing effects with multiple dimensions: +- Targeted: Phasing (any targeting effect) +- Self: Phasing (phases itself out) +- Your Permanents: Phasing (phases your permanents out) +- Opponent Permanents: Phasing (phases opponent permanents - removal) +- Blanket: Phasing (phases all permanents out) + +Cards can have multiple scope tags (e.g., Targeted + Your Permanents). 
+""" + +import re +from typing import Set +from code.logging_util import get_logger + +logger = get_logger(__name__) + + +def get_phasing_scope_tags(text: str, card_name: str, keywords: str = '') -> Set[str]: + """ + Get all phasing scope metadata tags for a card. + + A card can have multiple scope tags: + - "Targeted: Phasing" - Uses targeting + - "Self: Phasing" - Phases itself out + - "Your Permanents: Phasing" - Phases your permanents + - "Opponent Permanents: Phasing" - Phases opponent permanents (removal) + - "Blanket: Phasing" - Phases all permanents + + Args: + text: Card text + card_name: Card name + keywords: Card keywords (to check for static "Phasing" ability) + + Returns: + Set of metadata tags + """ + if not card_name: + return set() + + text_lower = text.lower() if text else '' + keywords_lower = keywords.lower() if keywords else '' + tags = set() + + # Check for static "Phasing" keyword ability (self-phasing) + # Only add Self tag if card doesn't grant phasing to others + if 'phasing' in keywords_lower: + # Remove reminder text to avoid false positives + text_no_reminder = re.sub(r'\([^)]*\)', '', text_lower) + + # Check if card grants phasing to others (has granting language in main text) + # Look for patterns like "enchanted creature has", "other X have", "target", etc. + grants_to_others = bool(re.search( + r'(other|target|each|all|enchanted|equipped|creatures? you control|permanents? you control).*phas', + text_no_reminder + )) + + # If no granting language, it's just self-phasing + if not grants_to_others: + tags.add('Self: Phasing') + return tags # Early return - static keyword only + + # Check if phasing is mentioned in text (including "has phasing", "gain phasing", etc.) 
+ if 'phas' not in text_lower: # Changed from 'phase' to 'phas' to catch "phasing" too + return tags + + # Check for targeting (any "target" + phasing) + # Targeting detection - must have target AND phase in same sentence/clause + targeting_patterns = [ + r'target\s+(?:\w+\s+)*(?:creature|permanent|artifact|enchantment|nonland\s+permanent)s?(?:[^.]*)?phases?\s+out', + r'target\s+player\s+controls[^.]*phases?\s+out', + ] + + is_targeted = any(re.search(pattern, text_lower) for pattern in targeting_patterns) + + if is_targeted: + tags.add("Targeted: Phasing") + logger.debug(f"Card '{card_name}': detected Targeted: Phasing") + + # Check for self-phasing + self_patterns = [ + r'this\s+(?:creature|permanent|artifact|enchantment)\s+phases?\s+out', + r'~\s+phases?\s+out', + rf'\b{re.escape(card_name.lower())}\s+phases?\s+out', + # NEW: Triggered self-phasing (King of the Oathbreakers: "it phases out" as reactive protection) + r'whenever.*(?:becomes\s+the\s+target|becomes\s+target).*(?:it|this\s+creature)\s+phases?\s+out', + # NEW: Consequent self-phasing (Cyclonus: "connive. 
Then...phase out") + r'(?:then|,)\s+(?:it|this\s+creature)\s+phases?\s+out', + # NEW: At end of turn/combat self-phasing + r'(?:at\s+(?:the\s+)?end\s+of|after).*(?:it|this\s+creature)\s+phases?\s+out', + ] + + if any(re.search(pattern, text_lower) for pattern in self_patterns): + tags.add("Self: Phasing") + logger.debug(f"Card '{card_name}': detected Self: Phasing") + + # Check for opponent permanent phasing (removal effect) + opponent_patterns = [ + r'target\s+(?:\w+\s+)*(?:creature|permanent)\s+an?\s+opponents?\s+controls?\s+phases?\s+out', + ] + + # Check for unqualified targets (can target opponents' stuff) + # More flexible to handle various phasing patterns + unqualified_target_patterns = [ + r'(?:up\s+to\s+)?(?:one\s+|x\s+|that\s+many\s+)?(?:other\s+)?(?:another\s+)?target\s+(?:\w+\s+)*(?:creature|permanent|artifact|enchantment|nonland\s+permanent)s?(?:[^.]*)?phases?\s+out', + r'target\s+(?:\w+\s+)*(?:creature|permanent|artifact|enchantment|land|nonland\s+permanent)(?:,|\s+and)?\s+(?:then|and)?\s+it\s+phases?\s+out', + ] + + has_opponent_specific = any(re.search(pattern, text_lower) for pattern in opponent_patterns) + has_unqualified_target = any(re.search(pattern, text_lower) for pattern in unqualified_target_patterns) + + # If unqualified AND not restricted to "you control", can target opponents + if has_opponent_specific or (has_unqualified_target and 'you control' not in text_lower): + tags.add("Opponent Permanents: Phasing") + logger.debug(f"Card '{card_name}': detected Opponent Permanents: Phasing") + + # Check for your permanents phasing + your_patterns = [ + # Explicit "you control" + r'(?:target\s+)?(?:creatures?|permanents?|nonland\s+permanents?)\s+you\s+control\s+phases?\s+out', + r'(?:target\s+)?(?:other\s+)?(?:creatures?|permanents?)\s+you\s+control\s+phases?\s+out', + r'permanents?\s+you\s+control\s+phase\s+out', + 
r'(?:any|up\s+to)\s+(?:number\s+of\s+)?(?:target\s+)?(?:other\s+)?(?:creatures?|permanents?|nonland\s+permanents?)\s+you\s+control\s+phases?\s+out', + r'all\s+(?:creatures?|permanents?)\s+you\s+control\s+phase\s+out', + r'each\s+(?:creature|permanent)\s+you\s+control\s+phases?\s+out', + # Pronoun reference to "you control" context + r'(?:creatures?|permanents?|planeswalkers?)\s+you\s+control[^.]*(?:those|the)\s+(?:creatures?|permanents?|planeswalkers?)\s+phase\s+out', + r'creature\s+you\s+control[^.]*(?:it)\s+phases?\s+out', + # "Those permanents" referring back to controlled permanents (across sentence boundaries) + r'you\s+control.*those\s+(?:creatures?|permanents?|planeswalkers?)\s+phase\s+out', + # Equipment/Aura (beneficial to your permanents) + r'equipped\s+(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?phases?\s+out', + r'enchanted\s+(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?phases?\s+out', + r'enchanted\s+(?:creature|permanent)\s+(?:has|gains?)\s+phasing', # NEW: "has phasing" for Cloak of Invisibility, Teferi's Curse + # Pronoun reference after equipped/enchanted creature mentioned + r'(?:equipped|enchanted)\s+(?:creature|permanent)[^.]*,?\s+(?:then\s+)?that\s+(?:creature|permanent)\s+phases?\s+out', + # Target controlled by specific player + r'(?:each|target)\s+(?:creature|permanent)\s+target\s+player\s+controls\s+phases?\s+out', + ] + + if any(re.search(pattern, text_lower) for pattern in your_patterns): + tags.add("Your Permanents: Phasing") + logger.debug(f"Card '{card_name}': detected Your Permanents: Phasing") + + # Check for blanket phasing (all permanents, no ownership) + blanket_patterns = [ + r'all\s+(?:nontoken\s+)?(?:creatures?|permanents?)(?:\s+of\s+that\s+type)?\s+(?:[^.]*\s+)?phase\s+out', + r'each\s+(?:creature|permanent)\s+(?:[^.]*\s+)?phases?\s+out', + # NEW: Type-specific blanket (Shimmer: "Each land of the chosen type has phasing") + 
r'each\s+(?:land|creature|permanent|artifact|enchantment)\s+of\s+the\s+chosen\s+type\s+has\s+phasing', + r'(?:lands?|creatures?|permanents?|artifacts?|enchantments?)\s+of\s+the\s+chosen\s+type\s+(?:have|has)\s+phasing', + # Pronoun reference to "all creatures" + r'all\s+(?:nontoken\s+)?(?:creatures?|permanents?)[^.]*,?\s+(?:then\s+)?(?:those|the)\s+(?:creatures?|permanents?)\s+phase\s+out', + ] + + # Only blanket if no specific ownership mentioned + has_blanket_pattern = any(re.search(pattern, text_lower) for pattern in blanket_patterns) + no_ownership = 'you control' not in text_lower and 'target player controls' not in text_lower and 'opponent' not in text_lower + + if has_blanket_pattern and no_ownership: + tags.add("Blanket: Phasing") + logger.debug(f"Card '{card_name}': detected Blanket: Phasing") + + return tags + + +def has_phasing(text: str) -> bool: + """ + Quick check if card text contains phasing keywords. + + Args: + text: Card text + + Returns: + True if phasing keyword found + """ + if not text: + return False + + text_lower = text.lower() + + # Check for phasing keywords + phasing_keywords = [ + 'phase out', + 'phases out', + 'phasing', + 'phase in', + 'phases in', + ] + + return any(keyword in text_lower for keyword in phasing_keywords) + + +def is_removal_phasing(tags: Set[str]) -> bool: + """ + Check if phasing effect acts as removal (targets opponent permanents). 
+ + Args: + tags: Set of phasing scope tags + + Returns: + True if this is removal-style phasing + """ + return "Opponent Permanents: Phasing" in tags diff --git a/code/tagging/protection_grant_detection.py b/code/tagging/protection_grant_detection.py index dca37b4..a88a86b 100644 --- a/code/tagging/protection_grant_detection.py +++ b/code/tagging/protection_grant_detection.py @@ -50,18 +50,23 @@ def _init_kindred_patterns(): # Grant verb patterns - cards that give protection to other permanents # These patterns look for grant verbs that affect OTHER permanents, not self +# M5: Added phasing support GRANT_VERB_PATTERNS = [ - r'\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b', - r'\bgive[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b', - r'\bgrant[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b', - r'\bget[s]?\b.*\+.*\b(hexproof|shroud|indestructible|ward|protection)\b', # "gets +X/+X and has" pattern + r'\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', + r'\bgive[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', + r'\bgrant[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', + r'\bhave\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', # "have hexproof" static grants + r'\bget[s]?\b.*\+.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', # "gets +X/+X and has hexproof" direct + r'\bget[s]?\b.*\+.*\band\b.*\b(gain[s]?|have)\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', # "gets +X/+X and gains hexproof" + r'\bphases? 
out\b', # M5: Direct phasing triggers (e.g., "it phases out") ] # Self-reference patterns that should NOT count as granting # Reminder text and keyword lines only +# M5: Added phasing support SELF_REFERENCE_PATTERNS = [ - r'^\s*(hexproof|shroud|indestructible|ward|protection)', # Start of text (keyword ability) - r'\([^)]*\b(hexproof|shroud|indestructible|ward|protection)[^)]*\)', # Reminder text in parens + r'^\s*(hexproof|shroud|indestructible|ward|protection|phasing)', # Start of text (keyword ability) + r'\([^)]*\b(hexproof|shroud|indestructible|ward|protection|phasing)[^)]*\)', # Reminder text in parens ] # Conditional self-grant patterns - activated/triggered abilities that grant to self @@ -109,13 +114,22 @@ EXCLUSION_PATTERNS = [ ] # Opponent grant patterns - grants to opponent's permanents (EXCLUDE these) +# NOTE: "all creatures" and "all permanents" are BLANKET effects (help you too), +# not opponent grants. Only exclude effects that ONLY help opponents. OPPONENT_GRANT_PATTERNS = [ r'target opponent', r'each opponent', - r'all creatures', # "all creatures" without "you control" - r'all permanents', # "all permanents" without "you control" - r'each player', - r'each creature', # "each creature" without "you control" + r'opponents? control', # creatures your opponents control + r'opponent.*permanents?.*have', # opponent's permanents have +] + +# Blanket grant patterns - affects all permanents regardless of controller +# These are VALID protection grants that should be tagged (Blanket scope in M5) +BLANKET_GRANT_PATTERNS = [ + r'\ball creatures? (have|gain|get)\b', # All creatures gain hexproof + r'\ball permanents? 
(have|gain|get)\b', # All permanents gain indestructible + r'\beach creature (has|gains?|gets?)\b', # Each creature gains ward + r'\beach player\b', # Each player gains hexproof (very rare but valid blanket) ] # Kindred-specific grant patterns for metadata tagging @@ -179,9 +193,16 @@ def get_kindred_protection_tags(text: str) -> Set[str]: """ Identify kindred-specific protection grants for metadata tagging. - Returns a set of metadata tag names like "Knights Gain Protection". + Returns a set of metadata tag names like: + - "Knights Gain Hexproof" + - "Spiders Gain Ward" + - "Artifacts Gain Indestructible" - Uses both predefined patterns and dynamic creature type detection. + Uses both predefined patterns and dynamic creature type detection, + with specific ability detection (hexproof, ward, indestructible, shroud, protection). + + IMPORTANT: Only tags the specific abilities that appear in the same sentence + as the creature type grant to avoid false positives like Svyelun. """ if not text: return set() @@ -192,21 +213,52 @@ def get_kindred_protection_tags(text: str) -> Set[str]: text_lower = text.lower() tags = set() - # Check predefined patterns (specific kindred types we track) - for tag_name, patterns in KINDRED_GRANT_PATTERNS.items(): - for pattern in patterns: - if re.search(pattern, text_lower, re.IGNORECASE): - tags.add(tag_name) - break # Found match for this kindred type, move to next - - # Only check dynamic patterns if protection keywords present (performance optimization) - if not any(keyword in text_lower for keyword in ['hexproof', 'shroud', 'indestructible', 'ward', 'protection']): + # Only proceed if protective abilities are present (performance optimization) + protective_abilities = ['hexproof', 'shroud', 'indestructible', 'ward', 'protection'] + if not any(keyword in text_lower for keyword in protective_abilities): return tags + # Check predefined patterns (specific kindred types we track) + for tag_base, patterns in 
KINDRED_GRANT_PATTERNS.items(): + for pattern in patterns: + match = re.search(pattern, text_lower, re.IGNORECASE) + if match: + # Extract creature type from tag_base (e.g., "Knights" from "Knights Gain Protection") + creature_type = tag_base.split(' Gain ')[0] + # Get the matched text to check which abilities are in this specific grant + matched_text = match.group(0) + # Only tag abilities that appear in the matched phrase + if 'hexproof' in matched_text: + tags.add(f"{creature_type} Gain Hexproof") + if 'shroud' in matched_text: + tags.add(f"{creature_type} Gain Shroud") + if 'indestructible' in matched_text: + tags.add(f"{creature_type} Gain Indestructible") + if 'ward' in matched_text: + tags.add(f"{creature_type} Gain Ward") + if 'protection' in matched_text: + tags.add(f"{creature_type} Gain Protection") + break # Found match for this kindred type, move to next + # Use pre-compiled patterns for all creature types - for compiled_pattern, tag_name in KINDRED_PATTERNS: - if compiled_pattern.search(text_lower): - tags.add(tag_name) + for compiled_pattern, tag_template in KINDRED_PATTERNS: + match = compiled_pattern.search(text_lower) + if match: + # Extract creature type from tag_template (e.g., "Knights" from "Knights Gain Protection") + creature_type = tag_template.split(' Gain ')[0] + # Get the matched text to check which abilities are in this specific grant + matched_text = match.group(0) + # Only tag abilities that appear in the matched phrase + if 'hexproof' in matched_text: + tags.add(f"{creature_type} Gain Hexproof") + if 'shroud' in matched_text: + tags.add(f"{creature_type} Gain Shroud") + if 'indestructible' in matched_text: + tags.add(f"{creature_type} Gain Indestructible") + if 'ward' in matched_text: + tags.add(f"{creature_type} Gain Ward") + if 'protection' in matched_text: + tags.add(f"{creature_type} Gain Protection") # Don't break - a card could grant to multiple creature types return tags @@ -214,23 +266,33 @@ def 
get_kindred_protection_tags(text: str) -> Set[str]: def is_opponent_grant(text: str) -> bool: """ - Check if card grants protection to opponent's permanents or all permanents. + Check if card grants protection to opponent's permanents ONLY. - Returns True if this grants to opponents (should be excluded from Protection tag). + Returns True if this grants ONLY to opponents (should be excluded from Protection tag). + Does NOT exclude blanket effects like "all creatures gain hexproof" which help you too. """ if not text: return False text_lower = text.lower() - # Check for opponent grant patterns + # Remove reminder text (in parentheses) to avoid false positives + # Reminder text often mentions "opponents control" for hexproof/shroud explanations + text_no_reminder = re.sub(r'\([^)]*\)', '', text_lower) + + # Check for opponent-specific grant patterns in the main text (not reminder) for pattern in OPPONENT_GRANT_PATTERNS: - if re.search(pattern, text_lower, re.IGNORECASE): - # Make sure it's not "target opponent" for a different effect + match = re.search(pattern, text_no_reminder, re.IGNORECASE) + if match: # Must be in context of granting protection if any(prot in text_lower for prot in ['hexproof', 'shroud', 'indestructible', 'ward', 'protection']): - # Check if "you control" appears in same sentence - if 'you control' not in text_lower.split('.')[0]: + # Check the context around the match + context_start = max(0, match.start() - 30) + context_end = min(len(text_no_reminder), match.end() + 70) + context = text_no_reminder[context_start:context_end] + + # If "you control" appears in the context, it's limiting to YOUR permanents, not opponents + if 'you control' not in context: return True return False @@ -372,12 +434,11 @@ def is_granting_protection(text: str, keywords: str, exclude_kindred: bool = Fal # Check for explicit grants with protection keywords found_grant = False - # Mass grant patterns (creatures you control have/gain) - for pattern in 
MASS_GRANT_PATTERNS: + # Blanket grant patterns (all creatures gain hexproof) - these are VALID grants + for pattern in BLANKET_GRANT_PATTERNS: match = re.search(pattern, text_lower, re.IGNORECASE) if match: - # Check if protection keyword appears in the same sentence or nearby (within 70 chars AFTER the match) - # This ensures we're looking at "creatures you control HAVE hexproof" not just having both phrases + # Check if protection keyword appears nearby context_start = match.start() context_end = min(len(text_lower), match.end() + 70) context = text_lower[context_start:context_end] @@ -386,6 +447,21 @@ def is_granting_protection(text: str, keywords: str, exclude_kindred: bool = Fal found_grant = True break + # Mass grant patterns (creatures you control have/gain) + if not found_grant: + for pattern in MASS_GRANT_PATTERNS: + match = re.search(pattern, text_lower, re.IGNORECASE) + if match: + # Check if protection keyword appears in the same sentence or nearby (within 70 chars AFTER the match) + # This ensures we're looking at "creatures you control HAVE hexproof" not just having both phrases + context_start = match.start() + context_end = min(len(text_lower), match.end() + 70) + context = text_lower[context_start:context_end] + + if any(prot in context for prot in PROTECTION_KEYWORDS): + found_grant = True + break + # Targeted grant patterns (target creature gains) if not found_grant: for pattern in TARGETED_GRANT_PATTERNS: diff --git a/code/tagging/protection_scope_detection.py b/code/tagging/protection_scope_detection.py new file mode 100644 index 0000000..bffc768 --- /dev/null +++ b/code/tagging/protection_scope_detection.py @@ -0,0 +1,206 @@ +""" +Protection Scope Detection Module + +Detects the scope of protection effects (Self, Your Permanents, Blanket, Opponent Permanents) +to enable intelligent filtering in deck building. + +Part of M5: Protection Effect Granularity milestone. 
+""" + +import re +from typing import Optional, Set +from code.logging_util import get_logger + +logger = get_logger(__name__) + + +# Protection abilities to detect +PROTECTION_ABILITIES = [ + 'Protection', + 'Ward', + 'Hexproof', + 'Shroud', + 'Indestructible' +] + + +def detect_protection_scope(text: str, card_name: str, ability: str) -> Optional[str]: + """ + Detect the scope of a protection effect. + + Detection priority order (prevents misclassification): + 1. Opponent ownership → "Opponent Permanents" + 2. Your ownership → "Your Permanents" + 3. Self-reference → "Self" + 4. No ownership qualifier → "Blanket" + + Args: + text: Card text (lowercase for pattern matching) + card_name: Card name (for self-reference detection) + ability: Ability type (Ward, Hexproof, etc.) + + Returns: + Scope prefix or None: "Self", "Your Permanents", "Blanket", "Opponent Permanents" + """ + if not text or not ability: + return None + + text_lower = text.lower() + ability_lower = ability.lower() + card_name_lower = card_name.lower() + + # Check if ability is mentioned in text + if ability_lower not in text_lower: + return None + + # Priority 1: Opponent ownership (grants protection TO opponent's permanents) + # Note: Must distinguish from hexproof reminder text "opponents control [spells/abilities]" + # Only match when "opponents control" refers to creatures/permanents, not spells + opponent_patterns = [ + r'creatures?\s+(?:your\s+)?opponents?\s+control\s+(?:have|gain)', + r'permanents?\s+(?:your\s+)?opponents?\s+control\s+(?:have|gain)', + r'each\s+creature\s+an?\s+opponent\s+controls?\s+(?:has|gains?)' + ] + + for pattern in opponent_patterns: + if re.search(pattern, text_lower): + return "Opponent Permanents" + + # Priority 2: Check for self-reference BEFORE "Your Permanents" + # This prevents tilde (~) from being caught by creature type patterns + + # Check for tilde (~) - strong self-reference indicator + tilde_patterns = [ + r'~\s+(?:has|gains?)\s+' + ability_lower, + 
r'~\s+is\s+' + ability_lower + ] + + for pattern in tilde_patterns: + if re.search(pattern, text_lower): + return "Self" + + # Check for "this creature/permanent" pronouns + this_patterns = [ + r'this\s+(?:creature|permanent|artifact|enchantment)\s+(?:has|gains?)\s+' + ability_lower, + r'^(?:has|gains?)\s+' + ability_lower # Starts with ability (likely self) + ] + + for pattern in this_patterns: + if re.search(pattern, text_lower): + return "Self" + + # Check for card name (replace special characters for matching) + card_name_escaped = re.escape(card_name_lower) + if re.search(rf'\b{card_name_escaped}\b', text_lower): + # Make sure it's in a self-protection context + # e.g., "Svyelun has indestructible" not "Svyelun and other Merfolk" + self_context_patterns = [ + rf'\b{card_name_escaped}\s+(?:has|gains?)\s+{ability_lower}', + rf'\b{card_name_escaped}\s+is\s+{ability_lower}' + ] + for pattern in self_context_patterns: + if re.search(pattern, text_lower): + return "Self" + + # NEW: If no grant patterns found at all, assume inherent protection (Self) + # This catches cards where protection is in the keywords field but not explained in text + # e.g., "Protection from creatures" as a keyword line + # Check if we have the ability keyword but no grant patterns + has_grant_pattern = any(re.search(pattern, text_lower) for pattern in [ + r'(?:have|gain|grant|give|get)[s]?\s+', + r'other\s+', + r'creatures?\s+you\s+control', + r'permanents?\s+you\s+control', + r'equipped', + r'enchanted', + r'target' + ]) + + if not has_grant_pattern: + # No grant verbs found - likely inherent protection + return "Self" + + # Priority 3: Your ownership (most common) + # Note: "Other [Type]" patterns included for type-specific grants + # Note: "equipped creature", "target creature", etc. 
are permanents you control + your_patterns = [ + r'(?:other\s+)?(?:creatures?|permanents?|artifacts?|enchantments?)\s+you\s+control', + r'your\s+(?:creatures?|permanents?|artifacts?|enchantments?)', + r'each\s+(?:creature|permanent)\s+you\s+control', + r'other\s+\w+s?\s+you\s+control', # "Other Merfolk you control", etc. + # NEW: "Other X you control...have Y" pattern for static grants + r'other\s+(?:\w+\s+)?(?:creatures?|permanents?)\s+you\s+control\s+(?:get\s+[^.]*\s+and\s+)?have\s+' + ability_lower, + r'other\s+\w+s?\s+you\s+control\s+(?:get\s+[^.]*\s+and\s+)?have\s+' + ability_lower, # "Other Knights you control...have" + r'equipped\s+(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?(?:has|gains?)\s+(?:[^.]*\s+and\s+)?' + ability_lower, # Equipment + r'enchanted\s+(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?(?:has|gains?)\s+(?:[^.]*\s+and\s+)?' + ability_lower, # Aura + r'target\s+(?:\w+\s+)?(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?(?:gains?)\s+' + ability_lower # Target (with optional adjective) + ] + + for pattern in your_patterns: + if re.search(pattern, text_lower): + return "Your Permanents" + + # Priority 4: Blanket (no ownership qualifier) + # Only apply if we have protection keyword but no ownership context + # Note: Abilities can be listed with "and" (e.g., "gain hexproof and indestructible") + blanket_patterns = [ + r'all\s+(?:creatures?|permanents?)\s+(?:have|gain)\s+(?:[^.]*\s+and\s+)?' + ability_lower, + r'each\s+(?:creature|permanent)\s+(?:has|gains?)\s+(?:[^.]*\s+and\s+)?' + ability_lower, + r'(?:creatures?|permanents?)\s+(?:have|gain)\s+(?:[^.]*\s+and\s+)?' + ability_lower + ] + + for pattern in blanket_patterns: + if re.search(pattern, text_lower): + # Double-check no ownership was missed + if 'you control' not in text_lower and 'opponent' not in text_lower: + return "Blanket" + + return None + + +def get_protection_scope_tags(text: str, card_name: str) -> Set[str]: + """ + Get all protection scope metadata tags for a card. 
+ + A card can have multiple protection scopes (e.g., self-hexproof + grants ward to others). + + Args: + text: Card text + card_name: Card name + + Returns: + Set of metadata tags like {"Self: Indestructible", "Your Permanents: Ward"} + """ + if not text or not card_name: + return set() + + scope_tags = set() + + # Check each protection ability + for ability in PROTECTION_ABILITIES: + scope = detect_protection_scope(text, card_name, ability) + + if scope: + # Format: "{Scope}: {Ability}" + tag = f"{scope}: {ability}" + scope_tags.add(tag) + logger.debug(f"Card '{card_name}': detected scope tag '{tag}'") + + return scope_tags + + +def has_any_protection(text: str) -> bool: + """ + Quick check if card text contains any protection keywords. + + Args: + text: Card text + + Returns: + True if any protection keyword found + """ + if not text: + return False + + text_lower = text.lower() + return any(ability.lower() in text_lower for ability in PROTECTION_ABILITIES) diff --git a/code/tagging/tag_constants.py b/code/tagging/tag_constants.py index 6e5f3c4..e3d8895 100644 --- a/code/tagging/tag_constants.py +++ b/code/tagging/tag_constants.py @@ -927,11 +927,32 @@ KEYWORD_ALLOWLIST: set[str] = { 'Tempting offer', 'Will of the council', 'Parley', 'Adamant', 'Devotion', } -# Metadata tag prefixes (for M3 - metadata partition) -# Tags matching these patterns should be classified as metadata, not themes +# ============================================================================== +# Metadata Tag Classification (M3 - Tagging Refinement) +# ============================================================================== + +# Metadata tag prefixes - tags starting with these are classified as metadata METADATA_TAG_PREFIXES: List[str] = [ 'Applied:', 'Bracket:', 'Diagnostic:', 'Internal:', -] \ No newline at end of file +] + +# Specific metadata tags (full match) - additional tags to classify as metadata +# These are typically diagnostic, bracket-related, or internal annotations 
+METADATA_TAG_ALLOWLIST: set[str] = { + # Bracket annotations + 'Bracket: Game Changer', + 'Bracket: Staple', + 'Bracket: Format Warping', + + # Cost reduction diagnostics (from Applied: namespace) + 'Applied: Cost Reduction', + + # Kindred-specific protection metadata (from M2) + # Format: "{CreatureType}s Gain Protection" + # These are auto-generated for kindred-specific protection grants + # Example: "Knights Gain Protection", "Frogs Gain Protection" + # Note: These are dynamically generated, so we match via prefix in classify_tag +} \ No newline at end of file diff --git a/code/tagging/tag_utils.py b/code/tagging/tag_utils.py index e731f07..eb58aa6 100644 --- a/code/tagging/tag_utils.py +++ b/code/tagging/tag_utils.py @@ -582,4 +582,80 @@ def normalize_keywords( normalized_keywords.add(normalized) - return sorted(list(normalized_keywords)) \ No newline at end of file + return sorted(list(normalized_keywords)) + + +# ============================================================================== +# M3: Metadata vs Theme Tag Classification +# ============================================================================== + +def classify_tag(tag: str) -> str: + """Classify a tag as either 'metadata' or 'theme'. + + Metadata tags are diagnostic, bracket-related, or internal annotations that + should not appear in theme catalogs or player-facing tag lists. Theme tags + represent gameplay mechanics and deck archetypes. + + Classification rules (in order of precedence): + 1. Prefix match: Tags starting with METADATA_TAG_PREFIXES → metadata + 2. Exact match: Tags in METADATA_TAG_ALLOWLIST → metadata + 3. Kindred pattern: "{Type}s Gain Protection" → metadata + 4. 
Default: All other tags → theme + + Args: + tag: Tag string to classify + + Returns: + "metadata" or "theme" + + Examples: + >>> classify_tag("Applied: Cost Reduction") + 'metadata' + >>> classify_tag("Bracket: Game Changer") + 'metadata' + >>> classify_tag("Knights Gain Protection") + 'metadata' + >>> classify_tag("Card Draw") + 'theme' + >>> classify_tag("Spellslinger") + 'theme' + """ + # Prefix-based classification + for prefix in tag_constants.METADATA_TAG_PREFIXES: + if tag.startswith(prefix): + return "metadata" + + # Exact match classification + if tag in tag_constants.METADATA_TAG_ALLOWLIST: + return "metadata" + + # Kindred protection metadata patterns: "{Type} Gain {Ability}" + # Covers all protective abilities: Protection, Ward, Hexproof, Shroud, Indestructible + # Examples: "Knights Gain Protection", "Spiders Gain Ward", "Merfolk Gain Ward" + # Note: Checks for " Gain " pattern since some creature types like "Merfolk" don't end in 's' + kindred_abilities = ["Protection", "Ward", "Hexproof", "Shroud", "Indestructible"] + for ability in kindred_abilities: + if " Gain " in tag and tag.endswith(ability): + return "metadata" + + # Protection scope metadata patterns (M5): "{Scope}: {Ability}" + # Indicates whether protection applies to self, your permanents, all permanents, or opponent's permanents + # Examples: "Self: Hexproof", "Your Permanents: Ward", "Blanket: Indestructible" + # These enable deck builder to filter for board-relevant protection vs self-only + protection_scopes = ["Self:", "Your Permanents:", "Blanket:", "Opponent Permanents:"] + for scope in protection_scopes: + if tag.startswith(scope): + return "metadata" + + # Phasing scope metadata patterns: "{Scope}: Phasing" + # Indicates whether phasing applies to self, your permanents, all permanents, or opponents + # Examples: "Self: Phasing", "Your Permanents: Phasing", "Blanket: Phasing", + # "Targeted: Phasing", "Opponent Permanents: Phasing" + # Similar to protection scopes, enables 
filtering for board-relevant phasing + # Opponent Permanents: Phasing also triggers Removal tag (removal-style phasing) + if tag in ["Self: Phasing", "Your Permanents: Phasing", "Blanket: Phasing", + "Targeted: Phasing", "Opponent Permanents: Phasing"]: + return "metadata" + + # Default: treat as theme tag + return "theme" \ No newline at end of file diff --git a/code/tagging/tagger.py b/code/tagging/tagger.py index b2b3f0b..94ef6da 100644 --- a/code/tagging/tagger.py +++ b/code/tagging/tagger.py @@ -159,6 +159,134 @@ def _write_compat_snapshot(df: pd.DataFrame, color: str) -> None: except Exception as exc: logger.warning("Failed to write unmerged snapshot for %s: %s", color, exc) + +def _apply_metadata_partition(df: pd.DataFrame) -> tuple[pd.DataFrame, Dict[str, Any]]: + """Partition tags into themeTags and metadataTags columns. + + Metadata tags are diagnostic, bracket-related, or internal annotations that + should not appear in theme catalogs or player-facing lists. This function: + 1. Creates a new 'metadataTags' column + 2. Classifies each tag in 'themeTags' as metadata or theme + 3. Moves metadata tags to 'metadataTags' column + 4. Keeps theme tags in 'themeTags' column + 5. 
Returns summary diagnostics + + Args: + df: DataFrame with 'themeTags' column (list of tag strings) + + Returns: + Tuple of (modified DataFrame, diagnostics dict) + + Diagnostics dict contains: + - total_rows: number of rows processed + - rows_with_tags: rows that had any tags + - metadata_tags_moved: total count of metadata tags moved + - theme_tags_kept: total count of theme tags kept + - tag_distribution: dict mapping tag -> classification + - most_common_metadata: list of (tag, count) tuples + - most_common_themes: list of (tag, count) tuples + + Example: + >>> df = pd.DataFrame({'themeTags': [['Card Draw', 'Applied: Cost Reduction']]}) + >>> df_out, diag = _apply_metadata_partition(df) + >>> df_out['themeTags'].iloc[0] + ['Card Draw'] + >>> df_out['metadataTags'].iloc[0] + ['Applied: Cost Reduction'] + >>> diag['metadata_tags_moved'] + 1 + """ + # Check feature flag directly from environment (not from settings module) + # This allows tests to monkeypatch the environment variable + tag_metadata_split = os.getenv('TAG_METADATA_SPLIT', '1').lower() not in ('0', 'false', 'off', 'disabled') + + # Feature flag check - return unmodified if disabled + if not tag_metadata_split: + logger.info("TAG_METADATA_SPLIT disabled, skipping metadata partition") + return df, { + "enabled": False, + "total_rows": len(df), + "message": "Feature disabled via TAG_METADATA_SPLIT=0" + } + + # Validate input + if 'themeTags' not in df.columns: + logger.warning("No 'themeTags' column found, skipping metadata partition") + return df, { + "enabled": True, + "error": "Missing themeTags column", + "total_rows": len(df) + } + + # Initialize metadataTags column + df['metadataTags'] = pd.Series([[] for _ in range(len(df))], index=df.index) + + # Track statistics + metadata_counts: Dict[str, int] = {} + theme_counts: Dict[str, int] = {} + total_metadata_moved = 0 + total_theme_kept = 0 + rows_with_tags = 0 + + # Process each row + for idx in df.index: + tags = df.at[idx, 'themeTags'] + + # Skip 
if not a list or empty + if not isinstance(tags, list) or not tags: + continue + + rows_with_tags += 1 + + # Classify each tag + metadata_tags = [] + theme_tags = [] + + for tag in tags: + classification = tag_utils.classify_tag(tag) + + if classification == "metadata": + metadata_tags.append(tag) + metadata_counts[tag] = metadata_counts.get(tag, 0) + 1 + total_metadata_moved += 1 + else: + theme_tags.append(tag) + theme_counts[tag] = theme_counts.get(tag, 0) + 1 + total_theme_kept += 1 + + # Update columns + df.at[idx, 'themeTags'] = theme_tags + df.at[idx, 'metadataTags'] = metadata_tags + + # Sort tag lists for top N reporting + most_common_metadata = sorted(metadata_counts.items(), key=lambda x: x[1], reverse=True)[:10] + most_common_themes = sorted(theme_counts.items(), key=lambda x: x[1], reverse=True)[:10] + + # Build diagnostics + diagnostics = { + "enabled": True, + "total_rows": len(df), + "rows_with_tags": rows_with_tags, + "metadata_tags_moved": total_metadata_moved, + "theme_tags_kept": total_theme_kept, + "unique_metadata_tags": len(metadata_counts), + "unique_theme_tags": len(theme_counts), + "most_common_metadata": most_common_metadata, + "most_common_themes": most_common_themes + } + + # Log summary + logger.info( + f"Metadata partition complete: {total_metadata_moved} metadata tags moved, " + f"{total_theme_kept} theme tags kept across {rows_with_tags} rows" + ) + + if most_common_metadata: + top_5_metadata = ', '.join([f"{tag}({ct})" for tag, ct in most_common_metadata[:5]]) + logger.info(f"Top metadata tags: {top_5_metadata}") + + return df, diagnostics + ### Setup ## Load the dataframe def load_dataframe(color: str) -> None: @@ -211,7 +339,14 @@ def load_dataframe(color: str) -> None: raise ValueError(f"Failed to add required columns: {still_missing}") # Load final dataframe with proper converters - df = pd.read_csv(filepath, converters={'themeTags': pd.eval, 'creatureTypes': pd.eval}) + # M3: metadataTags is optional (may not exist in older 
CSVs) + converters = {'themeTags': pd.eval, 'creatureTypes': pd.eval} + + # Add metadataTags converter if column exists + if 'metadataTags' in check_df.columns: + converters['metadataTags'] = pd.eval + + df = pd.read_csv(filepath, converters=converters) # Process the dataframe tag_by_color(df, color) @@ -331,8 +466,15 @@ def tag_by_color(df: pd.DataFrame, color: str) -> None: if color == 'commander': df = enrich_commander_rows_with_tags(df, CSV_DIRECTORY) - # Lastly, sort all theme tags for easier reading and reorder columns + # Sort all theme tags for easier reading and reorder columns df = sort_theme_tags(df, color) + + # M3: Partition metadata tags from theme tags + df, partition_diagnostics = _apply_metadata_partition(df) + if partition_diagnostics.get("enabled"): + logger.info(f"Metadata partition for {color}: {partition_diagnostics['metadata_tags_moved']} metadata, " + f"{partition_diagnostics['theme_tags_kept']} theme tags") + df.to_csv(f'{CSV_DIRECTORY}/{color}_cards.csv', index=False) #print(df) print('\n====================\n') @@ -6652,6 +6794,11 @@ def tag_for_interaction(df: pd.DataFrame, color: str) -> None: logger.info(f'Completed protection tagging in {(pd.Timestamp.now() - sub_start).total_seconds():.2f}s') print('\n==========\n') + sub_start = pd.Timestamp.now() + tag_for_phasing(df, color) + logger.info(f'Completed phasing tagging in {(pd.Timestamp.now() - sub_start).total_seconds():.2f}s') + print('\n==========\n') + sub_start = pd.Timestamp.now() tag_for_removal(df, color) logger.info(f'Completed removal tagging in {(pd.Timestamp.now() - sub_start).total_seconds():.2f}s') @@ -7076,24 +7223,59 @@ def tag_for_protection(df: pd.DataFrame, color: str) -> None: ) final_mask = grant_mask - logger.info(f'Using M2 grant detection (TAG_PROTECTION_GRANTS=1)') + logger.info('Using M2 grant detection (TAG_PROTECTION_GRANTS=1)') # Apply kindred metadata tags for creature-type-specific grants + # Note: These are added to themeTags first, then 
_apply_metadata_partition() + # will classify them as metadata and move them to metadataTags column kindred_count = 0 for idx, row in df[final_mask].iterrows(): text = str(row.get('text', '')) kindred_tags = get_kindred_protection_tags(text) if kindred_tags: - # Add kindred-specific metadata tags - current_tags = str(row.get('metadataTags', '')) - existing = set(t.strip() for t in current_tags.split(',') if t.strip()) - existing.update(kindred_tags) - df.at[idx, 'metadataTags'] = ', '.join(sorted(existing)) + # Add to themeTags temporarily - partition will move to metadataTags + current_tags = row.get('themeTags', []) + if not isinstance(current_tags, list): + current_tags = [] + + # Add kindred tags (they'll be classified as metadata later) + updated_tags = list(set(current_tags) | set(kindred_tags)) + df.at[idx, 'themeTags'] = updated_tags kindred_count += 1 if kindred_count > 0: - logger.info(f'Applied kindred metadata tags to {kindred_count} cards') + logger.info(f'Applied kindred protection tags to {kindred_count} cards (will be moved to metadata by partition)') + + # M5: Add protection scope metadata tags (Self, Your Permanents, Blanket, Opponent) + # Apply to ALL cards with protection effects, not just those that passed grant filter + # This ensures inherent protection cards like Aysen Highway get "Self: Protection" tags + from code.tagging.protection_scope_detection import get_protection_scope_tags, has_any_protection + + scope_count = 0 + for idx, row in df.iterrows(): + text = str(row.get('text', '')) + name = str(row.get('name', '')) + keywords = str(row.get('keywords', '')) + + # Check if card has ANY protection effects (text or keywords) + if not has_any_protection(text) and not any(k in keywords.lower() for k in ['hexproof', 'shroud', 'indestructible', 'ward', 'protection', 'phasing']): + continue + + scope_tags = get_protection_scope_tags(text, name) + + if scope_tags: + current_tags = row.get('themeTags', []) + if not isinstance(current_tags, list): 
+ current_tags = [] + + # Add scope tags to themeTags (partition will move to metadataTags) + updated_tags = list(set(current_tags) | set(scope_tags)) + df.at[idx, 'themeTags'] = updated_tags + scope_count += 1 + + if scope_count > 0: + logger.info(f'Applied protection scope tags to {scope_count} cards (will be moved to metadata by partition)') else: # Legacy: Use original text/keyword patterns text_mask = create_protection_text_mask(df) @@ -7101,13 +7283,50 @@ def tag_for_protection(df: pd.DataFrame, color: str) -> None: exclusion_mask = create_protection_exclusion_mask(df) final_mask = (text_mask | keyword_mask) & ~exclusion_mask - # Apply tags via rules engine + # Apply generic protection tags first tag_utils.apply_rules(df, rules=[ { 'mask': final_mask, 'tags': ['Protection', 'Interaction'] } ]) + + # Apply specific protection ability tags (Hexproof, Indestructible, etc.) + # These are theme tags indicating which specific protections the card provides + ability_tag_count = 0 + for idx, row in df[final_mask].iterrows(): + text = str(row.get('text', '')) + keywords = str(row.get('keywords', '')) + + # Detect which specific abilities are present + ability_tags = set() + text_lower = text.lower() + keywords_lower = keywords.lower() + + # Check for each protection ability + if 'hexproof' in text_lower or 'hexproof' in keywords_lower: + ability_tags.add('Hexproof') + if 'indestructible' in text_lower or 'indestructible' in keywords_lower: + ability_tags.add('Indestructible') + if 'shroud' in text_lower or 'shroud' in keywords_lower: + ability_tags.add('Shroud') + if 'ward' in text_lower or 'ward' in keywords_lower: + ability_tags.add('Ward') + if 'protection from' in text_lower or 'protection from' in keywords_lower: + ability_tags.add('Protection from Color') + + if ability_tags: + current_tags = row.get('themeTags', []) + if not isinstance(current_tags, list): + current_tags = [] + + # Add ability tags to themeTags + updated_tags = list(set(current_tags) | 
ability_tags) + df.at[idx, 'themeTags'] = updated_tags + ability_tag_count += 1 + + if ability_tag_count > 0: + logger.info(f'Applied specific protection ability tags to {ability_tag_count} cards') # Log results duration = (pd.Timestamp.now() - start_time).total_seconds() @@ -7117,6 +7336,101 @@ def tag_for_protection(df: pd.DataFrame, color: str) -> None: logger.error(f'Error in tag_for_protection: {str(e)}') raise +## Phasing effects +def tag_for_phasing(df: pd.DataFrame, color: str) -> None: + """Tag cards that provide phasing effects using vectorized operations. + + This function identifies and tags cards with phasing effects including: + - Cards that phase permanents out + - Cards with phasing keyword + + Similar to M5 protection tagging, adds scope metadata tags: + - Self: Phasing (card phases itself out) + - Your Permanents: Phasing (phases your permanents out) + - Blanket: Phasing (phases all permanents out) + + Args: + df: DataFrame containing card data + color: Color identifier for logging purposes + + Raises: + ValueError: If required DataFrame columns are missing + TypeError: If inputs are not of correct type + """ + start_time = pd.Timestamp.now() + logger.info(f'Starting phasing effect tagging for {color}_cards.csv') + + try: + # Validate inputs + if not isinstance(df, pd.DataFrame): + raise TypeError("df must be a pandas DataFrame") + if not isinstance(color, str): + raise TypeError("color must be a string") + + # Validate required columns + required_cols = {'text', 'themeTags', 'keywords'} + tag_utils.validate_dataframe_columns(df, required_cols) + + # Create mask for cards with phasing + from code.tagging.phasing_scope_detection import has_phasing, get_phasing_scope_tags, is_removal_phasing + + phasing_mask = df.apply( + lambda row: has_phasing(str(row.get('text', ''))) or + 'phasing' in str(row.get('keywords', '')).lower(), + axis=1 + ) + + # Apply generic "Phasing" theme tag first + tag_utils.apply_rules(df, rules=[ + { + 'mask': phasing_mask, + 
'tags': ['Phasing', 'Interaction'] + } + ]) + + # Add phasing scope metadata tags and removal tags + scope_count = 0 + removal_count = 0 + for idx, row in df[phasing_mask].iterrows(): + text = str(row.get('text', '')) + name = str(row.get('name', '')) + keywords = str(row.get('keywords', '')) + + # Check if card has phasing (in text or keywords) + if not has_phasing(text) and 'phasing' not in keywords.lower(): + continue + + scope_tags = get_phasing_scope_tags(text, name, keywords) + + if scope_tags: + current_tags = row.get('themeTags', []) + if not isinstance(current_tags, list): + current_tags = [] + + # Add scope tags to themeTags (partition will move to metadataTags) + updated_tags = list(set(current_tags) | scope_tags) + + # If this is removal-style phasing, add Removal tag + if is_removal_phasing(scope_tags): + updated_tags.append('Removal') + removal_count += 1 + + df.at[idx, 'themeTags'] = updated_tags + scope_count += 1 + + if scope_count > 0: + logger.info(f'Applied phasing scope tags to {scope_count} cards (will be moved to metadata by partition)') + if removal_count > 0: + logger.info(f'Applied Removal tag to {removal_count} cards with opponent-targeting phasing') + + # Log results + duration = (pd.Timestamp.now() - start_time).total_seconds() + logger.info(f'Tagged {phasing_mask.sum()} cards with phasing effects in {duration:.2f}s') + + except Exception as e: + logger.error(f'Error in tag_for_phasing: {str(e)}') + raise + ## Spot removal def create_removal_text_mask(df: pd.DataFrame) -> pd.Series: """Create a boolean mask for cards with removal text patterns. 
diff --git a/code/tests/test_additional_theme_config.py b/code/tests/test_additional_theme_config.py index 1d3dc80..5c6aae7 100644 --- a/code/tests/test_additional_theme_config.py +++ b/code/tests/test_additional_theme_config.py @@ -4,7 +4,7 @@ from pathlib import Path import pytest -from headless_runner import _resolve_additional_theme_inputs, _parse_theme_list +from code.headless_runner import resolve_additional_theme_inputs as _resolve_additional_theme_inputs, _parse_theme_list def _write_catalog(path: Path) -> None: diff --git a/code/tests/test_metadata_partition.py b/code/tests/test_metadata_partition.py new file mode 100644 index 0000000..6b47960 --- /dev/null +++ b/code/tests/test_metadata_partition.py @@ -0,0 +1,300 @@ +"""Tests for M3 metadata/theme tag partition functionality. + +Tests cover: +- Tag classification (metadata vs theme) +- Column creation and data migration +- Feature flag behavior +- Compatibility with missing columns +- CSV read/write with new schema +""" +import pandas as pd +import pytest +from code.tagging import tag_utils +from code.tagging.tagger import _apply_metadata_partition + + +class TestTagClassification: + """Tests for classify_tag function.""" + + def test_prefix_based_metadata(self): + """Metadata tags identified by prefix.""" + assert tag_utils.classify_tag("Applied: Cost Reduction") == "metadata" + assert tag_utils.classify_tag("Bracket: Game Changer") == "metadata" + assert tag_utils.classify_tag("Diagnostic: Test") == "metadata" + assert tag_utils.classify_tag("Internal: Debug") == "metadata" + + def test_exact_match_metadata(self): + """Metadata tags identified by exact match.""" + assert tag_utils.classify_tag("Bracket: Game Changer") == "metadata" + assert tag_utils.classify_tag("Bracket: Staple") == "metadata" + + def test_kindred_protection_metadata(self): + """Kindred protection tags are metadata.""" + assert tag_utils.classify_tag("Knights Gain Protection") == "metadata" + assert tag_utils.classify_tag("Frogs Gain 
Protection") == "metadata" + assert tag_utils.classify_tag("Zombies Gain Protection") == "metadata" + + def test_theme_classification(self): + """Regular gameplay tags are themes.""" + assert tag_utils.classify_tag("Card Draw") == "theme" + assert tag_utils.classify_tag("Spellslinger") == "theme" + assert tag_utils.classify_tag("Tokens Matter") == "theme" + assert tag_utils.classify_tag("Ramp") == "theme" + assert tag_utils.classify_tag("Protection") == "theme" + + def test_edge_cases(self): + """Edge cases in tag classification.""" + # Empty string + assert tag_utils.classify_tag("") == "theme" + + # Similar but not exact matches + assert tag_utils.classify_tag("Apply: Something") == "theme" # Wrong prefix + assert tag_utils.classify_tag("Knights Have Protection") == "theme" # Not "Gain" + + # Case sensitivity + assert tag_utils.classify_tag("applied: Cost Reduction") == "theme" # Lowercase + + +class TestMetadataPartition: + """Tests for _apply_metadata_partition function.""" + + def test_basic_partition(self, monkeypatch): + """Basic partition splits tags correctly.""" + monkeypatch.setenv('TAG_METADATA_SPLIT', '1') + + df = pd.DataFrame({ + 'name': ['Card A', 'Card B'], + 'themeTags': [ + ['Card Draw', 'Applied: Cost Reduction'], + ['Spellslinger', 'Bracket: Game Changer', 'Tokens Matter'] + ] + }) + + df_out, diag = _apply_metadata_partition(df) + + # Check theme tags + assert df_out.loc[0, 'themeTags'] == ['Card Draw'] + assert df_out.loc[1, 'themeTags'] == ['Spellslinger', 'Tokens Matter'] + + # Check metadata tags + assert df_out.loc[0, 'metadataTags'] == ['Applied: Cost Reduction'] + assert df_out.loc[1, 'metadataTags'] == ['Bracket: Game Changer'] + + # Check diagnostics + assert diag['enabled'] is True + assert diag['rows_with_tags'] == 2 + assert diag['metadata_tags_moved'] == 2 + assert diag['theme_tags_kept'] == 3 + + def test_empty_tags(self, monkeypatch): + """Handles empty tag lists.""" + monkeypatch.setenv('TAG_METADATA_SPLIT', '1') + + df = 
pd.DataFrame({ + 'name': ['Card A', 'Card B'], + 'themeTags': [[], ['Card Draw']] + }) + + df_out, diag = _apply_metadata_partition(df) + + assert df_out.loc[0, 'themeTags'] == [] + assert df_out.loc[0, 'metadataTags'] == [] + assert df_out.loc[1, 'themeTags'] == ['Card Draw'] + assert df_out.loc[1, 'metadataTags'] == [] + + assert diag['rows_with_tags'] == 1 + + def test_all_metadata_tags(self, monkeypatch): + """Handles rows with only metadata tags.""" + monkeypatch.setenv('TAG_METADATA_SPLIT', '1') + + df = pd.DataFrame({ + 'name': ['Card A'], + 'themeTags': [['Applied: Cost Reduction', 'Bracket: Game Changer']] + }) + + df_out, diag = _apply_metadata_partition(df) + + assert df_out.loc[0, 'themeTags'] == [] + assert df_out.loc[0, 'metadataTags'] == ['Applied: Cost Reduction', 'Bracket: Game Changer'] + + assert diag['metadata_tags_moved'] == 2 + assert diag['theme_tags_kept'] == 0 + + def test_all_theme_tags(self, monkeypatch): + """Handles rows with only theme tags.""" + monkeypatch.setenv('TAG_METADATA_SPLIT', '1') + + df = pd.DataFrame({ + 'name': ['Card A'], + 'themeTags': [['Card Draw', 'Ramp', 'Spellslinger']] + }) + + df_out, diag = _apply_metadata_partition(df) + + assert df_out.loc[0, 'themeTags'] == ['Card Draw', 'Ramp', 'Spellslinger'] + assert df_out.loc[0, 'metadataTags'] == [] + + assert diag['metadata_tags_moved'] == 0 + assert diag['theme_tags_kept'] == 3 + + def test_feature_flag_disabled(self, monkeypatch): + """Feature flag disables partition.""" + monkeypatch.setenv('TAG_METADATA_SPLIT', '0') + + df = pd.DataFrame({ + 'name': ['Card A'], + 'themeTags': [['Card Draw', 'Applied: Cost Reduction']] + }) + + df_out, diag = _apply_metadata_partition(df) + + # Should not create metadataTags column + assert 'metadataTags' not in df_out.columns + + # Should not modify themeTags + assert df_out.loc[0, 'themeTags'] == ['Card Draw', 'Applied: Cost Reduction'] + + # Should indicate disabled + assert diag['enabled'] is False + + def 
test_missing_theme_tags_column(self, monkeypatch): + """Handles missing themeTags column gracefully.""" + monkeypatch.setenv('TAG_METADATA_SPLIT', '1') + + df = pd.DataFrame({ + 'name': ['Card A'], + 'other_column': ['value'] + }) + + df_out, diag = _apply_metadata_partition(df) + + # Should return unchanged + assert 'themeTags' not in df_out.columns + assert 'metadataTags' not in df_out.columns + + # Should indicate error + assert diag['enabled'] is True + assert 'error' in diag + + def test_non_list_tags(self, monkeypatch): + """Handles non-list values in themeTags.""" + monkeypatch.setenv('TAG_METADATA_SPLIT', '1') + + df = pd.DataFrame({ + 'name': ['Card A', 'Card B', 'Card C'], + 'themeTags': [['Card Draw'], None, 'not a list'] + }) + + df_out, diag = _apply_metadata_partition(df) + + # Only first row should be processed + assert df_out.loc[0, 'themeTags'] == ['Card Draw'] + assert df_out.loc[0, 'metadataTags'] == [] + + assert diag['rows_with_tags'] == 1 + + def test_kindred_protection_partition(self, monkeypatch): + """Kindred protection tags are moved to metadata.""" + monkeypatch.setenv('TAG_METADATA_SPLIT', '1') + + df = pd.DataFrame({ + 'name': ['Card A'], + 'themeTags': [['Protection', 'Knights Gain Protection', 'Card Draw']] + }) + + df_out, diag = _apply_metadata_partition(df) + + assert 'Protection' in df_out.loc[0, 'themeTags'] + assert 'Card Draw' in df_out.loc[0, 'themeTags'] + assert 'Knights Gain Protection' in df_out.loc[0, 'metadataTags'] + + def test_diagnostics_structure(self, monkeypatch): + """Diagnostics contain expected fields.""" + monkeypatch.setenv('TAG_METADATA_SPLIT', '1') + + df = pd.DataFrame({ + 'name': ['Card A'], + 'themeTags': [['Card Draw', 'Applied: Cost Reduction']] + }) + + df_out, diag = _apply_metadata_partition(df) + + # Check required diagnostic fields + assert 'enabled' in diag + assert 'total_rows' in diag + assert 'rows_with_tags' in diag + assert 'metadata_tags_moved' in diag + assert 'theme_tags_kept' in diag + 
assert 'unique_metadata_tags' in diag + assert 'unique_theme_tags' in diag + assert 'most_common_metadata' in diag + assert 'most_common_themes' in diag + + # Check types + assert isinstance(diag['most_common_metadata'], list) + assert isinstance(diag['most_common_themes'], list) + + +class TestCSVCompatibility: + """Tests for CSV read/write with new schema.""" + + def test_csv_roundtrip_with_metadata(self, tmp_path, monkeypatch): + """CSV roundtrip preserves both columns.""" + monkeypatch.setenv('TAG_METADATA_SPLIT', '1') + + csv_path = tmp_path / "test_cards.csv" + + # Create initial dataframe + df = pd.DataFrame({ + 'name': ['Card A'], + 'themeTags': [['Card Draw', 'Ramp']], + 'metadataTags': [['Applied: Cost Reduction']] + }) + + # Write to CSV + df.to_csv(csv_path, index=False) + + # Read back + df_read = pd.read_csv( + csv_path, + converters={'themeTags': pd.eval, 'metadataTags': pd.eval} + ) + + # Verify data preserved + assert df_read.loc[0, 'themeTags'] == ['Card Draw', 'Ramp'] + assert df_read.loc[0, 'metadataTags'] == ['Applied: Cost Reduction'] + + def test_csv_backward_compatible(self, tmp_path, monkeypatch): + """Can read old CSVs without metadataTags.""" + monkeypatch.setenv('TAG_METADATA_SPLIT', '1') + + csv_path = tmp_path / "old_cards.csv" + + # Create old-style CSV without metadataTags + df = pd.DataFrame({ + 'name': ['Card A'], + 'themeTags': [['Card Draw', 'Applied: Cost Reduction']] + }) + df.to_csv(csv_path, index=False) + + # Read back + df_read = pd.read_csv(csv_path, converters={'themeTags': pd.eval}) + + # Should read successfully + assert 'themeTags' in df_read.columns + assert 'metadataTags' not in df_read.columns + assert df_read.loc[0, 'themeTags'] == ['Card Draw', 'Applied: Cost Reduction'] + + # Apply partition + df_partitioned, _ = _apply_metadata_partition(df_read) + + # Should now have both columns + assert 'themeTags' in df_partitioned.columns + assert 'metadataTags' in df_partitioned.columns + assert df_partitioned.loc[0, 
'themeTags'] == ['Card Draw'] + assert df_partitioned.loc[0, 'metadataTags'] == ['Applied: Cost Reduction'] + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/code/web/routes/decks.py b/code/web/routes/decks.py index f84d9a1..957936b 100644 --- a/code/web/routes/decks.py +++ b/code/web/routes/decks.py @@ -159,11 +159,18 @@ def _read_csv_summary(csv_path: Path) -> Tuple[dict, Dict[str, int], Dict[str, i # Type counts/cards (exclude commander entry from distribution) if not is_commander: type_counts[cat] = type_counts.get(cat, 0) + cnt + # M5: Extract metadata tags column if present + metadata_tags_raw = '' + metadata_idx = headers.index('MetadataTags') if 'MetadataTags' in headers else -1 + if metadata_idx >= 0 and metadata_idx < len(row): + metadata_tags_raw = row[metadata_idx] or '' + metadata_tags_list = [t.strip() for t in metadata_tags_raw.split(';') if t.strip()] type_cards.setdefault(cat, []).append({ 'name': name, 'count': cnt, 'role': role, 'tags': tags_list, + 'metadata_tags': metadata_tags_list, # M5: Include metadata tags }) # Curve diff --git a/code/web/templates/base.html b/code/web/templates/base.html index 050d57c..b8a0d88 100644 --- a/code/web/templates/base.html +++ b/code/web/templates/base.html @@ -1012,6 +1012,7 @@ var role = (attr('data-role')||'').trim(); var reasonsRaw = attr('data-reasons')||''; var tagsRaw = attr('data-tags')||''; + var metadataTagsRaw = attr('data-metadata-tags')||''; // M5: Extract metadata tags var reasonsRaw = attr('data-reasons')||''; var roleEl = panel.querySelector('.hcp-role'); var hasFlip = !!card.querySelector('.dfc-toggle'); @@ -1116,6 +1117,14 @@ tagsEl.style.display = 'none'; } else { var tagText = allTags.map(displayLabel).join(', '); + // M5: Temporarily append metadata tags for debugging + if(metadataTagsRaw && metadataTagsRaw.trim()){ + var metaTags = metadataTagsRaw.split(',').map(function(t){return t.trim();}).filter(Boolean); + if(metaTags.length){ + var metaText = 
metaTags.map(displayLabel).join(', '); + tagText = tagText ? (tagText + ' | META: ' + metaText) : ('META: ' + metaText); + } + } tagsEl.textContent = tagText; tagsEl.style.display = tagText ? '' : 'none'; } diff --git a/code/web/templates/partials/deck_summary.html b/code/web/templates/partials/deck_summary.html index e327bef..d7b0e0d 100644 --- a/code/web/templates/partials/deck_summary.html +++ b/code/web/templates/partials/deck_summary.html @@ -74,7 +74,7 @@ {% set owned = (owned_set is defined and c.name and (c.name|lower in owned_set)) %} {{ cnt }} x - {{ c.name }} + {{ c.name }}