diff --git a/CHANGELOG.md b/CHANGELOG.md index ba01974..8801512 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,9 @@ This format follows Keep a Changelog principles and aims for Semantic Versioning - Intelligent deck builder filtering includes board-relevant protection while excluding self-only and type-specific cards - Tiered pool limiting focuses on high-quality staples while maintaining variety across builds - Improved scope tagging for cards with keyword-only protection effects (no grant text, just inherent keywords) +- **Tagging Module Refactoring**: Large-scale refactor to improve code quality and maintainability + - Centralized regex patterns, extracted reusable utilities, decomposed complex functions + - Improved code organization and readability while maintaining 100% tagging accuracy ### Added - Metadata partition system separates diagnostic tags from gameplay themes in card data @@ -42,11 +45,13 @@ This format follows Keep a Changelog principles and aims for Semantic Versioning - Setup progress polling reduced from 3s to 5-10s intervals for better performance - Theme catalog streamlined from 753 to 736 themes (-2.3%) with improved quality - Protection tag refined to focus on 329 cards that grant shields (down from 1,166 with inherent effects) +- Protection tag renamed to "Protective Effects" throughout web interface to avoid confusion with the Magic keyword "protection" - Theme catalog automatically excludes metadata tags from theme suggestions - Grant detection now strips reminder text before pattern matching to avoid false positives - Deck builder protection phase now filters by scope metadata: includes "Your Permanents:", excludes "Self:" protection - Protection card selection now randomized per build for variety (using seeded RNG when deterministic mode enabled) - Protection pool now limited to ~40-50 high-quality cards (tiered selection: top 3x target + random 10-20 extras) +- Tagging module imports standardized with consistent 
organization and centralized constants ### Fixed - Setup progress now shows 100% completion instead of getting stuck at 99% @@ -63,6 +68,9 @@ This format follows Keep a Changelog principles and aims for Semantic Versioning - Cloak of Invisibility, Teferi's Curse now get "Your Permanents: Phasing" tags - Shimmer now gets "Blanket: Phasing" tag for chosen type effect - King of the Oathbreakers now gets "Self: Phasing" tag for reactive trigger +- Cards with static keywords (Protection, Hexproof, Ward, Indestructible) in their keywords field now get proper scope metadata tags +- Cards with X in their mana cost now properly identified and tagged with "X Spells" theme for better deck building accuracy +- Card tagging system enhanced with smarter pattern detection and more consistent categorization ## [2.5.2] - 2025-10-08 ### Summary diff --git a/README.md b/README.md index 7ad729e..6089672 100644 --- a/README.md +++ b/README.md @@ -101,13 +101,49 @@ Execute saved configs without manual input. Refresh data and caches when formats shift. - Runs card downloads, CSV regeneration, smart tagging (keywords + protection grants), and commander catalog rebuilds. - Controlled by `SHOW_SETUP=1` (on by default in compose). 
-- Force a rebuild manually: +- **Force a full rebuild (setup + tagging)**: ```powershell - docker compose run --rm --entrypoint bash web -lc "python -m code.file_setup.setup" + # Docker: + docker compose run --rm web python -c "from code.file_setup.setup import initial_setup; from code.tagging.tagger import run_tagging; initial_setup(); run_tagging()" + + # Local (with venv activated): + python -c "from code.file_setup.setup import initial_setup; from code.tagging.tagger import run_tagging; initial_setup(); run_tagging()" + + # With parallel processing (faster): + python -c "from code.file_setup.setup import initial_setup; from code.tagging.tagger import run_tagging; initial_setup(); run_tagging(parallel=True)" + + # With parallel processing and custom worker count: + python -c "from code.file_setup.setup import initial_setup; from code.tagging.tagger import run_tagging; initial_setup(); run_tagging(parallel=True, max_workers=4)" ``` -- Rebuild only the commander catalog: +- **Rebuild only CSVs without tagging**: ```powershell - docker compose run --rm --entrypoint bash web -lc "python -m code.scripts.refresh_commander_catalog" + # Docker: + docker compose run --rm web python -c "from code.file_setup.setup import initial_setup; initial_setup()" + + # Local: + python -c "from code.file_setup.setup import initial_setup; initial_setup()" + ``` +- **Run only tagging (CSVs must exist)**: + ```powershell + # Docker: + docker compose run --rm web python -c "from code.tagging.tagger import run_tagging; run_tagging()" + + # Local: + python -c "from code.tagging.tagger import run_tagging; run_tagging()" + + # With parallel processing (faster): + python -c "from code.tagging.tagger import run_tagging; run_tagging(parallel=True)" + + # With parallel processing and custom worker count: + python -c "from code.tagging.tagger import run_tagging; run_tagging(parallel=True, max_workers=4)" + ``` +- **Rebuild only the commander catalog**: + ```powershell + # Docker: + docker compose 
run --rm web python -m code.scripts.refresh_commander_catalog + + # Local: + python -m code.scripts.refresh_commander_catalog ``` ### Owned Library diff --git a/RELEASE_NOTES_TEMPLATE.md b/RELEASE_NOTES_TEMPLATE.md index c32861c..231f8ce 100644 --- a/RELEASE_NOTES_TEMPLATE.md +++ b/RELEASE_NOTES_TEMPLATE.md @@ -1,61 +1,66 @@ # MTG Python Deckbuilder ${VERSION} ## [Unreleased] - ### Summary -- Card tagging improvements separate gameplay themes from internal metadata for cleaner deck building -- Keyword cleanup reduces specialty keyword noise by 96% while keeping important mechanics -- Protection tag now highlights cards that grant shields to your board, not just inherent protection -- **Protection System Overhaul**: Smarter card detection, scope-aware filtering, and focused pool selection deliver consistent, high-quality protection card recommendations - - Deck builder distinguishes between board-wide protection and self-only effects using fine-grained metadata - - Intelligent pool limiting focuses on high-quality staples while maintaining variety across builds - - Scope-aware filtering automatically excludes self-protection and type-specific cards that don't match your deck - - Enhanced detection handles Equipment, Auras, phasing effects, and complex triggers correctly -- Web UI responsiveness upgrades with smarter caching and streamlined loading +- Card tagging system improvements split metadata from gameplay themes for cleaner deck building experience +- Keyword normalization reduces specialty keyword noise by 96% while maintaining theme catalog quality +- Protection tag now focuses on cards that grant shields to others, not just those with inherent protection +- Web UI improvements: faster polling, fixed progress display, and theme refresh stability +- **Protection System Overhaul**: Comprehensive enhancement to protection card detection, classification, and deck building + - Fine-grained scope metadata distinguishes self-protection from board-wide effects 
("Your Permanents: Hexproof" vs "Self: Hexproof") + - Enhanced grant detection with Equipment/Aura patterns, phasing support, and complex trigger handling + - Intelligent deck builder filtering includes board-relevant protection while excluding self-only and type-specific cards + - Tiered pool limiting focuses on high-quality staples while maintaining variety across builds + - Improved scope tagging for cards with keyword-only protection effects (no grant text, just inherent keywords) +- **Tagging Module Refactoring**: Large-scale refactor to improve code quality and maintainability + - Centralized regex patterns, extracted reusable utilities, decomposed complex functions + - Improved code organization and readability while maintaining 100% tagging accuracy ### Added -- Metadata partition keeps internal tags separate from gameplay themes -- Keyword normalization filters out one-off specialty mechanics while keeping evergreen abilities -- Protection grant detection identifies cards that give Hexproof, Ward, or other shields to your permanents -- Creature-type-specific protection automatically tagged (e.g., "Knights Gain Protection" for tribal strategies) -- Protection scope filtering (feature flag: `TAG_PROTECTION_SCOPE`) automatically excludes self-only protection like Svyelun -- Phasing cards with protective effects now included in protection pool (e.g., cards that phase out your permanents) -- Debug mode: Hover over cards to see metadata tags showing protection scope (e.g., "Your Permanents: Hexproof") -- Skeleton placeholders with smart timing across build wizard and commander catalog -- Must-have toggle API with telemetry tracking for include/exclude interactions -- Commander catalog lazy-loads art and caches frequently accessed views -- Collapsible sections for mana analytics defer loading until expanded -- Click-to-pin chart tooltips for easier card comparisons -- Virtualized card lists handle large decks smoothly +- Metadata partition system separates 
diagnostic tags from gameplay themes in card data +- Keyword normalization system with smart filtering of one-off specialty mechanics +- Allowlist preserves important keywords like Flying, Myriad, and Transform +- Protection grant detection identifies cards that give Hexproof, Ward, or Indestructible to other permanents +- Automatic tagging for creature-type-specific protection (e.g., "Knights Gain Protection") +- New `metadataTags` column in card data for bracket annotations and internal diagnostics +- Static phasing keyword detection from keywords field (catches creatures like Breezekeeper) +- "Other X you control have Y" protection pattern for static ability grants +- "Enchanted creature has phasing" pattern detection +- Chosen type blanket phasing patterns +- Complex trigger phasing patterns (reactive, consequent, end-of-turn) +- Protection scope filtering in deck builder (feature flag: `TAG_PROTECTION_SCOPE`) intelligently selects board-relevant protection +- Phasing cards with "Your Permanents:" or "Targeted:" metadata now tagged as Protection and included in protection pool +- Metadata tags temporarily visible in card hover previews for debugging (shows scope like "Your Permanents: Hexproof") ### Changed - Card tags now split between themes (for deck building) and metadata (for diagnostics) -- Keywords consolidate variants (e.g., "Commander ninjutsu" → "Ninjutsu") for consistent theme matching -- Protection tag refined to focus on shield-granting cards (329 cards vs 1,166 previously) -- Deck builder protection phase filters by scope: includes "Your Permanents:", excludes "Self:" protection -- Protection card selection randomized for variety across builds (deterministic when using seeded mode) -- Theme catalog streamlined with improved quality (736 themes, down 2.3%) -- Theme catalog automatically excludes metadata tags from suggestions -- Commander search and theme picker share intelligent debounce to prevent redundant requests -- Include/exclude buttons 
respond immediately with optimistic updates -- Commander catalog default view loads from cache for sub-200ms response times -- Deck review loads in focused chunks for faster initial page loads -- Chart hover zones expanded for easier interaction +- Keywords now consolidate variants (e.g., "Commander ninjutsu" becomes "Ninjutsu") +- Setup progress polling reduced from 3s to 5-10s intervals for better performance +- Theme catalog streamlined from 753 to 736 themes (-2.3%) with improved quality +- Protection tag refined to focus on 329 cards that grant shields (down from 1,166 with inherent effects) +- Protection tag renamed to "Protective Effects" throughout web interface to avoid confusion with the Magic keyword "protection" +- Theme catalog automatically excludes metadata tags from theme suggestions +- Grant detection now strips reminder text before pattern matching to avoid false positives +- Deck builder protection phase now filters by scope metadata: includes "Your Permanents:", excludes "Self:" protection +- Protection card selection now randomized per build for variety (using seeded RNG when deterministic mode enabled) +- Protection pool now limited to ~40-50 high-quality cards (tiered selection: top 3x target + random 10-20 extras) +- Tagging module imports standardized with consistent organization and centralized constants ### Fixed -### Fixed -- Setup progress correctly displays 100% upon completion -- Theme catalog refresh stability improved after initial setup -- Server polling optimized for reduced load -- Protection detection accurately filters inherent vs granted effects -- Protection scope detection improvements for 11+ cards: - - Dive Down, Glint no longer falsely marked as opponent grants (reminder text now stripped) - - Drogskol Captain and similar cards with "Other X you control have Y" patterns now tagged correctly - - 7 cards with static Phasing keyword now detected (Breezekeeper, Teferi's Drake, etc.) 
- - Cloak of Invisibility and Teferi's Curse now get "Your Permanents: Phasing" tags - - Shimmer now gets "Blanket: Phasing" for chosen type effect - - King of the Oathbreakers reactive trigger now properly detected -- Type-specific protection (Knight Exemplar, Timber Protector) no longer added to non-matching decks -- Deck builder correctly excludes "Self:" protection cards (e.g., Svyelun) from protection pool -- Inherent protection cards (Aysen Highway, Phantom Colossus) now correctly receive scope metadata tags -- Protection pool now intelligently limited to focus on high-quality, relevant cards for your deck +- Setup progress now shows 100% completion instead of getting stuck at 99% +- Theme catalog no longer continuously regenerates after setup completes +- Health indicator polling optimized to reduce server load +- Protection detection now correctly excludes creatures with only inherent keywords +- Dive Down, Glint no longer falsely identified as granting to opponents (reminder text fix) +- Drogskol Captain, Haytham Kenway now correctly get "Your Permanents" scope tags +- 7 cards with static Phasing keyword now properly detected (Breezekeeper, Teferi's Drake, etc.) +- Type-specific protection grants (e.g., "Knights Gain Indestructible") now correctly excluded from general protection pool +- Protection scope filter now properly prioritizes exclusions over inclusions (fixes Knight Exemplar in non-Knight decks) +- Inherent protection cards (Aysen Highway, Phantom Colossus, etc.) 
now correctly get "Self: Protection" metadata tags +- Scope tagging now applies to ALL cards with protection effects, not just grant cards +- Cloak of Invisibility, Teferi's Curse now get "Your Permanents: Phasing" tags +- Shimmer now gets "Blanket: Phasing" tag for chosen type effect +- King of the Oathbreakers now gets "Self: Phasing" tag for reactive trigger +- Cards with static keywords (Protection, Hexproof, Ward, Indestructible) in their keywords field now get proper scope metadata tags +- Cards with X in their mana cost now properly identified and tagged with "X Spells" theme for better deck building accuracy +- Card tagging system enhanced with smarter pattern detection and more consistent categorization diff --git a/_tmp_run_catalog.ps1 b/_tmp_run_catalog.ps1 deleted file mode 100644 index 36db49d..0000000 --- a/_tmp_run_catalog.ps1 +++ /dev/null @@ -1 +0,0 @@ -=\ 1\; & \c:/Users/Matt/mtg_python/mtg_python_deckbuilder/.venv/Scripts/python.exe\ code/scripts/build_theme_catalog.py --output config/themes/theme_list_tmp.json diff --git a/code/deck_builder/builder_constants.py b/code/deck_builder/builder_constants.py index d7cc810..6193869 100644 --- a/code/deck_builder/builder_constants.py +++ b/code/deck_builder/builder_constants.py @@ -438,7 +438,7 @@ DEFAULT_REMOVAL_COUNT: Final[int] = 10 # Default number of spot removal spells DEFAULT_WIPES_COUNT: Final[int] = 2 # Default number of board wipes DEFAULT_CARD_ADVANTAGE_COUNT: Final[int] = 10 # Default number of card advantage pieces -DEFAULT_PROTECTION_COUNT: Final[int] = 8 # Default number of protection spells +DEFAULT_PROTECTION_COUNT: Final[int] = 8 # Default number of protective effects (hexproof, indestructible, protection, ward, etc.) 
# Deck composition prompts DECK_COMPOSITION_PROMPTS: Final[Dict[str, str]] = { @@ -450,7 +450,7 @@ DECK_COMPOSITION_PROMPTS: Final[Dict[str, str]] = { 'removal': 'Enter desired number of spot removal spells (default: 10):', 'wipes': 'Enter desired number of board wipes (default: 2):', 'card_advantage': 'Enter desired number of card advantage pieces (default: 10):', - 'protection': 'Enter desired number of protection spells (default: 8):', + 'protection': 'Enter desired number of protective effects (default: 8):', 'max_deck_price': 'Enter maximum total deck price in dollars (default: 400.0):', 'max_card_price': 'Enter maximum price per card in dollars (default: 20.0):' } @@ -511,7 +511,7 @@ DEFAULT_THEME_TAGS = [ 'Combat Matters', 'Control', 'Counters Matter', 'Energy', 'Enter the Battlefield', 'Equipment', 'Exile Matters', 'Infect', 'Interaction', 'Lands Matter', 'Leave the Battlefield', 'Legends Matter', - 'Life Matters', 'Mill', 'Monarch', 'Protection', 'Ramp', 'Reanimate', + 'Life Matters', 'Mill', 'Monarch', 'Protective Effects', 'Ramp', 'Reanimate', 'Removal', 'Sacrifice Matters', 'Spellslinger', 'Stax', 'Superfriends', 'Theft', 'Token Creation', 'Tokens Matter', 'Voltron', 'X Spells' ] diff --git a/code/tagging/bracket_policy_applier.py b/code/tagging/bracket_policy_applier.py index 29de35f..80c63b0 100644 --- a/code/tagging/bracket_policy_applier.py +++ b/code/tagging/bracket_policy_applier.py @@ -1,9 +1,11 @@ from __future__ import annotations +# Standard library imports import json from pathlib import Path from typing import Dict, Iterable, Set +# Third-party imports import pandas as pd def _ensure_norm_series(df: pd.DataFrame, source_col: str, norm_col: str) -> pd.Series: diff --git a/code/tagging/combo_schema.py b/code/tagging/combo_schema.py index e0129a4..088db69 100644 --- a/code/tagging/combo_schema.py +++ b/code/tagging/combo_schema.py @@ -1,9 +1,11 @@ from __future__ import annotations +# Standard library imports +import json from pathlib import 
Path from typing import List, Optional -import json +# Third-party imports from pydantic import BaseModel, Field diff --git a/code/tagging/combo_tag_applier.py b/code/tagging/combo_tag_applier.py index 52d7496..1e0ad68 100644 --- a/code/tagging/combo_tag_applier.py +++ b/code/tagging/combo_tag_applier.py @@ -1,14 +1,17 @@ from __future__ import annotations -import json +# Standard library imports import ast +import json +from collections import defaultdict from dataclasses import dataclass from pathlib import Path -from typing import Dict, List, Set, DefaultDict -from collections import defaultdict +from typing import DefaultDict, Dict, List, Set +# Third-party imports import pandas as pd +# Local application imports from settings import CSV_DIRECTORY, SETUP_COLORS diff --git a/code/tagging/multi_face_merger.py b/code/tagging/multi_face_merger.py index 8fd679d..0dd2753 100644 --- a/code/tagging/multi_face_merger.py +++ b/code/tagging/multi_face_merger.py @@ -73,6 +73,132 @@ def load_merge_summary() -> Dict[str, Any]: return {"updated_at": None, "colors": {}} +def _merge_tag_columns(work_df: pd.DataFrame, group_sorted: pd.DataFrame, primary_idx: int) -> None: + """Merge list columns (themeTags, roleTags) into union values. + + Args: + work_df: Working DataFrame to update + group_sorted: Sorted group of faces for a multi-face card + primary_idx: Index of primary face to update + """ + for column in _LIST_UNION_COLUMNS: + if column in group_sorted.columns: + union_values = _merge_object_lists(group_sorted[column]) + work_df.at[primary_idx, column] = union_values + + if "keywords" in group_sorted.columns: + keyword_union = _merge_keywords(group_sorted["keywords"]) + work_df.at[primary_idx, "keywords"] = _join_keywords(keyword_union) + + +def _build_face_payload(face_row: pd.Series) -> Dict[str, Any]: + """Build face metadata payload from a single face row. 
+ + Args: + face_row: Single face row from grouped DataFrame + + Returns: + Dictionary containing face metadata + """ + text_val = face_row.get("text") or face_row.get("oracleText") or "" + mana_cost_val = face_row.get("manaCost", face_row.get("mana_cost", "")) or "" + mana_value_raw = face_row.get("manaValue", face_row.get("mana_value", "")) + + try: + if mana_value_raw in (None, ""): + mana_value_val = None + else: + mana_value_val = float(mana_value_raw) + if math.isnan(mana_value_val): + mana_value_val = None + except Exception: + mana_value_val = None + + type_val = face_row.get("type", "") or "" + + return { + "face": str(face_row.get("faceName") or face_row.get("name") or ""), + "side": str(face_row.get("side") or ""), + "layout": str(face_row.get("layout") or ""), + "themeTags": _merge_object_lists([face_row.get("themeTags", [])]), + "roleTags": _merge_object_lists([face_row.get("roleTags", [])]), + "type": str(type_val), + "text": str(text_val), + "mana_cost": str(mana_cost_val), + "mana_value": mana_value_val, + "produces_mana": _text_produces_mana(text_val), + "is_land": 'land' in str(type_val).lower(), + } + + +def _build_merge_detail(name: str, group_sorted: pd.DataFrame, faces_payload: List[Dict[str, Any]]) -> Dict[str, Any]: + """Build detailed merge information for a multi-face card group. 
+ + Args: + name: Card name + group_sorted: Sorted group of faces + faces_payload: List of face metadata dictionaries + + Returns: + Dictionary containing merge details + """ + layout_set = sorted({f.get("layout", "") for f in faces_payload if f.get("layout")}) + removed_faces = faces_payload[1:] if len(faces_payload) > 1 else [] + + return { + "name": name, + "total_faces": len(group_sorted), + "dropped_faces": max(len(group_sorted) - 1, 0), + "layouts": layout_set, + "primary_face": faces_payload[0] if faces_payload else {}, + "removed_faces": removed_faces, + "theme_tags": sorted({tag for face in faces_payload for tag in face.get("themeTags", [])}), + "role_tags": sorted({tag for face in faces_payload for tag in face.get("roleTags", [])}), + "faces": faces_payload, + } + + +def _log_merge_summary(color: str, merged_count: int, drop_count: int, multi_face_count: int, logger) -> None: + """Log merge summary with structured and human-readable formats. + + Args: + color: Color being processed + merged_count: Number of card groups merged + drop_count: Number of face rows dropped + multi_face_count: Total multi-face rows processed + logger: Logger instance + """ + try: + logger.info( + "dfc_merge_summary %s", + json.dumps( + { + "event": "dfc_merge_summary", + "color": color, + "groups_merged": merged_count, + "faces_dropped": drop_count, + "multi_face_rows": multi_face_count, + }, + sort_keys=True, + ), + ) + except Exception: + logger.info( + "dfc_merge_summary event=%s groups=%d dropped=%d rows=%d", + color, + merged_count, + drop_count, + multi_face_count, + ) + + logger.info( + "Merged %d multi-face card groups for %s (dropped %d extra faces)", + merged_count, + color, + drop_count, + ) + + def merge_multi_face_rows( df: pd.DataFrame, color: str, @@ -93,7 +219,6 @@ def merge_multi_face_rows( return df work_df = df.copy() - layout_series = work_df["layout"].fillna("").astype(str).str.lower() multi_mask = layout_series.isin(_MULTI_FACE_LAYOUTS) @@ -110,66 +235,15 
@@ def merge_multi_face_rows( group_sorted = _sort_faces(group) primary_idx = group_sorted.index[0] - faces_payload: List[Dict[str, Any]] = [] - for column in _LIST_UNION_COLUMNS: - if column in group_sorted.columns: - union_values = _merge_object_lists(group_sorted[column]) - work_df.at[primary_idx, column] = union_values + _merge_tag_columns(work_df, group_sorted, primary_idx) - if "keywords" in group_sorted.columns: - keyword_union = _merge_keywords(group_sorted["keywords"]) - work_df.at[primary_idx, "keywords"] = _join_keywords(keyword_union) - - for _, face_row in group_sorted.iterrows(): - text_val = face_row.get("text") or face_row.get("oracleText") or "" - mana_cost_val = face_row.get("manaCost", face_row.get("mana_cost", "")) or "" - mana_value_raw = face_row.get("manaValue", face_row.get("mana_value", "")) - try: - if mana_value_raw in (None, ""): - mana_value_val = None - else: - mana_value_val = float(mana_value_raw) - if math.isnan(mana_value_val): - mana_value_val = None - except Exception: - mana_value_val = None - type_val = face_row.get("type", "") or "" - faces_payload.append( - { - "face": str(face_row.get("faceName") or face_row.get("name") or ""), - "side": str(face_row.get("side") or ""), - "layout": str(face_row.get("layout") or ""), - "themeTags": _merge_object_lists([face_row.get("themeTags", [])]), - "roleTags": _merge_object_lists([face_row.get("roleTags", [])]), - "type": str(type_val), - "text": str(text_val), - "mana_cost": str(mana_cost_val), - "mana_value": mana_value_val, - "produces_mana": _text_produces_mana(text_val), - "is_land": 'land' in str(type_val).lower(), - } - ) - - for idx in group_sorted.index[1:]: - drop_indices.append(idx) + faces_payload = [_build_face_payload(row) for _, row in group_sorted.iterrows()] + drop_indices.extend(group_sorted.index[1:]) + merged_count += 1 - layout_set = sorted({f.get("layout", "") for f in faces_payload if f.get("layout")}) - removed_faces = faces_payload[1:] if len(faces_payload) > 1 
else [] - merge_details.append( - { - "name": name, - "total_faces": len(group_sorted), - "dropped_faces": max(len(group_sorted) - 1, 0), - "layouts": layout_set, - "primary_face": faces_payload[0] if faces_payload else {}, - "removed_faces": removed_faces, - "theme_tags": sorted({tag for face in faces_payload for tag in face.get("themeTags", [])}), - "role_tags": sorted({tag for face in faces_payload for tag in face.get("roleTags", [])}), - "faces": faces_payload, - } - ) + merge_details.append(_build_merge_detail(name, group_sorted, faces_payload)) if drop_indices: work_df = work_df.drop(index=drop_indices) @@ -192,38 +266,10 @@ def merge_multi_face_rows( logger.warning("Failed to record DFC merge summary for %s: %s", color, exc) if logger is not None: - try: - logger.info( - "dfc_merge_summary %s", - json.dumps( - { - "event": "dfc_merge_summary", - "color": color, - "groups_merged": merged_count, - "faces_dropped": len(drop_indices), - "multi_face_rows": int(multi_mask.sum()), - }, - sort_keys=True, - ), - ) - except Exception: - logger.info( - "dfc_merge_summary event=%s groups=%d dropped=%d rows=%d", - color, - merged_count, - len(drop_indices), - int(multi_mask.sum()), - ) - logger.info( - "Merged %d multi-face card groups for %s (dropped %d extra faces)", - merged_count, - color, - len(drop_indices), - ) + _log_merge_summary(color, merged_count, len(drop_indices), int(multi_mask.sum()), logger) _persist_merge_summary(color, summary_payload, logger) - # Reset index to keep downstream expectations consistent. return work_df.reset_index(drop=True) diff --git a/code/tagging/phasing_scope_detection.py b/code/tagging/phasing_scope_detection.py index b16a3d8..886b6cd 100644 --- a/code/tagging/phasing_scope_detection.py +++ b/code/tagging/phasing_scope_detection.py @@ -9,15 +9,97 @@ Detects the scope of phasing effects with multiple dimensions: - Blanket: Phasing (phases all permanents out) Cards can have multiple scope tags (e.g., Targeted + Your Permanents). 
+ +Refactored in M2: Create Scope Detection Utilities to use generic scope detection. """ +# Standard library imports import re from typing import Set + +# Local application imports +from . import scope_detection_utils as scope_utils from code.logging_util import get_logger logger = get_logger(__name__) +# Phasing scope pattern definitions +def _get_phasing_scope_patterns() -> scope_utils.ScopePatterns: + """ + Build scope patterns for phasing abilities. + + Returns: + ScopePatterns object with compiled patterns + """ + # Targeting patterns (special for phasing - detects "target...phases out") + targeting_patterns = [ + re.compile(r'target\s+(?:\w+\s+)*(?:creature|permanent|artifact|enchantment|nonland\s+permanent)s?(?:[^.]*)?phases?\s+out', re.IGNORECASE), + re.compile(r'target\s+player\s+controls[^.]*phases?\s+out', re.IGNORECASE), + ] + + # Self-reference patterns + self_patterns = [ + re.compile(r'this\s+(?:creature|permanent|artifact|enchantment)\s+phases?\s+out', re.IGNORECASE), + re.compile(r'~\s+phases?\s+out', re.IGNORECASE), + # Triggered self-phasing (King of the Oathbreakers) + re.compile(r'whenever.*(?:becomes\s+the\s+target|becomes\s+target).*(?:it|this\s+creature)\s+phases?\s+out', re.IGNORECASE), + # Consequent self-phasing (Cyclonus: "connive. 
Then...phase out") + re.compile(r'(?:then|,)\s+(?:it|this\s+creature)\s+phases?\s+out', re.IGNORECASE), + # At end of turn/combat self-phasing + re.compile(r'(?:at\s+(?:the\s+)?end\s+of|after).*(?:it|this\s+creature)\s+phases?\s+out', re.IGNORECASE), + ] + + # Opponent patterns + opponent_patterns = [ + re.compile(r'target\s+(?:\w+\s+)*(?:creature|permanent)\s+an?\s+opponents?\s+controls?\s+phases?\s+out', re.IGNORECASE), + # Unqualified targets (can target opponents' stuff if no "you control" restriction) + re.compile(r'(?:up\s+to\s+)?(?:one\s+|x\s+|that\s+many\s+)?(?:other\s+)?(?:another\s+)?target\s+(?:\w+\s+)*(?:creature|permanent|artifact|enchantment|nonland\s+permanent)s?(?:[^.]*)?phases?\s+out', re.IGNORECASE), + re.compile(r'target\s+(?:\w+\s+)*(?:creature|permanent|artifact|enchantment|land|nonland\s+permanent)(?:,|\s+and)?\s+(?:then|and)?\s+it\s+phases?\s+out', re.IGNORECASE), + ] + + # Your permanents patterns + your_patterns = [ + # Explicit "you control" + re.compile(r'(?:target\s+)?(?:creatures?|permanents?|nonland\s+permanents?)\s+you\s+control\s+phases?\s+out', re.IGNORECASE), + re.compile(r'(?:target\s+)?(?:other\s+)?(?:creatures?|permanents?)\s+you\s+control\s+phases?\s+out', re.IGNORECASE), + re.compile(r'permanents?\s+you\s+control\s+phase\s+out', re.IGNORECASE), + re.compile(r'(?:any|up\s+to)\s+(?:number\s+of\s+)?(?:target\s+)?(?:other\s+)?(?:creatures?|permanents?|nonland\s+permanents?)\s+you\s+control\s+phases?\s+out', re.IGNORECASE), + re.compile(r'all\s+(?:creatures?|permanents?)\s+you\s+control\s+phase\s+out', re.IGNORECASE), + re.compile(r'each\s+(?:creature|permanent)\s+you\s+control\s+phases?\s+out', re.IGNORECASE), + # Pronoun reference to "you control" context + re.compile(r'(?:creatures?|permanents?|planeswalkers?)\s+you\s+control[^.]*(?:those|the)\s+(?:creatures?|permanents?|planeswalkers?)\s+phase\s+out', re.IGNORECASE), + re.compile(r'creature\s+you\s+control[^.]*(?:it)\s+phases?\s+out', re.IGNORECASE), + 
re.compile(r'you\s+control.*those\s+(?:creatures?|permanents?|planeswalkers?)\s+phase\s+out', re.IGNORECASE), + # Equipment/Aura + re.compile(r'equipped\s+(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?phases?\s+out', re.IGNORECASE), + re.compile(r'enchanted\s+(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?phases?\s+out', re.IGNORECASE), + re.compile(r'enchanted\s+(?:creature|permanent)\s+(?:has|gains?)\s+phasing', re.IGNORECASE), + re.compile(r'(?:equipped|enchanted)\s+(?:creature|permanent)[^.]*,?\s+(?:then\s+)?that\s+(?:creature|permanent)\s+phases?\s+out', re.IGNORECASE), + # Target controlled by specific player + re.compile(r'(?:each|target)\s+(?:creature|permanent)\s+target\s+player\s+controls\s+phases?\s+out', re.IGNORECASE), + ] + + # Blanket patterns + blanket_patterns = [ + re.compile(r'all\s+(?:nontoken\s+)?(?:creatures?|permanents?)(?:\s+of\s+that\s+type)?\s+(?:[^.]*\s+)?phase\s+out', re.IGNORECASE), + re.compile(r'each\s+(?:creature|permanent)\s+(?:[^.]*\s+)?phases?\s+out', re.IGNORECASE), + # Type-specific blanket (Shimmer) + re.compile(r'each\s+(?:land|creature|permanent|artifact|enchantment)\s+of\s+the\s+chosen\s+type\s+has\s+phasing', re.IGNORECASE), + re.compile(r'(?:lands?|creatures?|permanents?|artifacts?|enchantments?)\s+of\s+the\s+chosen\s+type\s+(?:have|has)\s+phasing', re.IGNORECASE), + # Pronoun reference to "all creatures" + re.compile(r'all\s+(?:nontoken\s+)?(?:creatures?|permanents?)[^.]*,?\s+(?:then\s+)?(?:those|the)\s+(?:creatures?|permanents?)\s+phase\s+out', re.IGNORECASE), + ] + + return scope_utils.ScopePatterns( + opponent=opponent_patterns, + self_ref=self_patterns, + your_permanents=your_patterns, + blanket=blanket_patterns, + targeted=targeting_patterns + ) + + def get_phasing_scope_tags(text: str, card_name: str, keywords: str = '') -> Set[str]: """ Get all phasing scope metadata tags for a card. 
@@ -47,121 +129,46 @@ def get_phasing_scope_tags(text: str, card_name: str, keywords: str = '') -> Set # Check for static "Phasing" keyword ability (self-phasing) # Only add Self tag if card doesn't grant phasing to others if 'phasing' in keywords_lower: - # Remove reminder text to avoid false positives - text_no_reminder = re.sub(r'\([^)]*\)', '', text_lower) - - # Check if card grants phasing to others (has granting language in main text) - # Look for patterns like "enchanted creature has", "other X have", "target", etc. - grants_to_others = bool(re.search( + # Define patterns for checking if card grants phasing to others + grants_pattern = [re.compile( r'(other|target|each|all|enchanted|equipped|creatures? you control|permanents? you control).*phas', - text_no_reminder - )) + re.IGNORECASE + )] - # If no granting language, it's just self-phasing - if not grants_to_others: + is_static = scope_utils.check_static_keyword_legacy( + keywords=keywords, + static_keyword='phasing', + text=text, + grant_patterns=grants_pattern + ) + + if is_static: tags.add('Self: Phasing') return tags # Early return - static keyword only - # Check if phasing is mentioned in text (including "has phasing", "gain phasing", etc.) 
- if 'phas' not in text_lower: # Changed from 'phase' to 'phas' to catch "phasing" too + # Check if phasing is mentioned in text + if 'phas' not in text_lower: return tags - # Check for targeting (any "target" + phasing) - # Targeting detection - must have target AND phase in same sentence/clause - targeting_patterns = [ - r'target\s+(?:\w+\s+)*(?:creature|permanent|artifact|enchantment|nonland\s+permanent)s?(?:[^.]*)?phases?\s+out', - r'target\s+player\s+controls[^.]*phases?\s+out', - ] + # Build phasing patterns and detect scopes + patterns = _get_phasing_scope_patterns() - is_targeted = any(re.search(pattern, text_lower) for pattern in targeting_patterns) + # Detect all scopes (phasing can have multiple) + scopes = scope_utils.detect_multi_scope( + text=text, + card_name=card_name, + ability_keyword='phas', # Use 'phas' to catch both 'phase' and 'phasing' + patterns=patterns, + check_grant_verbs=False # Phasing doesn't need grant verb checking + ) - if is_targeted: - tags.add("Targeted: Phasing") - logger.debug(f"Card '{card_name}': detected Targeted: Phasing") - - # Check for self-phasing - self_patterns = [ - r'this\s+(?:creature|permanent|artifact|enchantment)\s+phases?\s+out', - r'~\s+phases?\s+out', - rf'\b{re.escape(card_name.lower())}\s+phases?\s+out', - # NEW: Triggered self-phasing (King of the Oathbreakers: "it phases out" as reactive protection) - r'whenever.*(?:becomes\s+the\s+target|becomes\s+target).*(?:it|this\s+creature)\s+phases?\s+out', - # NEW: Consequent self-phasing (Cyclonus: "connive. 
Then...phase out") - r'(?:then|,)\s+(?:it|this\s+creature)\s+phases?\s+out', - # NEW: At end of turn/combat self-phasing - r'(?:at\s+(?:the\s+)?end\s+of|after).*(?:it|this\s+creature)\s+phases?\s+out', - ] - - if any(re.search(pattern, text_lower) for pattern in self_patterns): - tags.add("Self: Phasing") - logger.debug(f"Card '{card_name}': detected Self: Phasing") - - # Check for opponent permanent phasing (removal effect) - opponent_patterns = [ - r'target\s+(?:\w+\s+)*(?:creature|permanent)\s+an?\s+opponents?\s+controls?\s+phases?\s+out', - ] - - # Check for unqualified targets (can target opponents' stuff) - # More flexible to handle various phasing patterns - unqualified_target_patterns = [ - r'(?:up\s+to\s+)?(?:one\s+|x\s+|that\s+many\s+)?(?:other\s+)?(?:another\s+)?target\s+(?:\w+\s+)*(?:creature|permanent|artifact|enchantment|nonland\s+permanent)s?(?:[^.]*)?phases?\s+out', - r'target\s+(?:\w+\s+)*(?:creature|permanent|artifact|enchantment|land|nonland\s+permanent)(?:,|\s+and)?\s+(?:then|and)?\s+it\s+phases?\s+out', - ] - - has_opponent_specific = any(re.search(pattern, text_lower) for pattern in opponent_patterns) - has_unqualified_target = any(re.search(pattern, text_lower) for pattern in unqualified_target_patterns) - - # If unqualified AND not restricted to "you control", can target opponents - if has_opponent_specific or (has_unqualified_target and 'you control' not in text_lower): - tags.add("Opponent Permanents: Phasing") - logger.debug(f"Card '{card_name}': detected Opponent Permanents: Phasing") - - # Check for your permanents phasing - your_patterns = [ - # Explicit "you control" - r'(?:target\s+)?(?:creatures?|permanents?|nonland\s+permanents?)\s+you\s+control\s+phases?\s+out', - r'(?:target\s+)?(?:other\s+)?(?:creatures?|permanents?)\s+you\s+control\s+phases?\s+out', - r'permanents?\s+you\s+control\s+phase\s+out', - 
r'(?:any|up\s+to)\s+(?:number\s+of\s+)?(?:target\s+)?(?:other\s+)?(?:creatures?|permanents?|nonland\s+permanents?)\s+you\s+control\s+phases?\s+out', - r'all\s+(?:creatures?|permanents?)\s+you\s+control\s+phase\s+out', - r'each\s+(?:creature|permanent)\s+you\s+control\s+phases?\s+out', - # Pronoun reference to "you control" context - r'(?:creatures?|permanents?|planeswalkers?)\s+you\s+control[^.]*(?:those|the)\s+(?:creatures?|permanents?|planeswalkers?)\s+phase\s+out', - r'creature\s+you\s+control[^.]*(?:it)\s+phases?\s+out', - # "Those permanents" referring back to controlled permanents (across sentence boundaries) - r'you\s+control.*those\s+(?:creatures?|permanents?|planeswalkers?)\s+phase\s+out', - # Equipment/Aura (beneficial to your permanents) - r'equipped\s+(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?phases?\s+out', - r'enchanted\s+(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?phases?\s+out', - r'enchanted\s+(?:creature|permanent)\s+(?:has|gains?)\s+phasing', # NEW: "has phasing" for Cloak of Invisibility, Teferi's Curse - # Pronoun reference after equipped/enchanted creature mentioned - r'(?:equipped|enchanted)\s+(?:creature|permanent)[^.]*,?\s+(?:then\s+)?that\s+(?:creature|permanent)\s+phases?\s+out', - # Target controlled by specific player - r'(?:each|target)\s+(?:creature|permanent)\s+target\s+player\s+controls\s+phases?\s+out', - ] - - if any(re.search(pattern, text_lower) for pattern in your_patterns): - tags.add("Your Permanents: Phasing") - logger.debug(f"Card '{card_name}': detected Your Permanents: Phasing") - - # Check for blanket phasing (all permanents, no ownership) - blanket_patterns = [ - r'all\s+(?:nontoken\s+)?(?:creatures?|permanents?)(?:\s+of\s+that\s+type)?\s+(?:[^.]*\s+)?phase\s+out', - r'each\s+(?:creature|permanent)\s+(?:[^.]*\s+)?phases?\s+out', - # NEW: Type-specific blanket (Shimmer: "Each land of the chosen type has phasing") - 
r'each\s+(?:land|creature|permanent|artifact|enchantment)\s+of\s+the\s+chosen\s+type\s+has\s+phasing', - r'(?:lands?|creatures?|permanents?|artifacts?|enchantments?)\s+of\s+the\s+chosen\s+type\s+(?:have|has)\s+phasing', - # Pronoun reference to "all creatures" - r'all\s+(?:nontoken\s+)?(?:creatures?|permanents?)[^.]*,?\s+(?:then\s+)?(?:those|the)\s+(?:creatures?|permanents?)\s+phase\s+out', - ] - - # Only blanket if no specific ownership mentioned - has_blanket_pattern = any(re.search(pattern, text_lower) for pattern in blanket_patterns) - no_ownership = 'you control' not in text_lower and 'target player controls' not in text_lower and 'opponent' not in text_lower - - if has_blanket_pattern and no_ownership: - tags.add("Blanket: Phasing") - logger.debug(f"Card '{card_name}': detected Blanket: Phasing") + # Format scope tags with "Phasing" ability name + for scope in scopes: + if scope == "Targeted": + tags.add("Targeted: Phasing") + else: + tags.add(scope_utils.format_scope_tag(scope, "Phasing")) + logger.debug(f"Card '{card_name}': detected {scope}: Phasing") return tags diff --git a/code/tagging/protection_grant_detection.py b/code/tagging/protection_grant_detection.py index a88a86b..f183efc 100644 --- a/code/tagging/protection_grant_detection.py +++ b/code/tagging/protection_grant_detection.py @@ -10,126 +10,135 @@ Usage in tagger.py: if is_granting_protection(text, keywords): # Tag as Protection """ - import re -from typing import Set, List, Pattern - -from code.tagging.tag_constants import CREATURE_TYPES +from typing import List, Pattern, Set +from . import regex_patterns as rgx +from . 
import tag_utils +from .tag_constants import CONTEXT_WINDOW_SIZE, CREATURE_TYPES, PROTECTION_KEYWORDS # Pre-compile kindred detection patterns at module load for performance # Pattern: (compiled_regex, tag_name_template) -KINDRED_PATTERNS: List[tuple[Pattern, str]] = [] - -def _init_kindred_patterns(): - """Initialize pre-compiled kindred patterns for all creature types.""" - global KINDRED_PATTERNS - if KINDRED_PATTERNS: - return # Already initialized +def _build_kindred_patterns() -> List[tuple[Pattern, str]]: + """Build pre-compiled kindred patterns for all creature types. + + Returns: + List of tuples containing (compiled_pattern, tag_name) + """ + patterns = [] for creature_type in CREATURE_TYPES: creature_lower = creature_type.lower() creature_escaped = re.escape(creature_lower) tag_name = f"{creature_type}s Gain Protection" - - # Create 3 patterns per type - patterns_to_compile = [ - (rf'\bother {creature_escaped}s?\b.*\b(have|gain)\b', tag_name), - (rf'\b{creature_escaped} creatures?\b.*\b(have|gain)\b', tag_name), - (rf'\btarget {creature_escaped}\b.*\bgains?\b', tag_name), + pattern_templates = [ + rf'\bother {creature_escaped}s?\b.*\b(have|gain)\b', + rf'\b{creature_escaped} creatures?\b.*\b(have|gain)\b', + rf'\btarget {creature_escaped}\b.*\bgains?\b', ] - for pattern_str, tag in patterns_to_compile: + for pattern_str in pattern_templates: try: compiled = re.compile(pattern_str, re.IGNORECASE) - KINDRED_PATTERNS.append((compiled, tag)) + patterns.append((compiled, tag_name)) except re.error: # Skip patterns that fail to compile pass + + return patterns +KINDRED_PATTERNS: List[tuple[Pattern, str]] = _build_kindred_patterns() # Grant verb patterns - cards that give protection to other permanents # These patterns look for grant verbs that affect OTHER permanents, not self # M5: Added phasing support -GRANT_VERB_PATTERNS = [ - r'\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', - 
r'\bgive[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', - r'\bgrant[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', - r'\bhave\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', # "have hexproof" static grants - r'\bget[s]?\b.*\+.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', # "gets +X/+X and has hexproof" direct - r'\bget[s]?\b.*\+.*\band\b.*\b(gain[s]?|have)\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', # "gets +X/+X and gains hexproof" - r'\bphases? out\b', # M5: Direct phasing triggers (e.g., "it phases out") +# Pre-compiled at module load for performance +GRANT_VERB_PATTERNS: List[Pattern] = [ + re.compile(r'\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', re.IGNORECASE), + re.compile(r'\bgive[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', re.IGNORECASE), + re.compile(r'\bgrant[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', re.IGNORECASE), + re.compile(r'\bhave\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', re.IGNORECASE), # "have hexproof" static grants + re.compile(r'\bget[s]?\b.*\+.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', re.IGNORECASE), # "gets +X/+X and has hexproof" direct + re.compile(r'\bget[s]?\b.*\+.*\band\b.*\b(gain[s]?|have)\b.*\b(hexproof|shroud|indestructible|ward|protection|phasing)\b', re.IGNORECASE), # "gets +X/+X and gains hexproof" + re.compile(r'\bphases? 
out\b', re.IGNORECASE), # M5: Direct phasing triggers (e.g., "it phases out") ] # Self-reference patterns that should NOT count as granting # Reminder text and keyword lines only # M5: Added phasing support -SELF_REFERENCE_PATTERNS = [ - r'^\s*(hexproof|shroud|indestructible|ward|protection|phasing)', # Start of text (keyword ability) - r'\([^)]*\b(hexproof|shroud|indestructible|ward|protection|phasing)[^)]*\)', # Reminder text in parens +# Pre-compiled at module load for performance +SELF_REFERENCE_PATTERNS: List[Pattern] = [ + re.compile(r'^\s*(hexproof|shroud|indestructible|ward|protection|phasing)', re.IGNORECASE), # Start of text (keyword ability) + re.compile(r'\([^)]*\b(hexproof|shroud|indestructible|ward|protection|phasing)[^)]*\)', re.IGNORECASE), # Reminder text in parens ] # Conditional self-grant patterns - activated/triggered abilities that grant to self -CONDITIONAL_SELF_GRANT_PATTERNS = [ +# Pre-compiled at module load for performance +CONDITIONAL_SELF_GRANT_PATTERNS: List[Pattern] = [ # Activated abilities - r'\{[^}]*\}.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b', - r'discard.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b', - r'\{t\}.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b', - r'sacrifice.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b', - r'pay.*life.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b', + re.compile(r'\{[^}]*\}.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b', re.IGNORECASE), + re.compile(r'discard.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b', re.IGNORECASE), + re.compile(r'\{t\}.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b', re.IGNORECASE), + re.compile(r'sacrifice.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b', 
re.IGNORECASE), + re.compile(r'pay.*life.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b', re.IGNORECASE), # Triggered abilities that grant to self only - r'whenever.*\b(this creature|this permanent|it)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b', - r'whenever you (cast|play|attack|cycle|discard|commit).*\b(this creature|this permanent|it)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b', - r'at the beginning.*\b(this creature|this permanent|it)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b', - r'whenever.*\b(this creature|this permanent)\b (attacks|enters|becomes).*\b(this creature|this permanent|it)\b.*\bgain[s]?\b', + re.compile(r'whenever.*\b(this creature|this permanent|it)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b', re.IGNORECASE), + re.compile(r'whenever you (cast|play|attack|cycle|discard|commit).*\b(this creature|this permanent|it)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b', re.IGNORECASE), + re.compile(r'at the beginning.*\b(this creature|this permanent|it)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b', re.IGNORECASE), + re.compile(r'whenever.*\b(this creature|this permanent)\b (attacks|enters|becomes).*\b(this creature|this permanent|it)\b.*\bgain[s]?\b', re.IGNORECASE), # Named self-references (e.g., "Pristine Skywise gains") - r'whenever you cast.*[A-Z][a-z]+.*gains.*\b(hexproof|shroud|indestructible|ward|protection)\b', - r'whenever you.*[A-Z][a-z]+.*gains.*\b(hexproof|shroud|indestructible|ward|protection)\b', + re.compile(r'whenever you cast.*[A-Z][a-z]+.*gains.*\b(hexproof|shroud|indestructible|ward|protection)\b', re.IGNORECASE), + re.compile(r'whenever you.*[A-Z][a-z]+.*gains.*\b(hexproof|shroud|indestructible|ward|protection)\b', re.IGNORECASE), # Static conditional abilities (as long as, if you control X) - r'as long as.*\b(this creature|this 
permanent|it|has)\b.*(has|gains?).*\b(hexproof|shroud|indestructible|ward|protection)\b', + re.compile(r'as long as.*\b(this creature|this permanent|it|has)\b.*(has|gains?).*\b(hexproof|shroud|indestructible|ward|protection)\b', re.IGNORECASE), ] # Mass grant patterns - affects multiple creatures YOU control -MASS_GRANT_PATTERNS = [ - r'creatures you control (have|gain|get)', - r'other .* you control (have|gain|get)', - r'(artifacts?|enchantments?|permanents?) you control (have|gain|get)', # Artifacts you control have... - r'other (creatures?|artifacts?|enchantments?) (have|gain|get)', # Other creatures have... - r'all (creatures?|slivers?|permanents?) (have|gain|get)', # All creatures/slivers have... +# Pre-compiled at module load for performance +MASS_GRANT_PATTERNS: List[Pattern] = [ + re.compile(r'creatures you control (have|gain|get)', re.IGNORECASE), + re.compile(r'other .* you control (have|gain|get)', re.IGNORECASE), + re.compile(r'(artifacts?|enchantments?|permanents?) you control (have|gain|get)', re.IGNORECASE), # Artifacts you control have... + re.compile(r'other (creatures?|artifacts?|enchantments?) (have|gain|get)', re.IGNORECASE), # Other creatures have... + re.compile(r'all (creatures?|slivers?|permanents?) (have|gain|get)', re.IGNORECASE), # All creatures/slivers have... ] # Targeted grant patterns - must specify "you control" -TARGETED_GRANT_PATTERNS = [ - r'target .* you control (gains?|gets?|has)', - r'equipped creature (gains?|gets?|has)', - r'enchanted creature (gains?|gets?|has)', +# Pre-compiled at module load for performance +TARGETED_GRANT_PATTERNS: List[Pattern] = [ + re.compile(r'target .* you control (gains?|gets?|has)', re.IGNORECASE), + re.compile(r'equipped creature (gains?|gets?|has)', re.IGNORECASE), + re.compile(r'enchanted creature (gains?|gets?|has)', re.IGNORECASE), ] # Exclusion patterns - cards that remove or prevent protection -EXCLUSION_PATTERNS = [ - r"can't have (hexproof|indestructible|ward|shroud)", - r"lose[s]? 
(hexproof|indestructible|ward|shroud|protection)", - r"without (hexproof|indestructible|ward|shroud)", - r"protection from.*can't", +# Pre-compiled at module load for performance +EXCLUSION_PATTERNS: List[Pattern] = [ + re.compile(r"can't have (hexproof|indestructible|ward|shroud)", re.IGNORECASE), + re.compile(r"lose[s]? (hexproof|indestructible|ward|shroud|protection)", re.IGNORECASE), + re.compile(r"without (hexproof|indestructible|ward|shroud)", re.IGNORECASE), + re.compile(r"protection from.*can't", re.IGNORECASE), ] # Opponent grant patterns - grants to opponent's permanents (EXCLUDE these) # NOTE: "all creatures" and "all permanents" are BLANKET effects (help you too), # not opponent grants. Only exclude effects that ONLY help opponents. -OPPONENT_GRANT_PATTERNS = [ - r'target opponent', - r'each opponent', - r'opponents? control', # creatures your opponents control - r'opponent.*permanents?.*have', # opponent's permanents have +# Pre-compiled at module load for performance +OPPONENT_GRANT_PATTERNS: List[Pattern] = [ + rgx.TARGET_OPPONENT, + rgx.EACH_OPPONENT, + rgx.OPPONENT_CONTROL, + re.compile(r'opponent.*permanents?.*have', re.IGNORECASE), # opponent's permanents have ] # Blanket grant patterns - affects all permanents regardless of controller # These are VALID protection grants that should be tagged (Blanket scope in M5) -BLANKET_GRANT_PATTERNS = [ - r'\ball creatures? (have|gain|get)\b', # All creatures gain hexproof - r'\ball permanents? (have|gain|get)\b', # All permanents gain indestructible - r'\beach creature (has|gains?|gets?)\b', # Each creature gains ward - r'\beach player\b', # Each player gains hexproof (very rare but valid blanket) +# Pre-compiled at module load for performance +BLANKET_GRANT_PATTERNS: List[Pattern] = [ + re.compile(r'\ball creatures? (have|gain|get)\b', re.IGNORECASE), # All creatures gain hexproof + re.compile(r'\ball permanents? 
(have|gain|get)\b', re.IGNORECASE), # All permanents gain indestructible + re.compile(r'\beach creature (has|gains?|gets?)\b', re.IGNORECASE), # Each creature gains ward + rgx.EACH_PLAYER, # Each player gains hexproof (very rare but valid blanket) ] # Kindred-specific grant patterns for metadata tagging @@ -178,16 +187,6 @@ KINDRED_GRANT_PATTERNS = { ], } -# Protection keyword patterns for inherent check -PROTECTION_KEYWORDS = { - 'hexproof', - 'shroud', - 'indestructible', - 'ward', - 'protection from', - 'protection', -} - def get_kindred_protection_tags(text: str) -> Set[str]: """ @@ -207,9 +206,6 @@ def get_kindred_protection_tags(text: str) -> Set[str]: if not text: return set() - # Initialize pre-compiled patterns if needed - _init_kindred_patterns() - text_lower = text.lower() tags = set() @@ -217,13 +213,11 @@ def get_kindred_protection_tags(text: str) -> Set[str]: protective_abilities = ['hexproof', 'shroud', 'indestructible', 'ward', 'protection'] if not any(keyword in text_lower for keyword in protective_abilities): return tags - - # Check predefined patterns (specific kindred types we track) for tag_base, patterns in KINDRED_GRANT_PATTERNS.items(): for pattern in patterns: - match = re.search(pattern, text_lower, re.IGNORECASE) + pattern_compiled = re.compile(pattern, re.IGNORECASE) if isinstance(pattern, str) else pattern + match = pattern_compiled.search(text_lower) if match: - # Extract creature type from tag_base (e.g., "Knights" from "Knights Gain Protection") creature_type = tag_base.split(' Gain ')[0] # Get the matched text to check which abilities are in this specific grant matched_text = match.group(0) @@ -244,7 +238,6 @@ def get_kindred_protection_tags(text: str) -> Set[str]: for compiled_pattern, tag_template in KINDRED_PATTERNS: match = compiled_pattern.search(text_lower) if match: - # Extract creature type from tag_template (e.g., "Knights" from "Knights Gain Protection") creature_type = tag_template.split(' Gain ')[0] # Get the matched 
text to check which abilities are in this specific grant matched_text = match.group(0) @@ -278,18 +271,16 @@ def is_opponent_grant(text: str) -> bool: # Remove reminder text (in parentheses) to avoid false positives # Reminder text often mentions "opponents control" for hexproof/shroud explanations - text_no_reminder = re.sub(r'\([^)]*\)', '', text_lower) - - # Check for opponent-specific grant patterns in the main text (not reminder) + text_no_reminder = tag_utils.strip_reminder_text(text_lower) for pattern in OPPONENT_GRANT_PATTERNS: - match = re.search(pattern, text_no_reminder, re.IGNORECASE) + match = pattern.search(text_no_reminder) if match: # Must be in context of granting protection if any(prot in text_lower for prot in ['hexproof', 'shroud', 'indestructible', 'ward', 'protection']): - # Check the context around the match - context_start = max(0, match.start() - 30) - context_end = min(len(text_no_reminder), match.end() + 70) - context = text_no_reminder[context_start:context_end] + context = tag_utils.extract_context_window( + text_no_reminder, match.start(), match.end(), + window_size=CONTEXT_WINDOW_SIZE, include_before=True + ) # If "you control" appears in the context, it's limiting to YOUR permanents, not opponents if 'you control' not in context: @@ -307,10 +298,8 @@ def has_conditional_self_grant(text: str) -> bool: return False text_lower = text.lower() - - # Check for conditional self-grant patterns (activated/triggered abilities) for pattern in CONDITIONAL_SELF_GRANT_PATTERNS: - if re.search(pattern, text_lower, re.IGNORECASE): + if pattern.search(text_lower): return True return False @@ -331,30 +320,121 @@ def is_conditional_self_grant(text: str) -> bool: return False text_lower = text.lower() - - # Check if it has conditional self-grant patterns found_conditional_self = has_conditional_self_grant(text) if not found_conditional_self: return False # If we found a conditional self-grant, check if there's ALSO a grant to others - # Look for 
patterns that grant to creatures besides itself - has_other_grant = any(re.search(pattern, text_lower, re.IGNORECASE) for pattern in [ - r'other creatures', - r'creatures you control (have|gain)', - r'target (creature|permanent) you control gains', - r'another target (creature|permanent)', - r'equipped creature (has|gains)', - r'enchanted creature (has|gains)', - r'target legendary', - r'permanents you control gain', - ]) + other_grant_patterns = [ + rgx.OTHER_CREATURES, + re.compile(r'creatures you control (have|gain)', re.IGNORECASE), + re.compile(r'target (creature|permanent) you control gains', re.IGNORECASE), + re.compile(r'another target (creature|permanent)', re.IGNORECASE), + re.compile(r'equipped creature (has|gains)', re.IGNORECASE), + re.compile(r'enchanted creature (has|gains)', re.IGNORECASE), + re.compile(r'target legendary', re.IGNORECASE), + re.compile(r'permanents you control gain', re.IGNORECASE), + ] + has_other_grant = any(pattern.search(text_lower) for pattern in other_grant_patterns) # Return True only if it's ONLY conditional self-grants (no other grants) return not has_other_grant +def _should_exclude_token_creation(text_lower: str) -> bool: + """Check if card only creates tokens with protection (not granting to existing permanents). + + Args: + text_lower: Lowercased card text + + Returns: + True if card only creates tokens, False if it also grants + """ + token_with_protection = re.compile(r'create.*token.*with.*(hexproof|shroud|indestructible|ward|protection)', re.IGNORECASE) + if token_with_protection.search(text_lower): + has_grant_to_others = any(pattern.search(text_lower) for pattern in MASS_GRANT_PATTERNS) + return not has_grant_to_others + return False + + +def _should_exclude_kindred_only(text: str, text_lower: str, exclude_kindred: bool) -> bool: + """Check if card only grants to specific kindred types. 
+ + Args: + text: Original card text + text_lower: Lowercased card text + exclude_kindred: Whether to exclude kindred-specific grants + + Returns: + True if card only has kindred grants, False if it has broad grants + """ + if not exclude_kindred: + return False + + kindred_tags = get_kindred_protection_tags(text) + if not kindred_tags: + return False + broad_only_patterns = [ + re.compile(r'\bcreatures you control (have|gain)\b(?!.*(knight|merfolk|zombie|elf|dragon|goblin|sliver))', re.IGNORECASE), + re.compile(r'\bpermanents you control (have|gain)\b', re.IGNORECASE), + re.compile(r'\beach (creature|permanent) you control', re.IGNORECASE), + re.compile(r'\ball (creatures?|permanents?)', re.IGNORECASE), + ] + + has_broad_grant = any(pattern.search(text_lower) for pattern in broad_only_patterns) + return not has_broad_grant + + +def _check_pattern_grants(text_lower: str, pattern_list: List[Pattern]) -> bool: + """Check if text contains protection grants matching pattern list. + + Args: + text_lower: Lowercased card text + pattern_list: List of grant patterns to check + + Returns: + True if protection grant found, False otherwise + """ + for pattern in pattern_list: + match = pattern.search(text_lower) + if match: + context = tag_utils.extract_context_window(text_lower, match.start(), match.end()) + if any(prot in context for prot in PROTECTION_KEYWORDS): + return True + return False + + +def _has_inherent_protection_only(text_lower: str, keywords: str, found_grant: bool) -> bool: + """Check if card only has inherent protection without granting. 
+ + Args: + text_lower: Lowercased card text + keywords: Card keywords + found_grant: Whether a grant pattern was found + + Returns: + True if card only has inherent protection, False otherwise + """ + if not keywords: + return False + + keywords_lower = keywords.lower() + has_inherent = any(k in keywords_lower for k in PROTECTION_KEYWORDS) + + if not has_inherent or found_grant: + return False + stat_only_pattern = re.compile(r'(get[s]?|gain[s]?)\s+[+\-][0-9X]+/[+\-][0-9X]+', re.IGNORECASE) + has_stat_only = bool(stat_only_pattern.search(text_lower)) + mentions_other_without_prot = False + if 'other' in text_lower: + other_idx = text_lower.find('other') + remaining_text = text_lower[other_idx:] + mentions_other_without_prot = not any(prot in remaining_text for prot in PROTECTION_KEYWORDS) + + return has_stat_only or mentions_other_without_prot + + def is_granting_protection(text: str, keywords: str, exclude_kindred: bool = False) -> bool: """ Determine if a card grants protection effects to other permanents. 
@@ -381,117 +461,32 @@ def is_granting_protection(text: str, keywords: str, exclude_kindred: bool = Fal text_lower = text.lower() - # EXCLUDE: Opponent grants + # Early exclusion checks if is_opponent_grant(text): return False - # EXCLUDE: Conditional self-grants only if is_conditional_self_grant(text): return False - # EXCLUDE: Cards that remove protection - for pattern in EXCLUSION_PATTERNS: - if re.search(pattern, text_lower, re.IGNORECASE): - return False + if any(pattern.search(text_lower) for pattern in EXCLUSION_PATTERNS): + return False - # EXCLUDE: Token creation with protection (not granting to existing permanents) - if re.search(r'create.*token.*with.*(hexproof|shroud|indestructible|ward|protection)', text_lower, re.IGNORECASE): - # Check if there's ALSO granting to other permanents - has_grant_to_others = any(re.search(pattern, text_lower, re.IGNORECASE) for pattern in MASS_GRANT_PATTERNS) - if not has_grant_to_others: - return False + if _should_exclude_token_creation(text_lower): + return False - # EXCLUDE: Kindred-specific grants if requested - if exclude_kindred: - kindred_tags = get_kindred_protection_tags(text) - if kindred_tags: - # If we detected kindred tags, check if there's ALSO a non-kindred grant - # Look for grant patterns that explicitly grant to ALL creatures/permanents broadly - has_broad_grant = False - - # Patterns that indicate truly broad grants (not type-specific) - broad_only_patterns = [ - r'\bcreatures you control (have|gain)\b(?!.*(knight|merfolk|zombie|elf|dragon|goblin|sliver))', # Only if not followed by type - r'\bpermanents you control (have|gain)\b', - r'\beach (creature|permanent) you control', - r'\ball (creatures?|permanents?)', - ] - - for pattern in broad_only_patterns: - if re.search(pattern, text_lower, re.IGNORECASE): - has_broad_grant = True - break - - if not has_broad_grant: - return False # Only kindred grants, exclude - - # Check if card has inherent protection keywords - has_inherent = False - if keywords: - 
keywords_lower = keywords.lower() - has_inherent = any(k in keywords_lower for k in PROTECTION_KEYWORDS) - - # Check for explicit grants with protection keywords + if _should_exclude_kindred_only(text, text_lower, exclude_kindred): + return False found_grant = False - - # Blanket grant patterns (all creatures gain hexproof) - these are VALID grants - for pattern in BLANKET_GRANT_PATTERNS: - match = re.search(pattern, text_lower, re.IGNORECASE) - if match: - # Check if protection keyword appears nearby - context_start = match.start() - context_end = min(len(text_lower), match.end() + 70) - context = text_lower[context_start:context_end] - - if any(prot in context for prot in PROTECTION_KEYWORDS): - found_grant = True - break - - # Mass grant patterns (creatures you control have/gain) - if not found_grant: - for pattern in MASS_GRANT_PATTERNS: - match = re.search(pattern, text_lower, re.IGNORECASE) - if match: - # Check if protection keyword appears in the same sentence or nearby (within 70 chars AFTER the match) - # This ensures we're looking at "creatures you control HAVE hexproof" not just having both phrases - context_start = match.start() - context_end = min(len(text_lower), match.end() + 70) - context = text_lower[context_start:context_end] - - if any(prot in context for prot in PROTECTION_KEYWORDS): - found_grant = True - break - - # Targeted grant patterns (target creature gains) - if not found_grant: - for pattern in TARGETED_GRANT_PATTERNS: - match = re.search(pattern, text_lower, re.IGNORECASE) - if match: - # Check if protection keyword appears after the grant verb (within 70 chars) - context_start = match.start() - context_end = min(len(text_lower), match.end() + 70) - context = text_lower[context_start:context_end] - - if any(prot in context for prot in PROTECTION_KEYWORDS): - found_grant = True - break - - # Grant verb patterns (creature gains/gets hexproof) - if not found_grant: - for pattern in GRANT_VERB_PATTERNS: - if re.search(pattern, text_lower, 
re.IGNORECASE): - found_grant = True - break - - # If we have inherent protection and the ONLY text is about stats (no grant words), exclude - if has_inherent and not found_grant: - # Check if text only talks about other stats (power/toughness, +X/+X) - has_stat_only = bool(re.search(r'(get[s]?|gain[s]?)\s+[+\-][0-9X]+/[+\-][0-9X]+', text_lower)) - # Check if text mentions "other" without protection keywords - mentions_other_without_prot = 'other' in text_lower and not any(prot in text_lower for prot in PROTECTION_KEYWORDS if prot in text_lower[text_lower.find('other'):]) - - if has_stat_only or mentions_other_without_prot: - return False + if _check_pattern_grants(text_lower, BLANKET_GRANT_PATTERNS): + found_grant = True + elif _check_pattern_grants(text_lower, MASS_GRANT_PATTERNS): + found_grant = True + elif _check_pattern_grants(text_lower, TARGETED_GRANT_PATTERNS): + found_grant = True + elif any(pattern.search(text_lower) for pattern in GRANT_VERB_PATTERNS): + found_grant = True + if _has_inherent_protection_only(text_lower, keywords, found_grant): + return False return found_grant @@ -516,25 +511,14 @@ def categorize_protection_card(name: str, text: str, keywords: str, card_type: s 'Neither' - false positive """ keywords_lower = keywords.lower() if keywords else '' - - # Check for opponent grants first if is_opponent_grant(text): return 'Opponent' - - # Check for conditional self-grants (ONLY self, no other grants) if is_conditional_self_grant(text): return 'ConditionalSelf' - - # Check if it has conditional self-grant (may also have other grants) has_cond_self = has_conditional_self_grant(text) - - # Check if it has inherent protection has_inherent = any(k in keywords_lower for k in PROTECTION_KEYWORDS) - - # Check for kindred-specific grants kindred_tags = get_kindred_protection_tags(text) if kindred_tags and exclude_kindred: - # Check if there's ALSO a broad grant (excluding kindred) grants_broad = is_granting_protection(text, keywords, 
exclude_kindred=True) if grants_broad and has_inherent: @@ -551,8 +535,6 @@ def categorize_protection_card(name: str, text: str, keywords: str, card_type: s else: # Only kindred grants, no inherent or broad return 'Kindred' - - # Check if it grants protection broadly (not kindred-specific) grants_protection = is_granting_protection(text, keywords, exclude_kindred=exclude_kindred) # Categorize based on what it does diff --git a/code/tagging/protection_scope_detection.py b/code/tagging/protection_scope_detection.py index bffc768..e6793ab 100644 --- a/code/tagging/protection_scope_detection.py +++ b/code/tagging/protection_scope_detection.py @@ -5,39 +5,99 @@ Detects the scope of protection effects (Self, Your Permanents, Blanket, Opponen to enable intelligent filtering in deck building. Part of M5: Protection Effect Granularity milestone. +Refactored in M2: Create Scope Detection Utilities to use generic scope detection. """ +# Standard library imports import re from typing import Optional, Set + +# Local application imports from code.logging_util import get_logger +from . import scope_detection_utils as scope_utils +from .tag_constants import PROTECTION_ABILITIES logger = get_logger(__name__) -# Protection abilities to detect -PROTECTION_ABILITIES = [ - 'Protection', - 'Ward', - 'Hexproof', - 'Shroud', - 'Indestructible' -] +# Protection scope pattern definitions +def _get_protection_scope_patterns(ability: str) -> scope_utils.ScopePatterns: + """ + Build scope patterns for protection abilities. 
+ + Args: + ability: Ability keyword (e.g., "hexproof", "ward") + + Returns: + ScopePatterns object with compiled patterns + """ + ability_lower = ability.lower() + + # Opponent patterns: grants protection TO opponent's permanents + # Note: Must distinguish from hexproof reminder text "opponents control [spells/abilities]" + opponent_patterns = [ + re.compile(r'creatures?\s+(?:your\s+)?opponents?\s+control\s+(?:have|gain)', re.IGNORECASE), + re.compile(r'permanents?\s+(?:your\s+)?opponents?\s+control\s+(?:have|gain)', re.IGNORECASE), + re.compile(r'each\s+creature\s+an?\s+opponent\s+controls?\s+(?:has|gains?)', re.IGNORECASE), + ] + + # Self-reference patterns + self_patterns = [ + # Tilde (~) - strong self-reference indicator + re.compile(r'~\s+(?:has|gains?)\s+' + ability_lower, re.IGNORECASE), + re.compile(r'~\s+is\s+' + ability_lower, re.IGNORECASE), + # "this creature/permanent" pronouns + re.compile(r'this\s+(?:creature|permanent|artifact|enchantment)\s+(?:has|gains?)\s+' + ability_lower, re.IGNORECASE), + # Starts with ability (likely self) + re.compile(r'^(?:has|gains?)\s+' + ability_lower, re.IGNORECASE), + ] + + # Your permanents patterns + your_patterns = [ + re.compile(r'(?:other\s+)?(?:creatures?|permanents?|artifacts?|enchantments?)\s+you\s+control', re.IGNORECASE), + re.compile(r'your\s+(?:creatures?|permanents?|artifacts?|enchantments?)', re.IGNORECASE), + re.compile(r'each\s+(?:creature|permanent)\s+you\s+control', re.IGNORECASE), + re.compile(r'other\s+\w+s?\s+you\s+control', re.IGNORECASE), # "Other Merfolk you control", etc. 
+ # "Other X you control...have Y" pattern for static grants + re.compile(r'other\s+(?:\w+\s+)?(?:creatures?|permanents?)\s+you\s+control\s+(?:get\s+[^.]*\s+and\s+)?have\s+' + ability_lower, re.IGNORECASE), + re.compile(r'other\s+\w+s?\s+you\s+control\s+(?:get\s+[^.]*\s+and\s+)?have\s+' + ability_lower, re.IGNORECASE), # "Other Knights you control...have" + re.compile(r'equipped\s+(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?(?:has|gains?)\s+(?:[^.]*\s+and\s+)?' + ability_lower, re.IGNORECASE), # Equipment + re.compile(r'enchanted\s+(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?(?:has|gains?)\s+(?:[^.]*\s+and\s+)?' + ability_lower, re.IGNORECASE), # Aura + re.compile(r'target\s+(?:\w+\s+)?(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?(?:gains?)\s+' + ability_lower, re.IGNORECASE), # Target + ] + + # Blanket patterns (no ownership qualifier) + # Note: Abilities can be listed with "and" (e.g., "gain hexproof and indestructible") + blanket_patterns = [ + re.compile(r'all\s+(?:creatures?|permanents?)\s+(?:have|gain)\s+(?:[^.]*\s+and\s+)?' + ability_lower, re.IGNORECASE), + re.compile(r'each\s+(?:creature|permanent)\s+(?:has|gains?)\s+(?:[^.]*\s+and\s+)?' + ability_lower, re.IGNORECASE), + re.compile(r'(?:creatures?|permanents?)\s+(?:have|gain)\s+(?:[^.]*\s+and\s+)?' + ability_lower, re.IGNORECASE), + ] + + return scope_utils.ScopePatterns( + opponent=opponent_patterns, + self_ref=self_patterns, + your_permanents=your_patterns, + blanket=blanket_patterns + ) -def detect_protection_scope(text: str, card_name: str, ability: str) -> Optional[str]: +def detect_protection_scope(text: str, card_name: str, ability: str, keywords: Optional[str] = None) -> Optional[str]: """ Detect the scope of a protection effect. Detection priority order (prevents misclassification): + 0. Static keyword → "Self" 1. Opponent ownership → "Opponent Permanents" - 2. Your ownership → "Your Permanents" - 3. Self-reference → "Self" + 2. Self-reference → "Self" + 3. 
Your ownership → "Your Permanents" 4. No ownership qualifier → "Blanket" Args: text: Card text (lowercase for pattern matching) card_name: Card name (for self-reference detection) ability: Ability type (Ward, Hexproof, etc.) + keywords: Optional keywords field for static keyword detection Returns: Scope prefix or None: "Self", "Your Permanents", "Blanket", "Opponent Permanents" @@ -45,120 +105,22 @@ def detect_protection_scope(text: str, card_name: str, ability: str) -> Optional if not text or not ability: return None - text_lower = text.lower() - ability_lower = ability.lower() - card_name_lower = card_name.lower() + # Build patterns for this ability + patterns = _get_protection_scope_patterns(ability) - # Check if ability is mentioned in text - if ability_lower not in text_lower: - return None - - # Priority 1: Opponent ownership (grants protection TO opponent's permanents) - # Note: Must distinguish from hexproof reminder text "opponents control [spells/abilities]" - # Only match when "opponents control" refers to creatures/permanents, not spells - opponent_patterns = [ - r'creatures?\s+(?:your\s+)?opponents?\s+control\s+(?:have|gain)', - r'permanents?\s+(?:your\s+)?opponents?\s+control\s+(?:have|gain)', - r'each\s+creature\s+an?\s+opponent\s+controls?\s+(?:has|gains?)' - ] - - for pattern in opponent_patterns: - if re.search(pattern, text_lower): - return "Opponent Permanents" - - # Priority 2: Check for self-reference BEFORE "Your Permanents" - # This prevents tilde (~) from being caught by creature type patterns - - # Check for tilde (~) - strong self-reference indicator - tilde_patterns = [ - r'~\s+(?:has|gains?)\s+' + ability_lower, - r'~\s+is\s+' + ability_lower - ] - - for pattern in tilde_patterns: - if re.search(pattern, text_lower): - return "Self" - - # Check for "this creature/permanent" pronouns - this_patterns = [ - r'this\s+(?:creature|permanent|artifact|enchantment)\s+(?:has|gains?)\s+' + ability_lower, - r'^(?:has|gains?)\s+' + ability_lower # 
Starts with ability (likely self) - ] - - for pattern in this_patterns: - if re.search(pattern, text_lower): - return "Self" - - # Check for card name (replace special characters for matching) - card_name_escaped = re.escape(card_name_lower) - if re.search(rf'\b{card_name_escaped}\b', text_lower): - # Make sure it's in a self-protection context - # e.g., "Svyelun has indestructible" not "Svyelun and other Merfolk" - self_context_patterns = [ - rf'\b{card_name_escaped}\s+(?:has|gains?)\s+{ability_lower}', - rf'\b{card_name_escaped}\s+is\s+{ability_lower}' - ] - for pattern in self_context_patterns: - if re.search(pattern, text_lower): - return "Self" - - # NEW: If no grant patterns found at all, assume inherent protection (Self) - # This catches cards where protection is in the keywords field but not explained in text - # e.g., "Protection from creatures" as a keyword line - # Check if we have the ability keyword but no grant patterns - has_grant_pattern = any(re.search(pattern, text_lower) for pattern in [ - r'(?:have|gain|grant|give|get)[s]?\s+', - r'other\s+', - r'creatures?\s+you\s+control', - r'permanents?\s+you\s+control', - r'equipped', - r'enchanted', - r'target' - ]) - - if not has_grant_pattern: - # No grant verbs found - likely inherent protection - return "Self" - - # Priority 3: Your ownership (most common) - # Note: "Other [Type]" patterns included for type-specific grants - # Note: "equipped creature", "target creature", etc. are permanents you control - your_patterns = [ - r'(?:other\s+)?(?:creatures?|permanents?|artifacts?|enchantments?)\s+you\s+control', - r'your\s+(?:creatures?|permanents?|artifacts?|enchantments?)', - r'each\s+(?:creature|permanent)\s+you\s+control', - r'other\s+\w+s?\s+you\s+control', # "Other Merfolk you control", etc. 
- # NEW: "Other X you control...have Y" pattern for static grants - r'other\s+(?:\w+\s+)?(?:creatures?|permanents?)\s+you\s+control\s+(?:get\s+[^.]*\s+and\s+)?have\s+' + ability_lower, - r'other\s+\w+s?\s+you\s+control\s+(?:get\s+[^.]*\s+and\s+)?have\s+' + ability_lower, # "Other Knights you control...have" - r'equipped\s+(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?(?:has|gains?)\s+(?:[^.]*\s+and\s+)?' + ability_lower, # Equipment - r'enchanted\s+(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?(?:has|gains?)\s+(?:[^.]*\s+and\s+)?' + ability_lower, # Aura - r'target\s+(?:\w+\s+)?(?:creature|permanent)\s+(?:gets\s+[^.]*\s+and\s+)?(?:gains?)\s+' + ability_lower # Target (with optional adjective) - ] - - for pattern in your_patterns: - if re.search(pattern, text_lower): - return "Your Permanents" - - # Priority 4: Blanket (no ownership qualifier) - # Only apply if we have protection keyword but no ownership context - # Note: Abilities can be listed with "and" (e.g., "gain hexproof and indestructible") - blanket_patterns = [ - r'all\s+(?:creatures?|permanents?)\s+(?:have|gain)\s+(?:[^.]*\s+and\s+)?' + ability_lower, - r'each\s+(?:creature|permanent)\s+(?:has|gains?)\s+(?:[^.]*\s+and\s+)?' + ability_lower, - r'(?:creatures?|permanents?)\s+(?:have|gain)\s+(?:[^.]*\s+and\s+)?' 
+ ability_lower - ] - - for pattern in blanket_patterns: - if re.search(pattern, text_lower): - # Double-check no ownership was missed - if 'you control' not in text_lower and 'opponent' not in text_lower: - return "Blanket" - - return None + # Use generic scope detection with grant verb checking AND keywords + return scope_utils.detect_scope( + text=text, + card_name=card_name, + ability_keyword=ability, + patterns=patterns, + allow_multiple=False, + check_grant_verbs=True, + keywords=keywords + ) -def get_protection_scope_tags(text: str, card_name: str) -> Set[str]: +def get_protection_scope_tags(text: str, card_name: str, keywords: Optional[str] = None) -> Set[str]: """ Get all protection scope metadata tags for a card. @@ -167,6 +129,7 @@ def get_protection_scope_tags(text: str, card_name: str) -> Set[str]: Args: text: Card text card_name: Card name + keywords: Optional keywords field for static keyword detection Returns: Set of metadata tags like {"Self: Indestructible", "Your Permanents: Ward"} @@ -178,7 +141,7 @@ def get_protection_scope_tags(text: str, card_name: str) -> Set[str]: # Check each protection ability for ability in PROTECTION_ABILITIES: - scope = detect_protection_scope(text, card_name, ability) + scope = detect_protection_scope(text, card_name, ability, keywords) if scope: # Format: "{Scope}: {Ability}" diff --git a/code/tagging/regex_patterns.py b/code/tagging/regex_patterns.py new file mode 100644 index 0000000..f254e48 --- /dev/null +++ b/code/tagging/regex_patterns.py @@ -0,0 +1,455 @@ +""" +Centralized regex patterns for MTG card tagging. + +All patterns compiled with re.IGNORECASE for case-insensitive matching. +Organized by semantic category for maintainability and reusability. + +Usage: + from code.tagging import regex_patterns as rgx + + mask = df['text'].str.contains(rgx.YOU_CONTROL, na=False) + if rgx.GRANT_HEXPROOF.search(text): + ... 
+ + # Or use builder functions + pattern = rgx.ownership_pattern('creature', 'you') + mask = df['text'].str.contains(pattern, na=False) +""" + +import re +from typing import Pattern, List + +# ============================================================================= +# OWNERSHIP & CONTROLLER PATTERNS +# ============================================================================= + +YOU_CONTROL: Pattern = re.compile(r'you control', re.IGNORECASE) +THEY_CONTROL: Pattern = re.compile(r'they control', re.IGNORECASE) +OPPONENT_CONTROL: Pattern = re.compile(r'opponent[s]? control', re.IGNORECASE) + +CREATURE_YOU_CONTROL: Pattern = re.compile(r'creature[s]? you control', re.IGNORECASE) +PERMANENT_YOU_CONTROL: Pattern = re.compile(r'permanent[s]? you control', re.IGNORECASE) +ARTIFACT_YOU_CONTROL: Pattern = re.compile(r'artifact[s]? you control', re.IGNORECASE) +ENCHANTMENT_YOU_CONTROL: Pattern = re.compile(r'enchantment[s]? you control', re.IGNORECASE) + +# ============================================================================= +# GRANT VERB PATTERNS +# ============================================================================= + +GAIN: Pattern = re.compile(r'\bgain[s]?\b', re.IGNORECASE) +HAS: Pattern = re.compile(r'\bhas\b', re.IGNORECASE) +HAVE: Pattern = re.compile(r'\bhave\b', re.IGNORECASE) +GET: Pattern = re.compile(r'\bget[s]?\b', re.IGNORECASE) + +GRANT_VERBS: List[str] = ['gain', 'gains', 'has', 'have', 'get', 'gets'] + +# ============================================================================= +# TARGETING PATTERNS +# ============================================================================= + +TARGET_PLAYER: Pattern = re.compile(r'target player', re.IGNORECASE) +TARGET_OPPONENT: Pattern = re.compile(r'target opponent', re.IGNORECASE) +TARGET_CREATURE: Pattern = re.compile(r'target creature', re.IGNORECASE) +TARGET_PERMANENT: Pattern = re.compile(r'target permanent', re.IGNORECASE) +TARGET_ARTIFACT: Pattern = re.compile(r'target artifact', 
re.IGNORECASE) +TARGET_ENCHANTMENT: Pattern = re.compile(r'target enchantment', re.IGNORECASE) + +EACH_PLAYER: Pattern = re.compile(r'each player', re.IGNORECASE) +EACH_OPPONENT: Pattern = re.compile(r'each opponent', re.IGNORECASE) +TARGET_YOU_CONTROL: Pattern = re.compile(r'target .* you control', re.IGNORECASE) + +# ============================================================================= +# PROTECTION ABILITY PATTERNS +# ============================================================================= + +HEXPROOF: Pattern = re.compile(r'\bhexproof\b', re.IGNORECASE) +SHROUD: Pattern = re.compile(r'\bshroud\b', re.IGNORECASE) +INDESTRUCTIBLE: Pattern = re.compile(r'\bindestructible\b', re.IGNORECASE) +WARD: Pattern = re.compile(r'\bward\b', re.IGNORECASE) +PROTECTION_FROM: Pattern = re.compile(r'protection from', re.IGNORECASE) + +PROTECTION_ABILITIES: List[str] = ['hexproof', 'shroud', 'indestructible', 'ward', 'protection'] + +CANT_HAVE_PROTECTION: Pattern = re.compile(r"can't have (hexproof|indestructible|ward|shroud)", re.IGNORECASE) +LOSE_PROTECTION: Pattern = re.compile(r"lose[s]? (hexproof|indestructible|ward|shroud|protection)", re.IGNORECASE) + +# ============================================================================= +# CARD DRAW PATTERNS +# ============================================================================= + +DRAW_A_CARD: Pattern = re.compile(r'draw[s]? (?:a|one) card', re.IGNORECASE) +DRAW_CARDS: Pattern = re.compile(r'draw[s]? 
(?:two|three|four|five|x|\d+) card', re.IGNORECASE) +DRAW: Pattern = re.compile(r'\bdraw[s]?\b', re.IGNORECASE) + +# ============================================================================= +# TOKEN CREATION PATTERNS +# ============================================================================= + +CREATE_TOKEN: Pattern = re.compile(r'create[s]?.*token', re.IGNORECASE) +PUT_TOKEN: Pattern = re.compile(r'put[s]?.*token', re.IGNORECASE) + +CREATE_TREASURE: Pattern = re.compile(r'create.*treasure token', re.IGNORECASE) +CREATE_FOOD: Pattern = re.compile(r'create.*food token', re.IGNORECASE) +CREATE_CLUE: Pattern = re.compile(r'create.*clue token', re.IGNORECASE) +CREATE_BLOOD: Pattern = re.compile(r'create.*blood token', re.IGNORECASE) + +# ============================================================================= +# COUNTER PATTERNS +# ============================================================================= + +PLUS_ONE_COUNTER: Pattern = re.compile(r'\+1/\+1 counter', re.IGNORECASE) +MINUS_ONE_COUNTER: Pattern = re.compile(r'\-1/\-1 counter', re.IGNORECASE) +LOYALTY_COUNTER: Pattern = re.compile(r'loyalty counter', re.IGNORECASE) +PROLIFERATE: Pattern = re.compile(r'\bproliferate\b', re.IGNORECASE) + +ONE_OR_MORE_COUNTERS: Pattern = re.compile(r'one or more counter', re.IGNORECASE) +ONE_OR_MORE_PLUS_ONE_COUNTERS: Pattern = re.compile(r'one or more \+1/\+1 counter', re.IGNORECASE) +IF_HAD_COUNTERS: Pattern = re.compile(r'if it had counter', re.IGNORECASE) +WITH_COUNTERS_ON_THEM: Pattern = re.compile(r'with counter[s]? 
on them', re.IGNORECASE) + +# ============================================================================= +# SACRIFICE & REMOVAL PATTERNS +# ============================================================================= + +SACRIFICE: Pattern = re.compile(r'sacrifice[s]?', re.IGNORECASE) +SACRIFICED: Pattern = re.compile(r'sacrificed', re.IGNORECASE) +DESTROY: Pattern = re.compile(r'destroy[s]?', re.IGNORECASE) +EXILE: Pattern = re.compile(r'exile[s]?', re.IGNORECASE) +EXILED: Pattern = re.compile(r'exiled', re.IGNORECASE) + +SACRIFICE_DRAW: Pattern = re.compile(r'sacrifice (?:a|an) (?:artifact|creature|permanent)(?:[^,]*),?[^,]*draw', re.IGNORECASE) +SACRIFICE_COLON_DRAW: Pattern = re.compile(r'sacrifice [^:]+: draw', re.IGNORECASE) +SACRIFICED_COMMA_DRAW: Pattern = re.compile(r'sacrificed[^,]+, draw', re.IGNORECASE) +EXILE_RETURN_BATTLEFIELD: Pattern = re.compile(r'exile.*return.*to the battlefield', re.IGNORECASE) + +# ============================================================================= +# DISCARD PATTERNS +# ============================================================================= + +DISCARD_A_CARD: Pattern = re.compile(r'discard (?:a|one|two|three|x) card', re.IGNORECASE) +DISCARD_YOUR_HAND: Pattern = re.compile(r'discard your hand', re.IGNORECASE) +YOU_DISCARD: Pattern = re.compile(r'you discard', re.IGNORECASE) + +# Discard triggers +WHENEVER_YOU_DISCARD: Pattern = re.compile(r'whenever you discard', re.IGNORECASE) +IF_YOU_DISCARDED: Pattern = re.compile(r'if you discarded', re.IGNORECASE) +WHEN_YOU_DISCARD: Pattern = re.compile(r'when you discard', re.IGNORECASE) +FOR_EACH_DISCARDED: Pattern = re.compile(r'for each card you discarded', re.IGNORECASE) + +# Opponent discard +TARGET_PLAYER_DISCARDS: Pattern = re.compile(r'target player discards', re.IGNORECASE) +TARGET_OPPONENT_DISCARDS: Pattern = re.compile(r'target opponent discards', re.IGNORECASE) +EACH_PLAYER_DISCARDS: Pattern = re.compile(r'each player discards', re.IGNORECASE) 
+EACH_OPPONENT_DISCARDS: Pattern = re.compile(r'each opponent discards', re.IGNORECASE) +THAT_PLAYER_DISCARDS: Pattern = re.compile(r'that player discards', re.IGNORECASE) + +# Discard cost +ADDITIONAL_COST_DISCARD: Pattern = re.compile(r'as an additional cost to (?:cast this spell|activate this ability),? discard (?:a|one) card', re.IGNORECASE) +ADDITIONAL_COST_DISCARD_SHORT: Pattern = re.compile(r'as an additional cost,? discard (?:a|one) card', re.IGNORECASE) + +MADNESS: Pattern = re.compile(r'\bmadness\b', re.IGNORECASE) + +# ============================================================================= +# DAMAGE & LIFE LOSS PATTERNS +# ============================================================================= + +DEALS_ONE_DAMAGE: Pattern = re.compile(r'deals\s+1\s+damage', re.IGNORECASE) +EXACTLY_ONE_DAMAGE: Pattern = re.compile(r'exactly\s+1\s+damage', re.IGNORECASE) +LOSES_ONE_LIFE: Pattern = re.compile(r'loses\s+1\s+life', re.IGNORECASE) + +# ============================================================================= +# COST REDUCTION PATTERNS +# ============================================================================= + +COST_LESS: Pattern = re.compile(r'cost[s]? \{[\d\w]\} less', re.IGNORECASE) +COST_LESS_TO_CAST: Pattern = re.compile(r'cost[s]? less to cast', re.IGNORECASE) +WITH_X_IN_COST: Pattern = re.compile(r'with \{[xX]\} in (?:its|their)', re.IGNORECASE) +AFFINITY_FOR: Pattern = re.compile(r'affinity for', re.IGNORECASE) +SPELLS_COST: Pattern = re.compile(r'spells cost', re.IGNORECASE) +SPELLS_YOU_CAST_COST: Pattern = re.compile(r'spells you cast cost', re.IGNORECASE) + +# ============================================================================= +# MONARCH & INITIATIVE PATTERNS +# ============================================================================= + +BECOME_MONARCH: Pattern = re.compile(r'becomes? 
the monarch', re.IGNORECASE) +IS_MONARCH: Pattern = re.compile(r'is the monarch', re.IGNORECASE) +WAS_MONARCH: Pattern = re.compile(r'was the monarch', re.IGNORECASE) +YOU_ARE_MONARCH: Pattern = re.compile(r"you are the monarch|you're the monarch", re.IGNORECASE) +YOU_BECOME_MONARCH: Pattern = re.compile(r'you become the monarch', re.IGNORECASE) +CANT_BECOME_MONARCH: Pattern = re.compile(r"can't become the monarch", re.IGNORECASE) + +# ============================================================================= +# KEYWORD ABILITY PATTERNS +# ============================================================================= + +PARTNER_BASIC: Pattern = re.compile(r'\bpartner\b(?!\s*(?:with|[-—–]))', re.IGNORECASE) +PARTNER_WITH: Pattern = re.compile(r'partner with', re.IGNORECASE) +PARTNER_SURVIVORS: Pattern = re.compile(r'Partner\s*[-—–]\s*Survivors', re.IGNORECASE) +PARTNER_FATHER_SON: Pattern = re.compile(r'Partner\s*[-—–]\s*Father\s*&\s*Son', re.IGNORECASE) + +FLYING: Pattern = re.compile(r'\bflying\b', re.IGNORECASE) +VIGILANCE: Pattern = re.compile(r'\bvigilance\b', re.IGNORECASE) +TRAMPLE: Pattern = re.compile(r'\btrample\b', re.IGNORECASE) +HASTE: Pattern = re.compile(r'\bhaste\b', re.IGNORECASE) +LIFELINK: Pattern = re.compile(r'\blifelink\b', re.IGNORECASE) +DEATHTOUCH: Pattern = re.compile(r'\bdeathtouch\b', re.IGNORECASE) +DOUBLE_STRIKE: Pattern = re.compile(r'double strike', re.IGNORECASE) +FIRST_STRIKE: Pattern = re.compile(r'first strike', re.IGNORECASE) +MENACE: Pattern = re.compile(r'\bmenace\b', re.IGNORECASE) +REACH: Pattern = re.compile(r'\breach\b', re.IGNORECASE) + +UNDYING: Pattern = re.compile(r'\bundying\b', re.IGNORECASE) +PERSIST: Pattern = re.compile(r'\bpersist\b', re.IGNORECASE) +PHASING: Pattern = re.compile(r'\bphasing\b', re.IGNORECASE) +FLASH: Pattern = re.compile(r'\bflash\b', re.IGNORECASE) +TOXIC: Pattern = re.compile(r'toxic\s*\d+', re.IGNORECASE) + +# ============================================================================= +# 
RETURN TO BATTLEFIELD PATTERNS +# ============================================================================= + +RETURN_TO_BATTLEFIELD: Pattern = re.compile(r'return.*to the battlefield', re.IGNORECASE) +RETURN_IT_TO_BATTLEFIELD: Pattern = re.compile(r'return it to the battlefield', re.IGNORECASE) +RETURN_THAT_CARD_TO_BATTLEFIELD: Pattern = re.compile(r'return that card to the battlefield', re.IGNORECASE) +RETURN_THEM_TO_BATTLEFIELD: Pattern = re.compile(r'return them to the battlefield', re.IGNORECASE) +RETURN_THOSE_CARDS_TO_BATTLEFIELD: Pattern = re.compile(r'return those cards to the battlefield', re.IGNORECASE) + +RETURN_TO_HAND: Pattern = re.compile(r'return.*to.*hand', re.IGNORECASE) +RETURN_YOU_CONTROL_TO_HAND: Pattern = re.compile(r'return target.*you control.*to.*hand', re.IGNORECASE) + +# ============================================================================= +# SCOPE & QUALIFIER PATTERNS +# ============================================================================= + +OTHER_CREATURES: Pattern = re.compile(r'other creature[s]?', re.IGNORECASE) +ALL_CREATURES: Pattern = re.compile(r'\ball creature[s]?\b', re.IGNORECASE) +ALL_PERMANENTS: Pattern = re.compile(r'\ball permanent[s]?\b', re.IGNORECASE) +ALL_SLIVERS: Pattern = re.compile(r'\ball sliver[s]?\b', re.IGNORECASE) + +EQUIPPED_CREATURE: Pattern = re.compile(r'equipped creature', re.IGNORECASE) +ENCHANTED_CREATURE: Pattern = re.compile(r'enchanted creature', re.IGNORECASE) +ENCHANTED_PERMANENT: Pattern = re.compile(r'enchanted permanent', re.IGNORECASE) +ENCHANTED_ENCHANTMENT: Pattern = re.compile(r'enchanted enchantment', re.IGNORECASE) + +# ============================================================================= +# COMBAT PATTERNS +# ============================================================================= + +ATTACK: Pattern = re.compile(r'\battack[s]?\b', re.IGNORECASE) +ATTACKS: Pattern = re.compile(r'\battacks\b', re.IGNORECASE) +BLOCK: Pattern = re.compile(r'\bblock[s]?\b', 
re.IGNORECASE) +BLOCKS: Pattern = re.compile(r'\bblocks\b', re.IGNORECASE) +COMBAT_DAMAGE: Pattern = re.compile(r'combat damage', re.IGNORECASE) + +WHENEVER_ATTACKS: Pattern = re.compile(r'whenever .* attacks', re.IGNORECASE) +WHEN_ATTACKS: Pattern = re.compile(r'when .* attacks', re.IGNORECASE) + +# ============================================================================= +# TYPE LINE PATTERNS +# ============================================================================= + +INSTANT: Pattern = re.compile(r'\bInstant\b', re.IGNORECASE) +SORCERY: Pattern = re.compile(r'\bSorcery\b', re.IGNORECASE) +ARTIFACT: Pattern = re.compile(r'\bArtifact\b', re.IGNORECASE) +ENCHANTMENT: Pattern = re.compile(r'\bEnchantment\b', re.IGNORECASE) +CREATURE: Pattern = re.compile(r'\bCreature\b', re.IGNORECASE) +PLANESWALKER: Pattern = re.compile(r'\bPlaneswalker\b', re.IGNORECASE) +LAND: Pattern = re.compile(r'\bLand\b', re.IGNORECASE) + +AURA: Pattern = re.compile(r'\bAura\b', re.IGNORECASE) +EQUIPMENT: Pattern = re.compile(r'\bEquipment\b', re.IGNORECASE) +VEHICLE: Pattern = re.compile(r'\bVehicle\b', re.IGNORECASE) +SAGA: Pattern = re.compile(r'\bSaga\b', re.IGNORECASE) + +NONCREATURE: Pattern = re.compile(r'noncreature', re.IGNORECASE) + +# ============================================================================= +# PATTERN BUILDER FUNCTIONS +# ============================================================================= + +def ownership_pattern(subject: str, owner: str = "you") -> Pattern: + """ + Build ownership pattern like 'creatures you control', 'permanents opponent controls'. + + Args: + subject: The card type (e.g., 'creature', 'permanent', 'artifact') + owner: Controller ('you', 'opponent', 'they', etc.) 
+ + Returns: + Compiled regex pattern + + Examples: + >>> ownership_pattern('creature', 'you') + # Matches "creatures you control" + >>> ownership_pattern('artifact', 'opponent') + # Matches "artifacts opponent controls" + """ + pattern = fr'{subject}[s]?\s+{owner}\s+control[s]?' + return re.compile(pattern, re.IGNORECASE) + + +def grant_pattern(subject: str, verb: str, ability: str) -> Pattern: + """ + Build grant pattern like 'creatures you control gain hexproof'. + + Args: + subject: What gains the ability ('creatures you control', 'target creature', etc.) + verb: Grant verb ('gain', 'has', 'get', etc.) + ability: Ability granted ('hexproof', 'flying', 'ward', etc.) + + Returns: + Compiled regex pattern + + Examples: + >>> grant_pattern('creatures you control', 'gain', 'hexproof') + # Matches "creatures you control gain hexproof" + """ + pattern = fr'{subject}\s+{verb}[s]?\s+{ability}' + return re.compile(pattern, re.IGNORECASE) + + +def token_creation_pattern(quantity: str, token_type: str) -> Pattern: + """ + Build token creation pattern like 'create two 1/1 Soldier tokens'. + + Args: + quantity: Number word or variable ('one', 'two', 'x', etc.) + token_type: Token name ('treasure', 'food', 'soldier', etc.) + + Returns: + Compiled regex pattern + + Examples: + >>> token_creation_pattern('two', 'treasure') + # Matches "create two Treasure tokens" + """ + pattern = fr'create[s]?\s+(?:{quantity})\s+.*{token_type}\s+token' + return re.compile(pattern, re.IGNORECASE) + + +def kindred_grant_pattern(tribe: str, ability: str) -> Pattern: + """ + Build kindred grant pattern like 'knights you control gain protection'. + + Args: + tribe: Creature type ('knight', 'elf', 'zombie', etc.) + ability: Ability granted ('hexproof', 'protection', etc.) 
+ + Returns: + Compiled regex pattern + + Examples: + >>> kindred_grant_pattern('knight', 'hexproof') + # Matches "Knights you control gain hexproof" + """ + pattern = fr'{tribe}[s]?\s+you\s+control.*\b{ability}\b' + return re.compile(pattern, re.IGNORECASE) + + +def targeting_pattern(target: str, subject: str = None) -> Pattern: + """ + Build targeting pattern like 'target creature you control'. + + Args: + target: What is targeted ('player', 'opponent', 'creature', etc.) + subject: Optional qualifier ('you control', 'opponent controls', etc.) + + Returns: + Compiled regex pattern + + Examples: + >>> targeting_pattern('creature', 'you control') + # Matches "target creature you control" + >>> targeting_pattern('opponent') + # Matches "target opponent" + """ + if subject: + pattern = fr'target\s+{target}\s+{subject}' + else: + pattern = fr'target\s+{target}' + return re.compile(pattern, re.IGNORECASE) + + +# ============================================================================= +# MODULE EXPORTS +# ============================================================================= + +__all__ = [ + # Ownership + 'YOU_CONTROL', 'THEY_CONTROL', 'OPPONENT_CONTROL', + 'CREATURE_YOU_CONTROL', 'PERMANENT_YOU_CONTROL', 'ARTIFACT_YOU_CONTROL', + 'ENCHANTMENT_YOU_CONTROL', + + # Grant verbs + 'GAIN', 'HAS', 'HAVE', 'GET', 'GRANT_VERBS', + + # Targeting + 'TARGET_PLAYER', 'TARGET_OPPONENT', 'TARGET_CREATURE', 'TARGET_PERMANENT', + 'TARGET_ARTIFACT', 'TARGET_ENCHANTMENT', 'EACH_PLAYER', 'EACH_OPPONENT', + 'TARGET_YOU_CONTROL', + + # Protection abilities + 'HEXPROOF', 'SHROUD', 'INDESTRUCTIBLE', 'WARD', 'PROTECTION_FROM', + 'PROTECTION_ABILITIES', 'CANT_HAVE_PROTECTION', 'LOSE_PROTECTION', + + # Draw + 'DRAW_A_CARD', 'DRAW_CARDS', 'DRAW', + + # Tokens + 'CREATE_TOKEN', 'PUT_TOKEN', + 'CREATE_TREASURE', 'CREATE_FOOD', 'CREATE_CLUE', 'CREATE_BLOOD', + + # Counters + 'PLUS_ONE_COUNTER', 'MINUS_ONE_COUNTER', 'LOYALTY_COUNTER', 'PROLIFERATE', + 'ONE_OR_MORE_COUNTERS', 
'ONE_OR_MORE_PLUS_ONE_COUNTERS', 'IF_HAD_COUNTERS', 'WITH_COUNTERS_ON_THEM', + + # Removal + 'SACRIFICE', 'SACRIFICED', 'DESTROY', 'EXILE', 'EXILED', + 'SACRIFICE_DRAW', 'SACRIFICE_COLON_DRAW', 'SACRIFICED_COMMA_DRAW', + 'EXILE_RETURN_BATTLEFIELD', + + # Discard + 'DISCARD_A_CARD', 'DISCARD_YOUR_HAND', 'YOU_DISCARD', + 'WHENEVER_YOU_DISCARD', 'IF_YOU_DISCARDED', 'WHEN_YOU_DISCARD', 'FOR_EACH_DISCARDED', + 'TARGET_PLAYER_DISCARDS', 'TARGET_OPPONENT_DISCARDS', 'EACH_PLAYER_DISCARDS', + 'EACH_OPPONENT_DISCARDS', 'THAT_PLAYER_DISCARDS', + 'ADDITIONAL_COST_DISCARD', 'ADDITIONAL_COST_DISCARD_SHORT', 'MADNESS', + + # Damage & Life Loss + 'DEALS_ONE_DAMAGE', 'EXACTLY_ONE_DAMAGE', 'LOSES_ONE_LIFE', + + # Cost reduction + 'COST_LESS', 'COST_LESS_TO_CAST', 'WITH_X_IN_COST', 'AFFINITY_FOR', 'SPELLS_COST', 'SPELLS_YOU_CAST_COST', + + # Monarch + 'BECOME_MONARCH', 'IS_MONARCH', 'WAS_MONARCH', 'YOU_ARE_MONARCH', + 'YOU_BECOME_MONARCH', 'CANT_BECOME_MONARCH', + + # Keywords + 'PARTNER_BASIC', 'PARTNER_WITH', 'PARTNER_SURVIVORS', 'PARTNER_FATHER_SON', + 'FLYING', 'VIGILANCE', 'TRAMPLE', 'HASTE', 'LIFELINK', 'DEATHTOUCH', + 'DOUBLE_STRIKE', 'FIRST_STRIKE', 'MENACE', 'REACH', + 'UNDYING', 'PERSIST', 'PHASING', 'FLASH', 'TOXIC', + + # Return + 'RETURN_TO_BATTLEFIELD', 'RETURN_IT_TO_BATTLEFIELD', 'RETURN_THAT_CARD_TO_BATTLEFIELD', + 'RETURN_THEM_TO_BATTLEFIELD', 'RETURN_THOSE_CARDS_TO_BATTLEFIELD', + 'RETURN_TO_HAND', 'RETURN_YOU_CONTROL_TO_HAND', + + # Scope + 'OTHER_CREATURES', 'ALL_CREATURES', 'ALL_PERMANENTS', 'ALL_SLIVERS', + 'EQUIPPED_CREATURE', 'ENCHANTED_CREATURE', 'ENCHANTED_PERMANENT', 'ENCHANTED_ENCHANTMENT', + + # Combat + 'ATTACK', 'ATTACKS', 'BLOCK', 'BLOCKS', 'COMBAT_DAMAGE', + 'WHENEVER_ATTACKS', 'WHEN_ATTACKS', + + # Type line + 'INSTANT', 'SORCERY', 'ARTIFACT', 'ENCHANTMENT', 'CREATURE', 'PLANESWALKER', 'LAND', + 'AURA', 'EQUIPMENT', 'VEHICLE', 'SAGA', 'NONCREATURE', + + # Builders + 'ownership_pattern', 'grant_pattern', 'token_creation_pattern', + 
'kindred_grant_pattern', 'targeting_pattern', +] diff --git a/code/tagging/scope_detection_utils.py b/code/tagging/scope_detection_utils.py new file mode 100644 index 0000000..9a51c2c --- /dev/null +++ b/code/tagging/scope_detection_utils.py @@ -0,0 +1,420 @@ +""" +Scope Detection Utilities + +Generic utilities for detecting the scope of card abilities (protection, phasing, etc.). +Provides reusable pattern-matching logic to avoid duplication across modules. + +Created as part of M2: Create Scope Detection Utilities milestone. +""" + +# Standard library imports +import re +from dataclasses import dataclass +from typing import List, Optional, Set + +# Local application imports +from . import regex_patterns as rgx +from . import tag_utils +from code.logging_util import get_logger + +logger = get_logger(__name__) + + +@dataclass +class ScopePatterns: + """ + Pattern collections for scope detection. + + Attributes: + opponent: Patterns that indicate opponent ownership + self_ref: Patterns that indicate self-reference + your_permanents: Patterns that indicate "you control" + blanket: Patterns that indicate no ownership qualifier + targeted: Patterns that indicate targeting (optional) + """ + opponent: List[re.Pattern] + self_ref: List[re.Pattern] + your_permanents: List[re.Pattern] + blanket: List[re.Pattern] + targeted: Optional[List[re.Pattern]] = None + + +def detect_scope( + text: str, + card_name: str, + ability_keyword: str, + patterns: ScopePatterns, + allow_multiple: bool = False, + check_grant_verbs: bool = False, + keywords: Optional[str] = None, +) -> Optional[str]: + """ + Generic scope detection with priority ordering. + + Detection priority (prevents misclassification): + 0. Static keyword (in keywords field or simple list) → "Self" + 1. Opponent ownership → "Opponent Permanents" + 2. Self-reference → "Self" + 3. Your ownership → "Your Permanents" + 4. 
No ownership qualifier → "Blanket" + + Args: + text: Card text + card_name: Card name (for self-reference detection) + ability_keyword: Ability keyword to look for (e.g., "hexproof", "phasing") + patterns: ScopePatterns object with pattern collections + allow_multiple: If True, returns Set[str] instead of single scope + check_grant_verbs: If True, checks for grant verbs before assuming "Self" + keywords: Optional keywords field from card data (for static keyword detection) + + Returns: + Scope string or None: "Self", "Your Permanents", "Blanket", "Opponent Permanents" + If allow_multiple=True, returns Set[str] with all matching scopes + """ + if not text or not ability_keyword: + return set() if allow_multiple else None + + text_lower = text.lower() + ability_lower = ability_keyword.lower() + card_name_lower = card_name.lower() if card_name else '' + + # Check if ability is mentioned in text + if ability_lower not in text_lower: + return set() if allow_multiple else None + + # Priority 0: Check if this is a static keyword ability + # Static keywords appear in the keywords field or as simple comma-separated lists + # without grant verbs (e.g., "Flying, first strike, protection from black") + if check_static_keyword(ability_keyword, keywords, text): + if allow_multiple: + return {"Self"} + else: + return "Self" + + if allow_multiple: + scopes = set() + else: + scopes = None + + # Priority 1: Opponent ownership + for pattern in patterns.opponent: + if pattern.search(text_lower): + if allow_multiple: + scopes.add("Opponent Permanents") + break + else: + return "Opponent Permanents" + + # Priority 2: Self-reference + is_self = _check_self_reference(text_lower, card_name_lower, ability_lower, patterns.self_ref) + + # If check_grant_verbs is True, verify we don't have grant patterns before assuming Self + if is_self and check_grant_verbs: + has_grant_pattern = _has_grant_verbs(text_lower) + if not has_grant_pattern: + if allow_multiple: + scopes.add("Self") + else: + 
return "Self" + elif is_self: + if allow_multiple: + scopes.add("Self") + else: + return "Self" + + # Priority 3: Your ownership + for pattern in patterns.your_permanents: + if pattern.search(text_lower): + if allow_multiple: + scopes.add("Your Permanents") + break + else: + return "Your Permanents" + + # Priority 4: Blanket (no ownership qualifier) + for pattern in patterns.blanket: + if pattern.search(text_lower): + # Double-check no ownership was missed + if not rgx.YOU_CONTROL.search(text_lower) and 'opponent' not in text_lower: + if allow_multiple: + scopes.add("Blanket") + break + else: + return "Blanket" + + return scopes if allow_multiple else None + + +def detect_multi_scope( + text: str, + card_name: str, + ability_keyword: str, + patterns: ScopePatterns, + check_grant_verbs: bool = False, + keywords: Optional[str] = None, +) -> Set[str]: + """ + Detect multiple scopes for cards with multiple effects. + + Some cards grant abilities to multiple scopes: + - Self-hexproof + grants ward to others + - Target phasing + your permanents phasing + + Args: + text: Card text + card_name: Card name + ability_keyword: Ability keyword to look for + patterns: ScopePatterns object + check_grant_verbs: If True, checks for grant verbs before assuming "Self" + keywords: Optional keywords field for static keyword detection + + Returns: + Set of scope strings + """ + scopes = set() + + if not text or not ability_keyword: + return scopes + + text_lower = text.lower() + ability_lower = ability_keyword.lower() + card_name_lower = card_name.lower() if card_name else '' + + # Check for static keyword first + if check_static_keyword(ability_keyword, keywords, text): + scopes.add("Self") + # For static keywords, we usually don't have multiple scopes + # But continue checking in case there are additional effects + + # Check if ability is mentioned + if ability_lower not in text_lower: + return scopes + + # Check opponent patterns + if any(pattern.search(text_lower) for pattern in 
patterns.opponent): + scopes.add("Opponent Permanents") + + # Check self-reference + is_self = _check_self_reference(text_lower, card_name_lower, ability_lower, patterns.self_ref) + + if is_self: + if check_grant_verbs: + has_grant_pattern = _has_grant_verbs(text_lower) + if not has_grant_pattern: + scopes.add("Self") + else: + scopes.add("Self") + + # Check your permanents + if any(pattern.search(text_lower) for pattern in patterns.your_permanents): + scopes.add("Your Permanents") + + # Check blanket (no ownership) + has_blanket = any(pattern.search(text_lower) for pattern in patterns.blanket) + no_ownership = not rgx.YOU_CONTROL.search(text_lower) and 'opponent' not in text_lower + + if has_blanket and no_ownership: + scopes.add("Blanket") + + # Optional: Check for targeting + if patterns.targeted: + if any(pattern.search(text_lower) for pattern in patterns.targeted): + scopes.add("Targeted") + + return scopes + + +def _check_self_reference( + text_lower: str, + card_name_lower: str, + ability_lower: str, + self_patterns: List[re.Pattern] +) -> bool: + """ + Check if text contains self-reference patterns. 
+ + Args: + text_lower: Lowercase card text + card_name_lower: Lowercase card name + ability_lower: Lowercase ability keyword + self_patterns: List of self-reference patterns + + Returns: + True if self-reference found + """ + # Check provided self patterns + for pattern in self_patterns: + if pattern.search(text_lower): + return True + + # Check for card name reference (if provided) + if card_name_lower: + card_name_escaped = re.escape(card_name_lower) + card_name_pattern = re.compile(rf'\b{card_name_escaped}\b', re.IGNORECASE) + + if card_name_pattern.search(text_lower): + # Make sure it's in a self-ability context + self_context_patterns = [ + re.compile(rf'\b{card_name_escaped}\s+(?:has|gains?)\s+{ability_lower}', re.IGNORECASE), + re.compile(rf'\b{card_name_escaped}\s+is\s+{ability_lower}', re.IGNORECASE), + ] + + for pattern in self_context_patterns: + if pattern.search(text_lower): + return True + + return False + + +def _has_grant_verbs(text_lower: str) -> bool: + """ + Check if text contains grant verb patterns. + + Used to distinguish inherent abilities from granted abilities. + + Args: + text_lower: Lowercase card text + + Returns: + True if grant verbs found + """ + grant_patterns = [ + re.compile(r'(?:have|gain|grant|give|get)[s]?\s+', re.IGNORECASE), + rgx.OTHER_CREATURES, + rgx.CREATURE_YOU_CONTROL, + rgx.PERMANENT_YOU_CONTROL, + rgx.EQUIPPED_CREATURE, + rgx.ENCHANTED_CREATURE, + rgx.TARGET_CREATURE, + ] + + return any(pattern.search(text_lower) for pattern in grant_patterns) + + +def format_scope_tag(scope: str, ability: str) -> str: + """ + Format a scope and ability into a metadata tag. + + Args: + scope: Scope string (e.g., "Self", "Your Permanents") + ability: Ability name (e.g., "Hexproof", "Phasing") + + Returns: + Formatted tag string (e.g., "Self: Hexproof") + """ + return f"{scope}: {ability}" + + +def has_keyword(text: str, keywords: List[str]) -> bool: + """ + Quick check if card text contains any of the specified keywords. 
+ + Args: + text: Card text + keywords: List of keywords to search for + + Returns: + True if any keyword found + """ + if not text: + return False + + text_lower = text.lower() + return any(keyword.lower() in text_lower for keyword in keywords) + + +def check_static_keyword( + ability_keyword: str, + keywords: Optional[str] = None, + text: Optional[str] = None +) -> bool: + """ + Check if card has ability as a static keyword (not granted to others). + + A static keyword is one that appears: + 1. In the keywords field, OR + 2. As a simple comma-separated list without grant verbs + (e.g., "Flying, first strike, protection from black") + + Args: + ability_keyword: Ability to check (e.g., "Protection", "Hexproof") + keywords: Optional keywords field from card data + text: Optional card text for fallback detection + + Returns: + True if ability appears as static keyword + """ + ability_lower = ability_keyword.lower() + + # Check keywords field first (most reliable) + if keywords: + keywords_lower = keywords.lower() + if ability_lower in keywords_lower: + return True + + # Fallback: Check if ability appears in simple comma-separated keyword list + # Pattern: starts with keywords (Flying, First strike, etc.) 
without grant verbs + # Example: "Flying, first strike, vigilance, trample, haste, protection from black" + if text: + text_lower = text.lower() + + # Check if ability appears in text but WITHOUT grant verbs + if ability_lower in text_lower: + # Look for grant verbs that would indicate this is NOT a static keyword + grant_verbs = ['have', 'has', 'gain', 'gains', 'get', 'gets', 'grant', 'grants', 'give', 'gives'] + + # Find the position of the ability in text + ability_pos = text_lower.find(ability_lower) + + # Check the 50 characters before the ability for grant verbs + # This catches patterns like "creatures gain protection" or "has hexproof" + context_before = text_lower[max(0, ability_pos - 50):ability_pos] + + # If no grant verbs found nearby, it's likely a static keyword + if not any(verb in context_before for verb in grant_verbs): + # Additional check: is it part of a comma-separated list? + # This helps with "Flying, first strike, protection from X" patterns + context_before_30 = text_lower[max(0, ability_pos - 30):ability_pos] + if ',' in context_before_30 or ability_pos < 10: + return True + + return False + + +def check_static_keyword_legacy( + keywords: str, + static_keyword: str, + text: str, + grant_patterns: Optional[List[re.Pattern]] = None +) -> bool: + """ + LEGACY: Check if card has static keyword without granting it to others. + + Used for abilities like "Phasing" that can be both static and granted. 
+ + Args: + keywords: Card keywords field + static_keyword: Keyword to search for (e.g., "phasing") + text: Card text + grant_patterns: Optional patterns to check for granting language + + Returns: + True if static keyword found and not granted to others + """ + if not keywords: + return False + + keywords_lower = keywords.lower() + + if static_keyword.lower() not in keywords_lower: + return False + + # If grant patterns provided, check if card grants to others + if grant_patterns: + text_no_reminder = tag_utils.strip_reminder_text(text.lower()) if text else '' + grants_to_others = any(pattern.search(text_no_reminder) for pattern in grant_patterns) + + # Only return True if NOT granting to others + return not grants_to_others + + return True diff --git a/code/tagging/tag_constants.py b/code/tagging/tag_constants.py index e3d8895..b197fc5 100644 --- a/code/tagging/tag_constants.py +++ b/code/tagging/tag_constants.py @@ -1,13 +1,59 @@ -from typing import Dict, List, Final +""" +Tag Constants Module + +Centralized constants for card tagging and theme detection across the MTG deckbuilder. +This module contains all shared constants used by the tagging system including: +- Card types and creature types +- Pattern groups and regex fragments +- Tag groupings and relationships +- Protection and ability keywords +- Magic numbers and thresholds +""" + +from typing import Dict, Final, List + +# ============================================================================= +# TABLE OF CONTENTS +# ============================================================================= +# 1. TRIGGERS & BASIC PATTERNS +# 2. TAG GROUPS & RELATIONSHIPS +# 3. PATTERN GROUPS & REGEX FRAGMENTS +# 4. PHRASE GROUPS +# 5. COUNTER TYPES +# 6. CREATURE TYPES +# 7. NON-CREATURE TYPES & SPECIAL TYPES +# 8. PROTECTION & ABILITY KEYWORDS +# 9. TOKEN TYPES +# 10. MAGIC NUMBERS & THRESHOLDS +# 11. DATAFRAME COLUMN REQUIREMENTS +# 12. TYPE-TAG MAPPINGS +# 13. DRAW-RELATED CONSTANTS +# 14. 
EQUIPMENT-RELATED CONSTANTS +# 15. AURA & VOLTRON CONSTANTS +# 16. LANDS MATTER PATTERNS +# 17. SACRIFICE & GRAVEYARD PATTERNS +# 18. CREATURE-RELATED PATTERNS +# 19. TOKEN-RELATED PATTERNS +# 20. REMOVAL & DESTRUCTION PATTERNS +# 21. SPELL-RELATED PATTERNS +# 22. MISC PATTERNS & EXCLUSIONS + +# ============================================================================= +# 1. TRIGGERS & BASIC PATTERNS +# ============================================================================= TRIGGERS: List[str] = ['when', 'whenever', 'at'] -NUM_TO_SEARCH: List[str] = ['a', 'an', 'one', '1', 'two', '2', 'three', '3', 'four','4', 'five', '5', - 'six', '6', 'seven', '7', 'eight', '8', 'nine', '9', 'ten', '10', - 'x','one or more'] +NUM_TO_SEARCH: List[str] = [ + 'a', 'an', 'one', '1', 'two', '2', 'three', '3', 'four', '4', 'five', '5', + 'six', '6', 'seven', '7', 'eight', '8', 'nine', '9', 'ten', '10', + 'x', 'one or more' +] +# ============================================================================= +# 2. TAG GROUPS & RELATIONSHIPS +# ============================================================================= -# Constants for common tag groupings TAG_GROUPS: Dict[str, List[str]] = { "Cantrips": ["Cantrips", "Card Draw", "Spellslinger", "Spells Matter"], "Tokens": ["Token Creation", "Tokens Matter"], @@ -19,8 +65,11 @@ TAG_GROUPS: Dict[str, List[str]] = { "Spells": ["Spellslinger", "Spells Matter"] } -# Common regex patterns -PATTERN_GROUPS: Dict[str, str] = { +# ============================================================================= +# 3. PATTERN GROUPS & REGEX FRAGMENTS +# ============================================================================= + +PATTERN_GROUPS: Dict[str, str] = { "draw": r"draw[s]? a card|draw[s]? one card", "combat": r"attack[s]?|block[s]?|combat damage", "tokens": r"create[s]? .* token|put[s]? .* token", @@ -30,7 +79,10 @@ PATTERN_GROUPS: Dict[str, str] = { "cost_reduction": r"cost[s]? \{[\d\w]\} less|affinity for|cost[s]? 
less to cast|chosen type cost|copy cost|from exile cost|from exile this turn cost|from your graveyard cost|has undaunted|have affinity for artifacts|other than your hand cost|spells cost|spells you cast cost|that target .* cost|those spells cost|you cast cost|you pay cost" } -# Common phrase groups (lists) used across taggers +# ============================================================================= +# 4. PHRASE GROUPS +# ============================================================================= + PHRASE_GROUPS: Dict[str, List[str]] = { # Variants for monarch wording "monarch": [ @@ -52,11 +104,15 @@ PHRASE_GROUPS: Dict[str, List[str]] = { r"return .* to the battlefield" ] } -# Common action patterns + CREATE_ACTION_PATTERN: Final[str] = r"create|put" -# Creature/Counter types -COUNTER_TYPES: List[str] = [r'\+0/\+1', r'\+0/\+2', r'\+1/\+0', r'\+1/\+2', r'\+2/\+0', r'\+2/\+2', +# ============================================================================= +# 5. COUNTER TYPES +# ============================================================================= + +COUNTER_TYPES: List[str] = [ + r'\+0/\+1', r'\+0/\+2', r'\+1/\+0', r'\+1/\+2', r'\+2/\+0', r'\+2/\+2', '-0/-1', '-0/-2', '-1/-0', '-1/-2', '-2/-0', '-2/-2', 'Acorn', 'Aegis', 'Age', 'Aim', 'Arrow', 'Arrowhead','Awakening', 'Bait', 'Blaze', 'Blessing', 'Blight',' Blood', 'Bloddline', @@ -90,9 +146,15 @@ COUNTER_TYPES: List[str] = [r'\+0/\+1', r'\+0/\+2', r'\+1/\+0', r'\+1/\+2', r'\+ 'Task', 'Ticket', 'Tide', 'Time', 'Tower', 'Training', 'Trap', 'Treasure', 'Unity', 'Unlock', 'Valor', 'Velocity', 'Verse', 'Vitality', 'Void', 'Volatile', 'Vortex', 'Vow', 'Voyage', 'Wage', - 'Winch', 'Wind', 'Wish'] + 'Winch', 'Wind', 'Wish' +] -CREATURE_TYPES: List[str] = ['Advisor', 'Aetherborn', 'Alien', 'Ally', 'Angel', 'Antelope', 'Ape', 'Archer', 'Archon', 'Armadillo', +# ============================================================================= +# 6. 
CREATURE TYPES +# ============================================================================= + +CREATURE_TYPES: List[str] = [ + 'Advisor', 'Aetherborn', 'Alien', 'Ally', 'Angel', 'Antelope', 'Ape', 'Archer', 'Archon', 'Armadillo', 'Army', 'Artificer', 'Assassin', 'Assembly-Worker', 'Astartes', 'Atog', 'Aurochs', 'Automaton', 'Avatar', 'Azra', 'Badger', 'Balloon', 'Barbarian', 'Bard', 'Basilisk', 'Bat', 'Bear', 'Beast', 'Beaver', 'Beeble', 'Beholder', 'Berserker', 'Bird', 'Blinkmoth', 'Boar', 'Brainiac', 'Bringer', 'Brushwagg', @@ -122,9 +184,15 @@ CREATURE_TYPES: List[str] = ['Advisor', 'Aetherborn', 'Alien', 'Ally', 'Angel', 'Thopter', 'Thrull', 'Tiefling', 'Time Lord', 'Toy', 'Treefolk', 'Trilobite', 'Triskelavite', 'Troll', 'Turtle', 'Tyranid', 'Unicorn', 'Urzan', 'Vampire', 'Varmint', 'Vedalken', 'Volver', 'Wall', 'Walrus', 'Warlock', 'Warrior', 'Wasp', 'Weasel', 'Weird', 'Werewolf', 'Whale', 'Wizard', 'Wolf', 'Wolverine', 'Wombat', - 'Worm', 'Wraith', 'Wurm', 'Yeti', 'Zombie', 'Zubera'] + 'Worm', 'Wraith', 'Wurm', 'Yeti', 'Zombie', 'Zubera' +] -NON_CREATURE_TYPES: List[str] = ['Legendary', 'Creature', 'Enchantment', 'Artifact', +# ============================================================================= +# 7. 
NON-CREATURE TYPES & SPECIAL TYPES +# ============================================================================= + +NON_CREATURE_TYPES: List[str] = [ + 'Legendary', 'Creature', 'Enchantment', 'Artifact', 'Battle', 'Sorcery', 'Instant', 'Land', '-', '—', 'Blood', 'Clue', 'Food', 'Gold', 'Incubator', 'Junk', 'Map', 'Powerstone', 'Treasure', @@ -136,23 +204,66 @@ NON_CREATURE_TYPES: List[str] = ['Legendary', 'Creature', 'Enchantment', 'Artifa 'Shrine', 'Plains', 'Island', 'Swamp', 'Forest', 'Mountain', 'Cave', 'Desert', 'Gate', 'Lair', 'Locus', 'Mine', - 'Power-Plant', 'Sphere', 'Tower', 'Urza\'s'] + 'Power-Plant', 'Sphere', 'Tower', 'Urza\'s' +] OUTLAW_TYPES: List[str] = ['Assassin', 'Mercenary', 'Pirate', 'Rogue', 'Warlock'] -ENCHANTMENT_TOKENS: List[str] = ['Cursed Role', 'Monster Role', 'Royal Role', 'Sorcerer Role', - 'Virtuous Role', 'Wicked Role', 'Young Hero Role', 'Shard'] -ARTIFACT_TOKENS: List[str] = ['Blood', 'Clue', 'Food', 'Gold', 'Incubator', - 'Junk','Map','Powerstone', 'Treasure'] +# ============================================================================= +# 8. PROTECTION & ABILITY KEYWORDS +# ============================================================================= + +PROTECTION_ABILITIES: List[str] = [ + 'Protection', + 'Ward', + 'Hexproof', + 'Shroud', + 'Indestructible' +] + +PROTECTION_KEYWORDS: Final[frozenset] = frozenset({ + 'hexproof', + 'shroud', + 'indestructible', + 'ward', + 'protection from', + 'protection', +}) + +# ============================================================================= +# 9. 
TOKEN TYPES +# ============================================================================= + +ENCHANTMENT_TOKENS: List[str] = [ + 'Cursed Role', 'Monster Role', 'Royal Role', 'Sorcerer Role', + 'Virtuous Role', 'Wicked Role', 'Young Hero Role', 'Shard' +] + +ARTIFACT_TOKENS: List[str] = [ + 'Blood', 'Clue', 'Food', 'Gold', 'Incubator', + 'Junk', 'Map', 'Powerstone', 'Treasure' +] + +# ============================================================================= +# 10. MAGIC NUMBERS & THRESHOLDS +# ============================================================================= + +CONTEXT_WINDOW_SIZE: Final[int] = 70 # Characters to examine around a regex match + +# ============================================================================= +# 11. DATAFRAME COLUMN REQUIREMENTS +# ============================================================================= -# Constants for DataFrame validation and processing REQUIRED_COLUMNS: List[str] = [ 'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors', 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', 'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side' ] -# Mapping of card types to their corresponding theme tags +# ============================================================================= +# 12. TYPE-TAG MAPPINGS +# ============================================================================= + TYPE_TAG_MAPPING: Dict[str, List[str]] = { 'Artifact': ['Artifacts Matter'], 'Battle': ['Battles Matter'], @@ -166,7 +277,10 @@ TYPE_TAG_MAPPING: Dict[str, List[str]] = { 'Sorcery': ['Spells Matter', 'Spellslinger'] } -# Constants for draw-related functionality +# ============================================================================= +# 13. 
DRAW-RELATED CONSTANTS +# ============================================================================= + DRAW_RELATED_TAGS: List[str] = [ 'Card Draw', # General card draw effects 'Conditional Draw', # Draw effects with conditions/triggers @@ -175,16 +289,18 @@ DRAW_RELATED_TAGS: List[str] = [ 'Loot', # Draw + discard effects 'Replacement Draw', # Effects that modify or replace draws 'Sacrifice to Draw', # Draw effects requiring sacrificing permanents - 'Unconditional Draw' # Pure card draw without conditions + 'Unconditional Draw' # Pure card draw without conditions ] -# Text patterns that exclude cards from being tagged as unconditional draw DRAW_EXCLUSION_PATTERNS: List[str] = [ 'annihilator', # Eldrazi mechanic that can match 'draw' patterns - 'ravenous', # Keyword that can match 'draw' patterns + 'ravenous', # Keyword that can match 'draw' patterns ] -# Equipment-related constants +# ============================================================================= +# 14. EQUIPMENT-RELATED CONSTANTS +# ============================================================================= + EQUIPMENT_EXCLUSIONS: List[str] = [ 'Bruenor Battlehammer', # Equipment cost reduction 'Nazahn, Revered Bladesmith', # Equipment tutor @@ -223,7 +339,10 @@ EQUIPMENT_TEXT_PATTERNS: List[str] = [ 'unequip', # Equipment removal ] -# Aura-related constants +# ============================================================================= +# 15. AURA & VOLTRON CONSTANTS +# ============================================================================= + AURA_SPECIFIC_CARDS: List[str] = [ 'Ardenn, Intrepid Archaeologist', # Aura movement 'Calix, Guided By Fate', # Create duplicate Auras @@ -267,7 +386,10 @@ VOLTRON_PATTERNS: List[str] = [ 'reconfigure' ] -# Constants for lands matter functionality +# ============================================================================= +# 16. 
LANDS MATTER PATTERNS +# ============================================================================= + LANDS_MATTER_PATTERNS: Dict[str, List[str]] = { 'land_play': [ 'play a land', diff --git a/code/tagging/tag_utils.py b/code/tagging/tag_utils.py index eb58aa6..1fd771b 100644 --- a/code/tagging/tag_utils.py +++ b/code/tagging/tag_utils.py @@ -13,18 +13,11 @@ The module is designed to work with pandas DataFrames containing card data and p vectorized operations for efficient processing of large card collections. """ from __future__ import annotations - -# Standard library imports import re -from typing import List, Set, Union, Any, Tuple from functools import lru_cache - +from typing import Any, List, Set, Tuple, Union import numpy as np - -# Third-party imports import pandas as pd - -# Local application imports from . import tag_constants @@ -58,7 +51,6 @@ def _ensure_norm_series(df: pd.DataFrame, source_col: str, norm_col: str) -> pd. """ if norm_col in df.columns: return df[norm_col] - # Create normalized string series series = df[source_col].fillna('') if source_col in df.columns else pd.Series([''] * len(df), index=df.index) series = series.astype(str) df[norm_col] = series @@ -120,8 +112,6 @@ def create_type_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: if len(df) == 0: return pd.Series([], dtype=bool) - - # Use normalized cached series type_series = _ensure_norm_series(df, 'type', '__type_s') if regex: @@ -160,8 +150,6 @@ def create_text_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: if len(df) == 0: return pd.Series([], dtype=bool) - - # Use normalized cached series text_series = _ensure_norm_series(df, 'text', '__text_s') if regex: @@ -192,10 +180,7 @@ def create_keyword_mask(df: pd.DataFrame, type_text: Union[str, List[str]], rege TypeError: If type_text is not a string or list of strings ValueError: If required 'keywords' column is missing from DataFrame """ - # Validate required columns validate_dataframe_columns(df, 
{'keywords'}) - - # Handle empty DataFrame case if len(df) == 0: return pd.Series([], dtype=bool) @@ -206,8 +191,6 @@ def create_keyword_mask(df: pd.DataFrame, type_text: Union[str, List[str]], rege type_text = [type_text] elif not isinstance(type_text, list): raise TypeError("type_text must be a string or list of strings") - - # Use normalized cached series for keywords keywords = _ensure_norm_series(df, 'keywords', '__keywords_s') if regex: @@ -245,8 +228,6 @@ def create_name_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: if len(df) == 0: return pd.Series([], dtype=bool) - - # Use normalized cached series name_series = _ensure_norm_series(df, 'name', '__name_s') if regex: @@ -324,21 +305,14 @@ def create_tag_mask(df: pd.DataFrame, tag_patterns: Union[str, List[str]], colum Boolean Series indicating matching rows Examples: - # Match cards with draw-related tags >>> mask = create_tag_mask(df, ['Card Draw', 'Conditional Draw']) >>> mask = create_tag_mask(df, 'Unconditional Draw') """ if isinstance(tag_patterns, str): tag_patterns = [tag_patterns] - - # Handle empty DataFrame case if len(df) == 0: return pd.Series([], dtype=bool) - - # Create mask for each pattern masks = [df[column].apply(lambda x: any(pattern in tag for tag in x)) for pattern in tag_patterns] - - # Combine masks with OR return pd.concat(masks, axis=1).any(axis=1) def validate_dataframe_columns(df: pd.DataFrame, required_columns: Set[str]) -> None: @@ -365,11 +339,7 @@ def apply_tag_vectorized(df: pd.DataFrame, mask: pd.Series[bool], tags: Union[st """ if not isinstance(tags, list): tags = [tags] - - # Get current tags for masked rows current_tags = df.loc[mask, 'themeTags'] - - # Add new tags df.loc[mask, 'themeTags'] = current_tags.apply(lambda x: sorted(list(set(x + tags)))) def apply_rules(df: pd.DataFrame, rules: List[dict]) -> None: @@ -463,7 +433,6 @@ def create_numbered_phrase_mask( numbers = tag_constants.NUM_TO_SEARCH # Normalize verbs to list verbs = [verb] if 
isinstance(verb, str) else verb - # Build patterns if noun: patterns = [fr"{v}\s+{num}\s+{noun}" for v in verbs for num in numbers] else: @@ -490,13 +459,8 @@ def create_mass_damage_mask(df: pd.DataFrame) -> pd.Series[bool]: Returns: Boolean Series indicating which cards have mass damage effects """ - # Create patterns for numeric damage number_patterns = [create_damage_pattern(i) for i in range(1, 21)] - - # Add X damage pattern number_patterns.append(create_damage_pattern('X')) - - # Add patterns for damage targets target_patterns = [ 'to each creature', 'to all creatures', @@ -504,8 +468,6 @@ def create_mass_damage_mask(df: pd.DataFrame) -> pd.Series[bool]: 'to each opponent', 'to everything' ] - - # Create masks damage_mask = create_text_mask(df, number_patterns) target_mask = create_text_mask(df, target_patterns) @@ -555,23 +517,14 @@ def normalize_keywords( normalized_keywords: set[str] = set() for keyword in raw: - # Skip non-string entries if not isinstance(keyword, str): continue - - # Skip empty strings keyword = keyword.strip() if not keyword: continue - - # Skip excluded keywords if keyword.lower() in tag_constants.KEYWORD_EXCLUSION_SET: continue - - # Apply normalization map normalized = tag_constants.KEYWORD_NORMALIZATION_MAP.get(keyword, keyword) - - # Check if singleton (unless allowlisted) frequency = frequency_map.get(keyword, 0) is_singleton = frequency == 1 is_allowlisted = normalized in allowlist or keyword in allowlist @@ -658,4 +611,242 @@ def classify_tag(tag: str) -> str: return "metadata" # Default: treat as theme tag - return "theme" \ No newline at end of file + return "theme" + + +# --- Text Processing Helpers (M0.6) --------------------------------------------------------- +def strip_reminder_text(text: str) -> str: + """Remove reminder text (content in parentheses) from card text. + + Reminder text often contains keywords and patterns that can cause false positives + in pattern matching. 
This function strips all parenthetical content to focus on + the actual game text. + + Args: + text: Card text possibly containing reminder text in parentheses + + Returns: + Text with all parenthetical content removed + + Example: + >>> strip_reminder_text("Hexproof (This creature can't be the target of spells)") + "Hexproof " + """ + if not text: + return text + return re.sub(r'\([^)]*\)', '', text) + + +def extract_context_window(text: str, match_start: int, match_end: int, + window_size: int = None, include_before: bool = False) -> str: + """Extract a context window around a regex match for validation. + + When pattern matching finds a potential match, we often need to examine + the surrounding text to validate the match or check for additional keywords. + This function extracts a window of text around the match position. + + Args: + text: Full text to extract context from + match_start: Start position of the regex match + match_end: End position of the regex match + window_size: Number of characters to include after the match. + If None, uses CONTEXT_WINDOW_SIZE from tag_constants (default: 70). + To include context before the match, use include_before=True. + include_before: If True, includes window_size characters before the match + in addition to after. If False (default), only includes after. 
+ + Returns: + Substring of text containing the match plus surrounding context + + Example: + >>> text = "Creatures you control have hexproof and vigilance" + >>> match = re.search(r'creatures you control', text) + >>> extract_context_window(text, match.start(), match.end(), window_size=30) + 'Creatures you control have hexproof and ' + """ + if not text: + return text + if window_size is None: + from .tag_constants import CONTEXT_WINDOW_SIZE + window_size = CONTEXT_WINDOW_SIZE + + # Calculate window boundaries + if include_before: + context_start = max(0, match_start - window_size) + else: + context_start = match_start + + context_end = min(len(text), match_end + window_size) + + return text[context_start:context_end] + + +# --- Enhanced Tagging Utilities (M3.5/M3.6) ---------------------------------------------------- + +def build_combined_mask( + df: pd.DataFrame, + text_patterns: Union[str, List[str], None] = None, + type_patterns: Union[str, List[str], None] = None, + keyword_patterns: Union[str, List[str], None] = None, + name_list: Union[List[str], None] = None, + exclusion_patterns: Union[str, List[str], None] = None, + combine_with_or: bool = True +) -> pd.Series[bool]: + """Build a combined boolean mask from multiple pattern types. + + This utility reduces boilerplate when creating complex masks by combining + text, type, keyword, and name patterns into a single mask. Patterns are + combined with OR by default, but can be combined with AND. + + Args: + df: DataFrame to search + text_patterns: Patterns to match in 'text' column + type_patterns: Patterns to match in 'type' column + keyword_patterns: Patterns to match in 'keywords' column + name_list: List of exact card names to match + exclusion_patterns: Text patterns to exclude from final mask + combine_with_or: If True, combine masks with OR (default). 
+ If False, combine with AND (requires all conditions) + + Returns: + Boolean Series combining all specified patterns + + Example: + >>> # Match cards with flying OR haste, exclude creatures + >>> mask = build_combined_mask( + ... df, + ... keyword_patterns=['Flying', 'Haste'], + ... exclusion_patterns='Creature' + ... ) + """ + if combine_with_or: + result = pd.Series([False] * len(df), index=df.index) + else: + result = pd.Series([True] * len(df), index=df.index) + masks = [] + + if text_patterns is not None: + masks.append(create_text_mask(df, text_patterns)) + + if type_patterns is not None: + masks.append(create_type_mask(df, type_patterns)) + + if keyword_patterns is not None: + masks.append(create_keyword_mask(df, keyword_patterns)) + + if name_list is not None: + masks.append(create_name_mask(df, name_list)) + if masks: + if combine_with_or: + for mask in masks: + result |= mask + else: + for mask in masks: + result &= mask + if exclusion_patterns is not None: + exclusion_mask = create_text_mask(df, exclusion_patterns) + result &= ~exclusion_mask + + return result + + +def tag_with_logging( + df: pd.DataFrame, + mask: pd.Series[bool], + tags: Union[str, List[str]], + log_message: str, + color: str = '', + logger=None +) -> int: + """Apply tags with standardized logging. + + This utility wraps the common pattern of applying tags and logging the count. + It provides consistent formatting for log messages across the tagging module. + + Args: + df: DataFrame to modify + mask: Boolean mask indicating which rows to tag + tags: Tag(s) to apply + log_message: Description of what's being tagged (e.g., "flying creatures") + color: Color identifier for context (optional) + logger: Logger instance to use (optional, uses print if None) + + Returns: + Count of cards tagged + + Example: + >>> count = tag_with_logging( + ... df, + ... flying_mask, + ... 'Flying', + ... 'creatures with flying ability', + ... color='blue', + ... logger=logger + ... 
) + # Logs: "Tagged 42 blue creatures with flying ability" + """ + count = mask.sum() + if count > 0: + apply_tag_vectorized(df, mask, tags) + color_part = f'{color} ' if color else '' + full_message = f'Tagged {count} {color_part}{log_message}' + + if logger: + logger.info(full_message) + else: + print(full_message) + + return count + + +def tag_with_rules_and_logging( + df: pd.DataFrame, + rules: List[dict], + summary_message: str, + color: str = '', + logger=None +) -> int: + """Apply multiple tag rules with summarized logging. + + This utility combines apply_rules with logging, providing a summary of + all cards affected across multiple rules. + + Args: + df: DataFrame to modify + rules: List of rule dicts (each with 'mask' and 'tags') + summary_message: Overall description (e.g., "card draw effects") + color: Color identifier for context (optional) + logger: Logger instance to use (optional) + + Returns: + Total count of unique cards affected by any rule + + Example: + >>> rules = [ + ... {'mask': flying_mask, 'tags': ['Flying']}, + ... {'mask': haste_mask, 'tags': ['Haste', 'Aggro']} + ... ] + >>> count = tag_with_rules_and_logging( + ... df, rules, 'evasive creatures', color='red', logger=logger + ... 
) + """ + affected = pd.Series([False] * len(df), index=df.index) + for rule in rules: + mask = rule.get('mask') + if callable(mask): + mask = mask(df) + if mask is not None and mask.any(): + tags = rule.get('tags', []) + apply_tag_vectorized(df, mask, tags) + affected |= mask + + count = affected.sum() + color_part = f'{color} ' if color else '' + full_message = f'Tagged {count} {color_part}{summary_message}' + + if logger: + logger.info(full_message) + else: + print(full_message) + + return count \ No newline at end of file diff --git a/code/tagging/tagger.py b/code/tagging/tagger.py index 94ef6da..ddd5ad5 100644 --- a/code/tagging/tagger.py +++ b/code/tagging/tagger.py @@ -12,16 +12,15 @@ from typing import Any, Dict, List, Union import pandas as pd # Local application imports -from . import tag_utils +from . import regex_patterns as rgx from . import tag_constants +from . import tag_utils from .bracket_policy_applier import apply_bracket_policy_tags from .multi_face_merger import merge_multi_face_rows -from settings import CSV_DIRECTORY, MULTIPLE_COPY_CARDS, COLORS import logging_util from file_setup import setup from file_setup.setup_utils import enrich_commander_rows_with_tags - -# Create logger for this module +from settings import COLORS, CSV_DIRECTORY, MULTIPLE_COPY_CARDS logger = logging_util.logging.getLogger(__name__) logger.setLevel(logging_util.LOG_LEVEL) logger.addHandler(logging_util.file_handler) @@ -160,6 +159,78 @@ def _write_compat_snapshot(df: pd.DataFrame, color: str) -> None: logger.warning("Failed to write unmerged snapshot for %s: %s", color, exc) +def _classify_and_partition_tags( + tags: List[str], + metadata_counts: Dict[str, int], + theme_counts: Dict[str, int] +) -> tuple[List[str], List[str], int, int]: + """Classify tags as metadata or theme and update counters. 
+ + Args: + tags: List of tags to classify + metadata_counts: Dict to track metadata tag counts + theme_counts: Dict to track theme tag counts + + Returns: + Tuple of (metadata_tags, theme_tags, metadata_moved, theme_kept) + """ + metadata_tags = [] + theme_tags = [] + metadata_moved = 0 + theme_kept = 0 + + for tag in tags: + classification = tag_utils.classify_tag(tag) + + if classification == "metadata": + metadata_tags.append(tag) + metadata_counts[tag] = metadata_counts.get(tag, 0) + 1 + metadata_moved += 1 + else: + theme_tags.append(tag) + theme_counts[tag] = theme_counts.get(tag, 0) + 1 + theme_kept += 1 + + return metadata_tags, theme_tags, metadata_moved, theme_kept + + +def _build_partition_diagnostics( + total_rows: int, + rows_with_tags: int, + total_metadata_moved: int, + total_theme_kept: int, + metadata_counts: Dict[str, int], + theme_counts: Dict[str, int] +) -> Dict[str, Any]: + """Build diagnostics dictionary for metadata partition operation. + + Args: + total_rows: Total rows processed + rows_with_tags: Rows that had any tags + total_metadata_moved: Total metadata tags moved + total_theme_kept: Total theme tags kept + metadata_counts: Count of each metadata tag + theme_counts: Count of each theme tag + + Returns: + Diagnostics dictionary + """ + most_common_metadata = sorted(metadata_counts.items(), key=lambda x: x[1], reverse=True)[:10] + most_common_themes = sorted(theme_counts.items(), key=lambda x: x[1], reverse=True)[:10] + + return { + "enabled": True, + "total_rows": total_rows, + "rows_with_tags": rows_with_tags, + "metadata_tags_moved": total_metadata_moved, + "theme_tags_kept": total_theme_kept, + "unique_metadata_tags": len(metadata_counts), + "unique_theme_tags": len(theme_counts), + "most_common_metadata": most_common_metadata, + "most_common_themes": most_common_themes + } + + def _apply_metadata_partition(df: pd.DataFrame) -> tuple[pd.DataFrame, Dict[str, Any]]: """Partition tags into themeTags and metadataTags columns. 
@@ -176,31 +247,9 @@ def _apply_metadata_partition(df: pd.DataFrame) -> tuple[pd.DataFrame, Dict[str, Returns: Tuple of (modified DataFrame, diagnostics dict) - - Diagnostics dict contains: - - total_rows: number of rows processed - - rows_with_tags: rows that had any tags - - metadata_tags_moved: total count of metadata tags moved - - theme_tags_kept: total count of theme tags kept - - tag_distribution: dict mapping tag -> classification - - most_common_metadata: list of (tag, count) tuples - - most_common_themes: list of (tag, count) tuples - - Example: - >>> df = pd.DataFrame({'themeTags': [['Card Draw', 'Applied: Cost Reduction']]}) - >>> df_out, diag = _apply_metadata_partition(df) - >>> df_out['themeTags'].iloc[0] - ['Card Draw'] - >>> df_out['metadataTags'].iloc[0] - ['Applied: Cost Reduction'] - >>> diag['metadata_tags_moved'] - 1 """ - # Check feature flag directly from environment (not from settings module) - # This allows tests to monkeypatch the environment variable tag_metadata_split = os.getenv('TAG_METADATA_SPLIT', '1').lower() not in ('0', 'false', 'off', 'disabled') - # Feature flag check - return unmodified if disabled if not tag_metadata_split: logger.info("TAG_METADATA_SPLIT disabled, skipping metadata partition") return df, { @@ -209,7 +258,6 @@ def _apply_metadata_partition(df: pd.DataFrame) -> tuple[pd.DataFrame, Dict[str, "message": "Feature disabled via TAG_METADATA_SPLIT=0" } - # Validate input if 'themeTags' not in df.columns: logger.warning("No 'themeTags' column found, skipping metadata partition") return df, { @@ -217,72 +265,40 @@ def _apply_metadata_partition(df: pd.DataFrame) -> tuple[pd.DataFrame, Dict[str, "error": "Missing themeTags column", "total_rows": len(df) } - - # Initialize metadataTags column df['metadataTags'] = pd.Series([[] for _ in range(len(df))], index=df.index) - - # Track statistics metadata_counts: Dict[str, int] = {} theme_counts: Dict[str, int] = {} total_metadata_moved = 0 total_theme_kept = 0 rows_with_tags 
= 0 - - # Process each row for idx in df.index: tags = df.at[idx, 'themeTags'] - # Skip if not a list or empty if not isinstance(tags, list) or not tags: continue rows_with_tags += 1 - # Classify each tag - metadata_tags = [] - theme_tags = [] + # Classify and partition tags + metadata_tags, theme_tags, meta_moved, theme_kept = _classify_and_partition_tags( + tags, metadata_counts, theme_counts + ) - for tag in tags: - classification = tag_utils.classify_tag(tag) - - if classification == "metadata": - metadata_tags.append(tag) - metadata_counts[tag] = metadata_counts.get(tag, 0) + 1 - total_metadata_moved += 1 - else: - theme_tags.append(tag) - theme_counts[tag] = theme_counts.get(tag, 0) + 1 - total_theme_kept += 1 - - # Update columns + total_metadata_moved += meta_moved + total_theme_kept += theme_kept df.at[idx, 'themeTags'] = theme_tags df.at[idx, 'metadataTags'] = metadata_tags - - # Sort tag lists for top N reporting - most_common_metadata = sorted(metadata_counts.items(), key=lambda x: x[1], reverse=True)[:10] - most_common_themes = sorted(theme_counts.items(), key=lambda x: x[1], reverse=True)[:10] - - # Build diagnostics - diagnostics = { - "enabled": True, - "total_rows": len(df), - "rows_with_tags": rows_with_tags, - "metadata_tags_moved": total_metadata_moved, - "theme_tags_kept": total_theme_kept, - "unique_metadata_tags": len(metadata_counts), - "unique_theme_tags": len(theme_counts), - "most_common_metadata": most_common_metadata, - "most_common_themes": most_common_themes - } - - # Log summary + diagnostics = _build_partition_diagnostics( + len(df), rows_with_tags, total_metadata_moved, total_theme_kept, + metadata_counts, theme_counts + ) logger.info( f"Metadata partition complete: {total_metadata_moved} metadata tags moved, " f"{total_theme_kept} theme tags kept across {rows_with_tags} rows" ) - if most_common_metadata: - top_5_metadata = ', '.join([f"{tag}({ct})" for tag, ct in most_common_metadata[:5]]) + if diagnostics["most_common_metadata"]: 
+ top_5_metadata = ', '.join([f"{tag}({ct})" for tag, ct in diagnostics["most_common_metadata"][:5]]) logger.info(f"Top metadata tags: {top_5_metadata}") return df, diagnostics @@ -312,12 +328,8 @@ def load_dataframe(color: str) -> None: # Load initial dataframe for validation check_df = pd.read_csv(filepath) - - # Validate required columns required_columns = ['creatureTypes', 'themeTags'] missing_columns = [col for col in required_columns if col not in check_df.columns] - - # Handle missing columns if missing_columns: logger.warning(f"Missing columns: {missing_columns}") if 'creatureTypes' not in check_df.columns: @@ -341,14 +353,10 @@ def load_dataframe(color: str) -> None: # Load final dataframe with proper converters # M3: metadataTags is optional (may not exist in older CSVs) converters = {'themeTags': pd.eval, 'creatureTypes': pd.eval} - - # Add metadataTags converter if column exists if 'metadataTags' in check_df.columns: converters['metadataTags'] = pd.eval df = pd.read_csv(filepath, converters=converters) - - # Process the dataframe tag_by_color(df, color) except FileNotFoundError as e: @@ -361,76 +369,77 @@ def load_dataframe(color: str) -> None: logger.error(f'An unexpected error occurred: {e}') raise -## Tag cards on a color-by-color basis -def tag_by_color(df: pd.DataFrame, color: str) -> None: +def _tag_foundational_categories(df: pd.DataFrame, color: str) -> None: + """Apply foundational card categorization (creature types, card types, keywords). 
- #load_dataframe() - #answer = input('Would you like to regenerate the CSV file?\n') - #if answer.lower() in ['yes', 'y']: - # regenerate_csv_by_color(color) - # kindred_tagging(df, color) - # create_theme_tags(df, color) - #else: - # pass + Args: + df: DataFrame containing card data + color: Color identifier for logging + """ kindred_tagging(df, color) print('\n====================\n') create_theme_tags(df, color) print('\n====================\n') - - # Go through each type of tagging add_creatures_to_tags(df, color) print('\n====================\n') tag_for_card_types(df, color) print('\n====================\n') tag_for_keywords(df, color) print('\n====================\n') - - ## Tag for partner effects tag_for_partner_effects(df, color) print('\n====================\n') + + +def _tag_mechanical_themes(df: pd.DataFrame, color: str) -> None: + """Apply mechanical theme tags (cost reduction, draw, artifacts, enchantments, etc.). - ## Tag for various effects + Args: + df: DataFrame containing card data + color: Color identifier for logging + """ tag_for_cost_reduction(df, color) print('\n====================\n') - # Freerunning is a keyworded cost-reduction mechanic tag_for_freerunning(df, color) print('\n====================\n') tag_for_card_draw(df, color) print('\n====================\n') - # Discard-centric effects and triggers tag_for_discard_matters(df, color) print('\n====================\n') - # Explore and Map tokens provide selection and incidental counters tag_for_explore_and_map(df, color) print('\n====================\n') tag_for_artifacts(df, color) print('\n====================\n') tag_for_enchantments(df, color) print('\n====================\n') - # Craft is a transform mechanic that often references artifacts, exile, and graveyards tag_for_craft(df, color) print('\n====================\n') tag_for_exile_matters(df, color) print('\n====================\n') - # Custom keywords/mechanics tag_for_bending(df, color) print('\n====================\n') 
tag_for_tokens(df, color) print('\n====================\n') - # Rad counters are tracked separately to surface the theme tag_for_rad_counters(df, color) print('\n====================\n') tag_for_life_matters(df, color) print('\n====================\n') tag_for_counters(df, color) print('\n====================\n') + + +def _tag_strategic_themes(df: pd.DataFrame, color: str) -> None: + """Apply strategic theme tags (voltron, lands, spellslinger, ramp). + + Args: + df: DataFrame containing card data + color: Color identifier for logging + """ tag_for_voltron(df, color) print('\n====================\n') tag_for_lands_matter(df, color) print('\n====================\n') tag_for_spellslinger(df, color) print('\n====================\n') - # Spree spells are modal and cost-scale via additional payments tag_for_spree(df, color) print('\n====================\n') tag_for_ramp(df, color) @@ -439,16 +448,44 @@ def tag_by_color(df: pd.DataFrame, color: str) -> None: print('\n====================\n') tag_for_interaction(df, color) print('\n====================\n') - # Broad archetype taggers (high-level deck identities) + + +def _tag_archetype_themes(df: pd.DataFrame, color: str) -> None: + """Apply high-level archetype tags (midrange, toolbox, pillowfort, politics). + + Args: + df: DataFrame containing card data + color: Color identifier for logging + """ tag_for_midrange_archetype(df, color) print('\n====================\n') tag_for_toolbox_archetype(df, color) print('\n====================\n') - # Pillowfort and Politics rely on previously applied control / stax style tags tag_for_pillowfort(df, color) print('\n====================\n') tag_for_politics(df, color) print('\n====================\n') + + +## Tag cards on a color-by-color basis +def tag_by_color(df: pd.DataFrame, color: str) -> None: + """Orchestrate all tagging operations for a color's DataFrame. + + Applies tags in this order: + 1. Foundational categories (creature types, card types, keywords) + 2. 
Mechanical themes (cost reduction, draw, artifacts, tokens, etc.) + 3. Strategic themes (voltron, lands matter, spellslinger, ramp) + 4. High-level archetypes (midrange, toolbox, pillowfort, politics) + 5. Bracket policy tags + + Args: + df: DataFrame containing card data + color: Color identifier for logging + """ + _tag_foundational_categories(df, color) + _tag_mechanical_themes(df, color) + _tag_strategic_themes(df, color) + _tag_archetype_themes(df, color) # Apply bracket policy tags (from config/card_lists/*.json) apply_bracket_policy_tags(df) @@ -493,7 +530,6 @@ def kindred_tagging(df: pd.DataFrame, color: str) -> None: logger.info(f'Setting creature type tags on {color}_cards.csv') try: - # Initialize creatureTypes column vectorized df['creatureTypes'] = pd.Series([[] for _ in range(len(df))], index=df.index) # Detect creature types using vectorized split/filter @@ -514,7 +550,6 @@ def kindred_tagging(df: pd.DataFrame, color: str) -> None: print('\n==========\n') logger.info(f'Setting Outlaw creature type tags on {color}_cards.csv') - # Process outlaw types outlaws = tag_constants.OUTLAW_TYPES df['creatureTypes'] = df.apply( lambda row: tag_utils.add_outlaw_type(row['creatureTypes'], outlaws) @@ -576,10 +611,7 @@ def create_theme_tags(df: pd.DataFrame, color: str) -> None: ValueError: If required columns are missing or color is invalid TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() logger.info('Initializing theme tags for %s cards', color) - - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): @@ -588,7 +620,6 @@ def create_theme_tags(df: pd.DataFrame, color: str) -> None: raise ValueError(f"Invalid color: {color}") try: - # Initialize themeTags column using vectorized operation df['themeTags'] = pd.Series([[] for _ in range(len(df))], index=df.index) # Define expected columns @@ -596,8 +627,6 @@ def create_theme_tags(df: pd.DataFrame, 
color: str) -> None: 'name', 'text', 'type', 'keywords', 'creatureTypes', 'power', 'toughness' } - - # Validate required columns missing = required_columns - set(df.columns) if missing: raise ValueError(f"Missing required columns: {missing}") @@ -610,8 +639,7 @@ def create_theme_tags(df: pd.DataFrame, color: str) -> None: df = df.reindex(columns=available_cols) # Skip intermediate disk writes; final save happens at end of tag_by_color - total_time = pd.Timestamp.now() - start_time - logger.info(f'Theme tags initialized in {total_time.total_seconds():.2f}s') + logger.info('Theme tags initialized for %s', color) except Exception as e: logger.error('Error initializing theme tags: %s', str(e)) @@ -631,34 +659,24 @@ def tag_for_card_types(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required columns are missing """ - start_time = pd.Timestamp.now() - logger.info('Setting card type tags on %s_cards.csv', color) - try: - # Validate required columns required_cols = {'type', 'themeTags'} if not required_cols.issubset(df.columns): raise ValueError(f"Missing required columns: {required_cols - set(df.columns)}") # Define type-to-tag mapping type_tag_map = tag_constants.TYPE_TAG_MAPPING - - # Process each card type - for card_type, tags in type_tag_map.items(): - mask = tag_utils.create_type_mask(df, card_type) - if mask.any(): - tag_utils.apply_tag_vectorized(df, mask, tags) - logger.info('Tagged %d cards with %s type', mask.sum(), card_type) - - # Log completion - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Card type tagging completed in %.2fs', duration) + rules = [ + { 'mask': tag_utils.create_type_mask(df, card_type), 'tags': tags } + for card_type, tags in type_tag_map.items() + ] + tag_utils.tag_with_rules_and_logging( + df, rules, 'card type tags', color=color, logger=logger + ) except Exception as e: logger.error('Error in tag_for_card_types: %s', str(e)) raise - # Overwrite file with artifact tag added - 
logger.info(f'Card type tags set on {color}_cards.csv.') ## Add creature types to the theme tags def add_creatures_to_tags(df: pd.DataFrame, color: str) -> None: @@ -675,27 +693,20 @@ def add_creatures_to_tags(df: pd.DataFrame, color: str) -> None: ValueError: If required columns are missing TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() logger.info(f'Adding creature types to theme tags in {color}_cards.csv') try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'creatureTypes', 'themeTags'} missing = required_cols - set(df.columns) if missing: raise ValueError(f"Missing required columns: {missing}") - - # Create mask for rows with non-empty creature types has_creatures_mask = df['creatureTypes'].apply(lambda x: bool(x) if isinstance(x, list) else False) if has_creatures_mask.any(): - # Get rows with creature types creature_rows = df[has_creatures_mask] # Generate kindred tags vectorized @@ -703,12 +714,9 @@ def add_creatures_to_tags(df: pd.DataFrame, color: str) -> None: current_tags = row['themeTags'] kindred_tags = [f"{ct} Kindred" for ct in row['creatureTypes']] return sorted(list(set(current_tags + kindred_tags))) - - # Update tags for matching rows df.loc[has_creatures_mask, 'themeTags'] = creature_rows.apply(add_kindred_tags, axis=1) - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Added kindred tags to {has_creatures_mask.sum()} cards in {duration:.2f}s') + logger.info(f'Added kindred tags to {has_creatures_mask.sum()} cards') else: logger.info('No cards with creature types found') @@ -749,8 +757,6 @@ def tag_for_keywords(df: pd.DataFrame, color: str) -> None: else: logger.warning('Keyword frequency map not found, normalization disabled for this run') TAG_NORMALIZE_KEYWORDS = False - - # Create mask for valid 
keywords has_keywords = pd.notna(df['keywords']) if has_keywords.any(): @@ -817,47 +823,19 @@ def tag_for_partner_effects(df: pd.DataFrame, color: str) -> None: Looks for 'partner', 'partner with', and permutations in rules text and applies tags accordingly. """ - logger.info(f'Tagging Partner keywords in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - rules = [] - partner_mask = tag_utils.create_text_mask(df, r"\bpartner\b(?!\s*(?:with|[-—–]))") - if partner_mask.any(): - rules.append({ 'mask': partner_mask, 'tags': ['Partner'] }) - - partner_with_mask = tag_utils.create_text_mask(df, 'partner with') - if partner_with_mask.any(): - rules.append({ 'mask': partner_with_mask, 'tags': ['Partner with'] }) - - partner_survivors_mask = tag_utils.create_text_mask(df, r"Partner\s*[-—–]\s*Survivors") - if partner_survivors_mask.any(): - rules.append({ 'mask': partner_survivors_mask, 'tags': ['Partner - Survivors'] }) - - partner_father_and_son = tag_utils.create_text_mask(df, r"Partner\s*[-—–]\s*Father\s*&\s*Son") - if partner_father_and_son.any(): - rules.append({ 'mask': partner_father_and_son, 'tags': ['Partner - Father & Son'] }) - - friends_forever_mask = tag_utils.create_text_mask(df, 'Friends forever') - if friends_forever_mask.any(): - rules.append({ 'mask': friends_forever_mask, 'tags': ['Friends Forever'] }) - - doctors_companion_mask = tag_utils.create_text_mask(df, "Doctor's companion") - if doctors_companion_mask.any(): - rules.append({ 'mask': doctors_companion_mask, 'tags': ["Doctor's Companion"] }) - - if rules: - tag_utils.apply_rules(df, rules) - total = sum(int(r['mask'].sum()) for r in rules) - logger.info('Tagged %d cards with Partner keywords', total) - else: - logger.info('No Partner keywords found') - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed Bending tagging in %.2fs', duration) + rules = [ + {'mask': tag_utils.create_text_mask(df, r"\bpartner\b(?!\s*(?:with|[-—–]))"), 'tags': ['Partner']}, 
+ {'mask': tag_utils.create_text_mask(df, 'partner with'), 'tags': ['Partner with']}, + {'mask': tag_utils.create_text_mask(df, r"Partner\s*[-—–]\s*Survivors"), 'tags': ['Partner - Survivors']}, + {'mask': tag_utils.create_text_mask(df, r"Partner\s*[-—–]\s*Father\s*&\s*Son"), 'tags': ['Partner - Father & Son']}, + {'mask': tag_utils.create_text_mask(df, 'Friends forever'), 'tags': ['Friends Forever']}, + {'mask': tag_utils.create_text_mask(df, "Doctor's companion"), 'tags': ["Doctor's Companion"]}, + ] + tag_utils.tag_with_rules_and_logging(df, rules, 'partner effects', color=color, logger=logger) except Exception as e: - logger.error(f'Error tagging Bending keywords: {str(e)}') + logger.error(f'Error tagging partner keywords: {str(e)}') raise ### Cost reductions @@ -874,11 +852,7 @@ def tag_for_cost_reduction(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info('Tagging cost reduction cards in %s_cards.csv', color) - start_time = pd.Timestamp.now() - try: - # Create masks for different cost reduction patterns cost_mask = tag_utils.create_text_mask(df, tag_constants.PATTERN_GROUPS['cost_reduction']) # Add specific named cards @@ -895,19 +869,12 @@ def tag_for_cost_reduction(df: pd.DataFrame, color: str) -> None: 'Will Kenrith' ] named_mask = tag_utils.create_name_mask(df, named_cards) - - # Combine masks final_mask = cost_mask | named_mask - - # Apply tags via rules engine spell_mask = final_mask & tag_utils.create_text_mask(df, r"Sorcery|Instant|noncreature") - tag_utils.apply_rules(df, [ + tag_utils.tag_with_rules_and_logging(df, [ { 'mask': final_mask, 'tags': ['Cost Reduction'] }, { 'mask': spell_mask, 'tags': ['Spellslinger', 'Spells Matter'] }, - ]) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Tagged %d cost reduction cards in %.2fs', final_mask.sum(), duration) + ], 'cost reduction cards', color=color, logger=logger) except Exception as e: 
logger.error('Error tagging cost reduction cards: %s', str(e)) @@ -941,13 +908,10 @@ def tag_for_card_draw(df: pd.DataFrame, color: str) -> None: logger.info(f'Starting card draw effect tagging for {color}_cards.csv') try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags'} tag_utils.validate_dataframe_columns(df, required_cols) @@ -975,8 +939,6 @@ def tag_for_card_draw(df: pd.DataFrame, color: str) -> None: tag_for_unconditional_draw(df, color) logger.info('Completed unconditional draw tagging') print('\n==========\n') - - # Log completion and performance metrics duration = pd.Timestamp.now() - start_time logger.info(f'Completed all card draw tagging in {duration.total_seconds():.2f}s') @@ -994,14 +956,9 @@ def create_unconditional_draw_mask(df: pd.DataFrame) -> pd.Series: Returns: Boolean Series indicating which cards have unconditional draw effects """ - # Create pattern for draw effects using num_to_search draw_mask = tag_utils.create_numbered_phrase_mask(df, 'draw', 'card') - - # Create exclusion mask for conditional effects excluded_tags = tag_constants.DRAW_RELATED_TAGS tag_mask = tag_utils.create_tag_mask(df, excluded_tags) - - # Create text-based exclusions text_patterns = tag_constants.DRAW_EXCLUSION_PATTERNS text_mask = tag_utils.create_text_mask(df, text_patterns) @@ -1018,19 +975,9 @@ def tag_for_unconditional_draw(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging unconditional draw effects in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create mask for unconditional draw effects draw_mask = create_unconditional_draw_mask(df) - - # Apply tags - tag_utils.apply_tag_vectorized(df, draw_mask, ['Unconditional Draw', 'Card Draw']) - - # Log results 
- duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {draw_mask.sum()} cards with unconditional draw effects in {duration:.2f}s') + tag_utils.tag_with_logging(df, draw_mask, ['Unconditional Draw', 'Card Draw'], 'unconditional draw effects', color=color, logger=logger) except Exception as e: logger.error(f'Error tagging unconditional draw effects: {str(e)}') @@ -1046,15 +993,10 @@ def create_conditional_draw_exclusion_mask(df: pd.DataFrame) -> pd.Series: Returns: Boolean Series indicating which cards should be excluded """ - # Create tag-based exclusions excluded_tags = tag_constants.DRAW_RELATED_TAGS tag_mask = tag_utils.create_tag_mask(df, excluded_tags) - - # Create text-based exclusions text_patterns = tag_constants.DRAW_EXCLUSION_PATTERNS + ['whenever you draw a card'] text_mask = tag_utils.create_text_mask(df, text_patterns) - - # Create name-based exclusions excluded_names = ['relic vial', 'vexing bauble'] name_mask = tag_utils.create_name_mask(df, excluded_names) @@ -1121,33 +1063,18 @@ def tag_for_conditional_draw(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging conditional draw effects in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create exclusion mask + # Build masks exclusion_mask = create_conditional_draw_exclusion_mask(df) - - # Create trigger mask trigger_mask = create_conditional_draw_trigger_mask(df) - - # Create draw effect mask + + # Create draw effect mask with extra patterns draw_mask = tag_utils.create_numbered_phrase_mask(df, 'draw', 'card') - # Add token and 'draw for each' patterns - extra_patterns = [ - 'created a token.*draw', - 'draw a card for each' - ] - draw_mask = draw_mask | tag_utils.create_text_mask(df, extra_patterns) + draw_mask = draw_mask | tag_utils.create_text_mask(df, ['created a token.*draw', 'draw a card for each']) - # Combine masks + # Combine: trigger & draw & 
~exclusion final_mask = trigger_mask & draw_mask & ~exclusion_mask - - # Apply tags - tag_utils.apply_tag_vectorized(df, final_mask, ['Conditional Draw', 'Card Draw']) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with conditional draw effects in {duration:.2f}s') + tag_utils.tag_with_logging(df, final_mask, ['Conditional Draw', 'Card Draw'], 'conditional draw effects', color=color, logger=logger) except Exception as e: logger.error(f'Error tagging conditional draw effects: {str(e)}') @@ -1229,32 +1156,17 @@ def tag_for_loot_effects(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging loot-like effects in {color}_cards.csv') - - # Create masks for each effect type loot_mask = create_loot_mask(df) connive_mask = create_connive_mask(df) cycling_mask = create_cycling_mask(df) blood_mask = create_blood_mask(df) - - # Apply tags based on masks - if loot_mask.any(): - tag_utils.apply_tag_vectorized(df, loot_mask, ['Loot', 'Card Draw', 'Discard Matters']) - logger.info(f'Tagged {loot_mask.sum()} cards with standard loot effects') - - if connive_mask.any(): - tag_utils.apply_tag_vectorized(df, connive_mask, ['Connive', 'Loot', 'Card Draw', 'Discard Matters']) - logger.info(f'Tagged {connive_mask.sum()} cards with connive effects') - - if cycling_mask.any(): - tag_utils.apply_tag_vectorized(df, cycling_mask, ['Cycling', 'Loot', 'Card Draw', 'Discard Matters']) - logger.info(f'Tagged {cycling_mask.sum()} cards with cycling effects') - - if blood_mask.any(): - tag_utils.apply_tag_vectorized(df, blood_mask, ['Blood Token', 'Loot', 'Card Draw', 'Discard Matters']) - logger.info(f'Tagged {blood_mask.sum()} cards with blood token effects') - - logger.info('Completed tagging loot-like effects') + rules = [ + {'mask': loot_mask, 'tags': ['Loot', 'Card Draw', 'Discard Matters']}, + {'mask': connive_mask, 'tags': ['Connive', 
'Loot', 'Card Draw', 'Discard Matters']}, + {'mask': cycling_mask, 'tags': ['Cycling', 'Loot', 'Card Draw', 'Discard Matters']}, + {'mask': blood_mask, 'tags': ['Blood Token', 'Loot', 'Card Draw', 'Discard Matters']}, + ] + tag_utils.tag_with_rules_and_logging(df, rules, 'loot-like effects', color=color, logger=logger) ## Sacrifice or pay life to draw effects def tag_for_cost_draw(df: pd.DataFrame, color: str) -> None: @@ -1264,30 +1176,19 @@ def tag_for_cost_draw(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info('Tagging cost-based draw effects in %s_cards.csv', color) - - # Split into life and sacrifice patterns - life_pattern = 'life: draw' - life_mask = df['text'].str.contains(life_pattern, case=False, na=False) - - sac_patterns = [ - r'sacrifice (?:a|an) (?:artifact|creature|permanent)(?:[^,]*),?[^,]*draw', - r'sacrifice [^:]+: draw', - r'sacrificed[^,]+, draw' + life_mask = df['text'].str.contains('life: draw', case=False, na=False) + + # Use compiled patterns from regex_patterns module + sac_mask = ( + df['text'].str.contains(rgx.SACRIFICE_DRAW.pattern, case=False, na=False, regex=True) | + df['text'].str.contains(rgx.SACRIFICE_COLON_DRAW.pattern, case=False, na=False, regex=True) | + df['text'].str.contains(rgx.SACRIFICED_COMMA_DRAW.pattern, case=False, na=False, regex=True) + ) + rules = [ + {'mask': life_mask, 'tags': ['Life to Draw', 'Card Draw']}, + {'mask': sac_mask, 'tags': ['Sacrifice to Draw', 'Card Draw']}, ] - sac_mask = df['text'].str.contains('|'.join(sac_patterns), case=False, na=False, regex=True) - - # Apply life draw tags - if life_mask.any(): - tag_utils.apply_tag_vectorized(df, life_mask, ['Life to Draw', 'Card Draw']) - logger.info('Tagged %d cards with life payment draw effects', life_mask.sum()) - - # Apply sacrifice draw tags - if sac_mask.any(): - tag_utils.apply_tag_vectorized(df, sac_mask, ['Sacrifice to Draw', 'Card Draw']) - logger.info('Tagged 
%d cards with sacrifice draw effects', sac_mask.sum()) - - logger.info('Completed tagging cost-based draw effects') + tag_utils.tag_with_rules_and_logging(df, rules, 'cost-based draw effects', color=color, logger=logger) ## Replacement effects, that might have you draw more cards def create_replacement_draw_mask(df: pd.DataFrame) -> pd.Series: @@ -1315,11 +1216,7 @@ def create_replacement_draw_mask(df: pd.DataFrame) -> pd.Series: 'if an opponent would.*instead.*draw', 'if you would.*instead.*draw' ] - - # Combine all patterns all_patterns = '|'.join(trigger_patterns + replacement_patterns) - - # Create base mask for replacement effects base_mask = tag_utils.create_text_mask(df, all_patterns) # Add mask for specific card numbers @@ -1339,11 +1236,8 @@ def create_replacement_draw_exclusion_mask(df: pd.DataFrame) -> pd.Series: Returns: Boolean Series indicating which cards should be excluded """ - # Create tag-based exclusions excluded_tags = tag_constants.DRAW_RELATED_TAGS tag_mask = tag_utils.create_tag_mask(df, excluded_tags) - - # Create text-based exclusions text_patterns = tag_constants.DRAW_EXCLUSION_PATTERNS + ['skips that turn instead'] text_mask = tag_utils.create_text_mask(df, text_patterns) @@ -1365,32 +1259,20 @@ def tag_for_replacement_draw(df: pd.DataFrame, color: str) -> None: - Specific card number replacements - Non-specific card number replacements ("draw that many plus") """ - logger.info(f'Tagging replacement draw effects in {color}_cards.csv') - try: - # Create replacement draw mask + # Build masks replacement_mask = create_replacement_draw_mask(df) - - # Create exclusion mask exclusion_mask = create_replacement_draw_exclusion_mask(df) - - # Add specific card names specific_cards_mask = tag_utils.create_name_mask(df, 'sylvan library') - # Combine masks + # Combine: (replacement & ~exclusion) OR specific cards final_mask = (replacement_mask & ~exclusion_mask) | specific_cards_mask - - # Apply tags - tag_utils.apply_tag_vectorized(df, final_mask, 
['Replacement Draw', 'Card Draw']) - - logger.info(f'Tagged {final_mask.sum()} cards with replacement draw effects') + tag_utils.tag_with_logging(df, final_mask, ['Replacement Draw', 'Card Draw'], 'replacement draw effects', color=color, logger=logger) except Exception as e: logger.error(f'Error tagging replacement draw effects: {str(e)}') raise - logger.info(f'Completed tagging replacement draw effects in {color}_cards.csv') - ## Wheels def tag_for_wheels(df: pd.DataFrame, color: str) -> None: """Tag cards that have wheel effects or care about drawing/discarding cards. @@ -1404,62 +1286,38 @@ def tag_for_wheels(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging "Wheel" effects in {color}_cards.csv') - try: - # Create masks for different wheel conditions - # Define text patterns for wheel effects + # Build text and name masks wheel_patterns = [ - 'an opponent draws a card', - 'cards you\'ve drawn', - 'draw your second card', - 'draw that many cards', - 'draws an additional card', - 'draws a card', - 'draws cards', - 'draws half that many cards', - 'draws their first second card', - 'draws their second second card', - 'draw two cards instead', - 'draws two additional cards', - 'discards that card', - 'discards their hand, then draws', - 'each card your opponents have drawn', - 'each draw a card', - 'each opponent draws a card', - 'each player draws', - 'has no cards in hand', - 'have no cards in hand', - 'may draw a card', - 'maximum hand size', - 'no cards in it, you win the game instead', - 'opponent discards', - 'you draw a card', - 'whenever you draw a card' + 'an opponent draws a card', 'cards you\'ve drawn', 'draw your second card', 'draw that many cards', + 'draws an additional card', 'draws a card', 'draws cards', 'draws half that many cards', + 'draws their first second card', 'draws their second second card', 'draw two cards instead', + 'draws two additional 
cards', 'discards that card', 'discards their hand, then draws', + 'each card your opponents have drawn', 'each draw a card', 'each opponent draws a card', + 'each player draws', 'has no cards in hand', 'have no cards in hand', 'may draw a card', + 'maximum hand size', 'no cards in it, you win the game instead', 'opponent discards', + 'you draw a card', 'whenever you draw a card' ] wheel_cards = [ 'arcane denial', 'bloodchief ascension', 'dark deal', 'elenda and azor', 'elixir of immortality', 'forced fruition', 'glunch, the bestower', 'kiora the rising tide', 'kynaios and tiro of meletis', - 'library of leng','loran of the third path', 'mr. foxglove', 'raffine, scheming seer', + 'library of leng', 'loran of the third path', 'mr. foxglove', 'raffine, scheming seer', 'sauron, the dark lord', 'seizan, perverter of truth', 'triskaidekaphile', 'twenty-toed toad', 'waste not', 'wedding ring', 'whispering madness' ] text_mask = tag_utils.create_text_mask(df, wheel_patterns) name_mask = tag_utils.create_name_mask(df, wheel_cards) - - # Combine masks final_mask = text_mask | name_mask - # Apply tags - tag_utils.apply_tag_vectorized(df, final_mask, ['Card Draw', 'Wheels']) - - # Add Draw Triggers tag for cards with trigger words + # Build trigger submask for Draw Triggers tag trigger_pattern = '|'.join(tag_constants.TRIGGERS) trigger_mask = final_mask & df['text'].str.contains(trigger_pattern, case=False, na=False) - tag_utils.apply_tag_vectorized(df, trigger_mask, ['Draw Triggers']) - - logger.info(f'Tagged {final_mask.sum()} cards with "Wheel" effects') + rules = [ + {'mask': final_mask, 'tags': ['Card Draw', 'Wheels']}, + {'mask': trigger_mask, 'tags': ['Draw Triggers']}, + ] + tag_utils.tag_with_rules_and_logging(df, rules, 'wheel effects', color=color, logger=logger) except Exception as e: logger.error(f'Error tagging "Wheel" effects: {str(e)}') @@ -1492,13 +1350,10 @@ def tag_for_artifacts(df: pd.DataFrame, color: str) -> None: print('\n==========\n') try: - # 
Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags'} tag_utils.validate_dataframe_columns(df, required_cols) @@ -1518,8 +1373,6 @@ def tag_for_artifacts(df: pd.DataFrame, color: str) -> None: tag_vehicles(df, color) logger.info('Completed Vehicle tagging') print('\n==========\n') - - # Log completion and performance metrics duration = pd.Timestamp.now() - start_time logger.info(f'Completed all "Artifact" and "Artifacts Matter" tagging in {duration.total_seconds():.2f}s') @@ -1543,29 +1396,21 @@ def tag_for_artifact_tokens(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info('Setting artifact token tags on %s_cards.csv', color) - start_time = pd.Timestamp.now() - try: - # Tag generic artifact tokens generic_mask = create_generic_artifact_mask(df) - if generic_mask.any(): - tag_utils.apply_rules(df, [{ - 'mask': generic_mask, - 'tags': ['Artifact Tokens', 'Artifacts Matter', 'Token Creation', 'Tokens Matter'] - }]) - logger.info('Tagged %d cards with generic artifact token effects', generic_mask.sum()) - - # Tag predefined artifact tokens predefined_mask, token_map = create_predefined_artifact_mask(df) - if predefined_mask.any(): - # Apply base artifact token tags - tag_utils.apply_rules(df, [{ - 'mask': predefined_mask, - 'tags': ['Artifact Tokens', 'Artifacts Matter', 'Token Creation', 'Tokens Matter'] - }]) + fabricate_mask = create_fabricate_mask(df) - # Group indices by token type and apply specific tags in batches + # Apply base artifact token tags via rules engine + rules = [ + {'mask': generic_mask, 'tags': ['Artifact Tokens', 'Artifacts Matter', 'Token Creation', 'Tokens Matter']}, + {'mask': predefined_mask, 'tags': ['Artifact Tokens', 'Artifacts Matter', 'Token Creation', 'Tokens 
Matter']}, + {'mask': fabricate_mask, 'tags': ['Artifact Tokens', 'Artifacts Matter', 'Token Creation', 'Tokens Matter']}, + ] + tag_utils.tag_with_rules_and_logging(df, rules, 'artifact tokens', color=color, logger=logger) + + # Apply specific token type tags (special handling for predefined tokens) + if predefined_mask.any(): token_to_indices: dict[str, list[int]] = {} for idx, token_type in token_map.items(): token_to_indices.setdefault(token_type, []).append(idx) @@ -1575,23 +1420,11 @@ def tag_for_artifact_tokens(df: pd.DataFrame, color: str) -> None: mask.loc[indices] = True tag_utils.apply_tag_vectorized(df, mask, [f'{token_type} Token']) - # Log results with token type counts - logger.info('Tagged %d cards with predefined artifact tokens:', predefined_mask.sum()) + # Log token type breakdown + logger.info('Predefined artifact token breakdown:') for token_type, indices in token_to_indices.items(): logger.info(' - %s: %d cards', token_type, len(indices)) - # Tag fabricate cards - fabricate_mask = create_fabricate_mask(df) - if fabricate_mask.any(): - tag_utils.apply_rules(df, [{ - 'mask': fabricate_mask, - 'tags': ['Artifact Tokens', 'Artifacts Matter', 'Token Creation', 'Tokens Matter'] - }]) - logger.info('Tagged %d cards with Fabricate', fabricate_mask.sum()) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed artifact token tagging in %.2fs', duration) - except Exception as e: logger.error('Error in tag_for_artifact_tokens: %s', str(e)) raise @@ -1655,13 +1488,10 @@ def create_predefined_artifact_mask(df: pd.DataFrame) -> tuple[pd.Series, dict[i - Boolean Series indicating which cards create predefined artifact tokens - Dictionary mapping row indices to their matched token types """ - # Create base mask for 'create' text has_create = tag_utils.create_text_mask(df, tag_constants.CREATE_ACTION_PATTERN) # Initialize token mapping dictionary token_map = {} - - # Create masks for each token type token_masks = [] for token 
in tag_constants.ARTIFACT_TOKENS: @@ -1682,8 +1512,6 @@ def create_predefined_artifact_mask(df: pd.DataFrame) -> tuple[pd.Series, dict[i token_map[idx] = token token_masks.append(token_mask) - - # Combine all token masks final_mask = has_create & pd.concat(token_masks, axis=1).any(axis=1) return final_mask, token_map @@ -1745,15 +1573,11 @@ def create_artifact_triggers_mask(df: pd.DataFrame) -> pd.Series: 'whenever a nontoken artifact', 'whenever an artifact', 'whenever another nontoken artifact', 'whenever one or more artifact' ] - - # Combine all patterns all_patterns = ( ability_patterns + artifact_state_patterns + artifact_type_patterns + casting_patterns + counting_patterns + search_patterns + trigger_patterns + ['metalcraft', 'prowess', 'copy of any artifact'] ) - - # Create pattern string pattern = '|'.join(all_patterns) # Create mask @@ -1772,26 +1596,18 @@ def tag_for_artifact_triggers(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging cards that care about artifacts in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: # Create artifact triggers mask triggers_mask = create_artifact_triggers_mask(df) - - # Apply tags - tag_utils.apply_rules(df, [{ 'mask': triggers_mask, 'tags': ['Artifacts Matter'] }]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {triggers_mask.sum()} cards with artifact triggers in {duration:.2f}s') + tag_utils.tag_with_logging( + df, triggers_mask, ['Artifacts Matter'], + 'cards that care about artifacts', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging artifact triggers: {str(e)}') raise - logger.info(f'Completed tagging cards that care about artifacts in {color}_cards.csv') - ## Equipment def create_equipment_mask(df: pd.DataFrame) -> pd.Series: """Create a boolean mask for cards that are Equipment @@ -1864,24 +1680,16 @@ def 
tag_equipment(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - logger.info('Tagging Equipment cards in %s_cards.csv', color) - start_time = pd.Timestamp.now() - try: - # Create equipment mask - equipment_mask = create_equipment_mask(df) - if equipment_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': equipment_mask, 'tags': ['Equipment', 'Equipment Matters', 'Voltron'] }]) - logger.info('Tagged %d Equipment cards', equipment_mask.sum()) - - # Create equipment cares mask - cares_mask = create_equipment_cares_mask(df) - if cares_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': cares_mask, 'tags': ['Artifacts Matter', 'Equipment Matters', 'Voltron'] }]) - logger.info('Tagged %d cards that care about Equipment', cares_mask.sum()) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed Equipment tagging in %.2fs', duration) + # Apply tagging rules with enhanced utilities + rules = [ + { 'mask': create_equipment_mask(df), 'tags': ['Equipment', 'Equipment Matters', 'Voltron'] }, + { 'mask': create_equipment_cares_mask(df), 'tags': ['Artifacts Matter', 'Equipment Matters', 'Voltron'] } + ] + + tag_utils.tag_with_rules_and_logging( + df, rules, 'Equipment cards and cards that care about Equipment', color=color, logger=logger + ) except Exception as e: logger.error('Error tagging Equipment cards: %s', str(e)) @@ -1902,16 +1710,11 @@ def create_vehicle_mask(df: pd.DataFrame) -> pd.Series: Returns: Boolean Series indicating which cards are Vehicles or care about them """ - # Create type-based mask - type_mask = tag_utils.create_type_mask(df, ['Vehicle', 'Pilot']) - - # Create text-based mask - text_patterns = [ - 'vehicle', 'crew', 'pilot', - ] - text_mask = tag_utils.create_text_mask(df, text_patterns) - - return type_mask | text_mask + return tag_utils.build_combined_mask( + df, + type_patterns=['Vehicle', 'Pilot'], + text_patterns=['vehicle', 'crew', 'pilot'] + ) def 
tag_vehicles(df: pd.DataFrame, color: str) -> None: """Tag cards that are Vehicles or care about Vehicles using vectorized operations. @@ -1929,18 +1732,16 @@ def tag_vehicles(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - logger.info('Tagging Vehicle cards in %s_cards.csv', color) - start_time = pd.Timestamp.now() - try: - # Create vehicle mask - vehicle_mask = create_vehicle_mask(df) - if vehicle_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': vehicle_mask, 'tags': ['Artifacts Matter', 'Vehicles'] }]) - logger.info('Tagged %d Vehicle-related cards', vehicle_mask.sum()) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed Vehicle tagging in %.2fs', duration) + # Use enhanced tagging utility + tag_utils.tag_with_logging( + df, + create_vehicle_mask(df), + ['Artifacts Matter', 'Vehicles'], + 'Vehicle-related cards', + color=color, + logger=logger + ) except Exception as e: logger.error('Error tagging Vehicle cards: %s', str(e)) @@ -1977,13 +1778,10 @@ def tag_for_enchantments(df: pd.DataFrame, color: str) -> None: logger.info(f'Starting "Enchantment" and "Enchantments Matter" tagging for {color}_cards.csv') print('\n==========\n') try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags'} tag_utils.validate_dataframe_columns(df, required_cols) @@ -2023,8 +1821,6 @@ def tag_for_enchantments(df: pd.DataFrame, color: str) -> None: tag_shrines(df, color) logger.info('Completed Shrine tagging') print('\n==========\n') - - # Log completion and performance metrics duration = pd.Timestamp.now() - start_time logger.info(f'Completed all "Enchantment" and "Enchantments Matter" tagging in {duration.total_seconds():.2f}s') @@ -2044,30 +1840,14 @@ def tag_for_enchantment_tokens(df: 
pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info('Setting enchantment token tags on %s_cards.csv', color) - start_time = pd.Timestamp.now() - try: - # Tag generic enchantment tokens generic_mask = create_generic_enchantment_mask(df) - if generic_mask.any(): - tag_utils.apply_rules(df, [{ - 'mask': generic_mask, - 'tags': ['Enchantment Tokens', 'Enchantments Matter', 'Token Creation', 'Tokens Matter'] - }]) - logger.info('Tagged %d cards with generic enchantment token effects', generic_mask.sum()) - - # Tag predefined artifact tokens predefined_mask = create_predefined_enchantment_mask(df) - if predefined_mask.any(): - tag_utils.apply_rules(df, [{ - 'mask': predefined_mask, - 'tags': ['Enchantment Tokens', 'Enchantments Matter', 'Token Creation', 'Tokens Matter'] - }]) - logger.info('Tagged %d cards with predefined enchantment tokens', predefined_mask.sum()) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed enchantment token tagging in %.2fs', duration) + rules = [ + {'mask': generic_mask, 'tags': ['Enchantment Tokens', 'Enchantments Matter', 'Token Creation', 'Tokens Matter']}, + {'mask': predefined_mask, 'tags': ['Enchantment Tokens', 'Enchantments Matter', 'Token Creation', 'Tokens Matter']}, + ] + tag_utils.tag_with_rules_and_logging(df, rules, 'enchantment tokens', color=color, logger=logger) except Exception as e: logger.error('Error in tag_for_enchantment_tokens: %s', str(e)) @@ -2115,8 +1895,6 @@ def create_predefined_enchantment_mask(df: pd.DataFrame) -> pd.Series: """ # Create text pattern matches has_create = tag_utils.create_text_mask(df, tag_constants.CREATE_ACTION_PATTERN) - - # Create masks for each token type token_masks = [] for token in tag_constants.ENCHANTMENT_TOKENS: token_mask = tag_utils.create_text_mask(df, token.lower()) @@ -2139,9 +1917,6 @@ def tag_for_enchantments_matter(df: pd.DataFrame, color: str) -> None: df: 
DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging cards that care about enchantments in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: # Define enchantment-related patterns ability_patterns = [ @@ -2175,8 +1950,6 @@ def tag_for_enchantments_matter(df: pd.DataFrame, color: str) -> None: 'whenever a nontoken enchantment', 'whenever an enchantment', 'whenever another nontoken enchantment', 'whenever one or more enchantment' ] - - # Combine all patterns and build masks all_patterns = ( ability_patterns + state_patterns + type_patterns + casting_patterns + counting_patterns + search_patterns + trigger_patterns @@ -2190,12 +1963,9 @@ def tag_for_enchantments_matter(df: pd.DataFrame, color: str) -> None: final_mask = triggers_mask & ~exclusion_mask # Apply tag - tag_utils.apply_rules(df, [{ 'mask': final_mask, 'tags': ['Enchantments Matter'] }]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info( - f'Tagged {final_mask.sum()} cards with enchantment triggers in {duration:.2f}s' + tag_utils.tag_with_logging( + df, final_mask, ['Enchantments Matter'], + 'cards that care about enchantments', color=color, logger=logger ) except Exception as e: @@ -2221,31 +1991,21 @@ def tag_auras(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - logger.info('Tagging Aura cards in %s_cards.csv', color) - start_time = pd.Timestamp.now() - try: - # Create Aura mask aura_mask = tag_utils.create_type_mask(df, 'Aura') - if aura_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': aura_mask, 'tags': ['Auras', 'Enchantments Matter', 'Voltron'] }]) - logger.info('Tagged %d Aura cards', aura_mask.sum()) - - # Create cares mask - text_patterns = [ - 'aura', - 'aura enters', - 'aura you control enters', - 'enchanted' - ] - cares_mask = tag_utils.create_text_mask(df, text_patterns) | tag_utils.create_name_mask(df, 
tag_constants.AURA_SPECIFIC_CARDS) - if cares_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': cares_mask, 'tags': ['Auras', 'Enchantments Matter', 'Voltron'] }]) - logger.info('Tagged %d cards that care about Auras', cares_mask.sum()) + cares_mask = tag_utils.build_combined_mask( + df, + text_patterns=['aura', 'aura enters', 'aura you control enters', 'enchanted'], + name_list=tag_constants.AURA_SPECIFIC_CARDS + ) - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed Aura tagging in %.2fs', duration) - + rules = [ + {'mask': aura_mask, 'tags': ['Auras', 'Enchantments Matter', 'Voltron']}, + {'mask': cares_mask, 'tags': ['Auras', 'Enchantments Matter', 'Voltron']} + ] + tag_utils.tag_with_rules_and_logging( + df, rules, 'Aura cards', color=color, logger=logger + ) except Exception as e: logger.error('Error tagging Aura cards: %s', str(e)) raise @@ -2258,26 +2018,15 @@ def tag_constellation(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging Constellation cards in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create mask for constellation keyword constellation_mask = tag_utils.create_keyword_mask(df, 'Constellation') - - # Apply tags - tag_utils.apply_rules(df, [{ 'mask': constellation_mask, 'tags': ['Constellation', 'Enchantments Matter'] }]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {constellation_mask.sum()} Constellation cards in {duration:.2f}s') - + tag_utils.tag_with_logging( + df, constellation_mask, ['Constellation', 'Enchantments Matter'], 'Constellation cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Constellation cards: {str(e)}') raise - logger.info('Completed tagging Constellation cards') - ## Sagas def tag_sagas(df: pd.DataFrame, color: str) -> None: """Tag cards with the Saga type using vectorized 
operations. @@ -2289,36 +2038,20 @@ def tag_sagas(df: pd.DataFrame, color: str) -> None: Raises: ValueError: if required DataFramecolumns are missing """ - logger.info('Tagging Saga cards in %s_cards.csv', color) - start_time = pd.Timestamp.now() - try: - # Create mask for Saga type saga_mask = tag_utils.create_type_mask(df, 'Saga') - if saga_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': saga_mask, 'tags': ['Enchantments Matter', 'Sagas Matter'] }]) - logger.info('Tagged %d Saga cards', saga_mask.sum()) + cares_mask = tag_utils.create_text_mask(df, ['saga', 'put a saga', 'final chapter', 'lore counter']) - # Create mask for cards that care about Sagas - text_patterns = [ - 'saga', - 'put a saga', - 'final chapter', - 'lore counter' + rules = [ + {'mask': saga_mask, 'tags': ['Enchantments Matter', 'Sagas Matter']}, + {'mask': cares_mask, 'tags': ['Enchantments Matter', 'Sagas Matter']} ] - cares_mask = tag_utils.create_text_mask(df, text_patterns) # create_saga_cares_mask(df) - if cares_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': cares_mask, 'tags': ['Enchantments Matter', 'Sagas Matter'] }]) - logger.info('Tagged %d cards that care about Sagas', cares_mask.sum()) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed Saga tagging in %.2fs', duration) - + tag_utils.tag_with_rules_and_logging( + df, rules, 'Saga cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Saga cards: {str(e)}') raise - - logger.info('Completed tagging Saga cards') ## Cases def tag_cases(df: pd.DataFrame, color: str) -> None: @@ -2331,31 +2064,21 @@ def tag_cases(df: pd.DataFrame, color: str) -> None: Raises: ValueError: if required DataFramecolumns are missing """ - logger.info('Tagging Case cards in %s_cards.csv', color) - start_time = pd.Timestamp.now() - try: - # Create mask for Case type - saga_mask = tag_utils.create_type_mask(df, 'Case') - if saga_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': 
saga_mask, 'tags': ['Enchantments Matter', 'Cases Matter'] }]) - logger.info('Tagged %d Case cards', saga_mask.sum()) - - # Create Case cares_mask + case_mask = tag_utils.create_type_mask(df, 'Case') cares_mask = tag_utils.create_text_mask(df, 'solve a case') - if cares_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': cares_mask, 'tags': ['Enchantments Matter', 'Cases Matter'] }]) - logger.info('Tagged %d cards that care about Cases', cares_mask.sum()) - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed Case tagging in %.2fs', duration) - + rules = [ + {'mask': case_mask, 'tags': ['Enchantments Matter', 'Cases Matter']}, + {'mask': cares_mask, 'tags': ['Enchantments Matter', 'Cases Matter']} + ] + tag_utils.tag_with_rules_and_logging( + df, rules, 'Case cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Case cards: {str(e)}') raise - logger.info('Completed tagging Case cards') - ## Rooms def tag_rooms(df: pd.DataFrame, color: str) -> None: """Tag cards with the room subtype using vectorized operations. 
@@ -2367,36 +2090,23 @@ def tag_rooms(df: pd.DataFrame, color: str) -> None: Raises: ValueError: if required DataFramecolumns are missing """ - logger.info('Tagging Room cards in %s_cards.csv', color) - start_time = pd.Timestamp.now() - try: - # Create mask for Room type room_mask = tag_utils.create_type_mask(df, 'Room') - if room_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': room_mask, 'tags': ['Enchantments Matter', 'Rooms Matter'] }]) - logger.info('Tagged %d Room cards', room_mask.sum()) - - # Create keyword mask for rooms keyword_mask = tag_utils.create_keyword_mask(df, 'Eerie') - if keyword_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': keyword_mask, 'tags': ['Enchantments Matter', 'Rooms Matter'] }]) - - # Create rooms care mask cares_mask = tag_utils.create_text_mask(df, 'target room') - if cares_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': cares_mask, 'tags': ['Enchantments Matter', 'Rooms Matter'] }]) - logger.info('Tagged %d cards that care about Rooms', cares_mask.sum()) - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed Room tagging in %.2fs', duration) - + rules = [ + {'mask': room_mask, 'tags': ['Enchantments Matter', 'Rooms Matter']}, + {'mask': keyword_mask, 'tags': ['Enchantments Matter', 'Rooms Matter']}, + {'mask': cares_mask, 'tags': ['Enchantments Matter', 'Rooms Matter']} + ] + tag_utils.tag_with_rules_and_logging( + df, rules, 'Room cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Room cards: {str(e)}') raise - logger.info('Completed tagging Room cards') - ## Classes def tag_classes(df: pd.DataFrame, color: str) -> None: """Tag cards with the Class subtype using vectorized operations. 
@@ -2408,25 +2118,15 @@ def tag_classes(df: pd.DataFrame, color: str) -> None: Raises: ValueError: if required DataFramecolumns are missing """ - logger.info('Tagging Class cards in %s_cards.csv', color) - start_time = pd.Timestamp.now() - try: - # Create mask for class type class_mask = tag_utils.create_type_mask(df, 'Class') - if class_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': class_mask, 'tags': ['Enchantments Matter', 'Classes Matter'] }]) - logger.info('Tagged %d Class cards', class_mask.sum()) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed Class tagging in %.2fs', duration) - + tag_utils.tag_with_logging( + df, class_mask, ['Enchantments Matter', 'Classes Matter'], 'Class cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Class cards: {str(e)}') raise - logger.info('Completed tagging Class cards') - ## Background def tag_backgrounds(df: pd.DataFrame, color: str) -> None: """Tag cards with the Background subtype or which let you choose a background using vectorized operations. 
@@ -2438,30 +2138,20 @@ def tag_backgrounds(df: pd.DataFrame, color: str) -> None: Raises: ValueError: if required DataFramecolumns are missing """ - logger.info('Tagging Background cards in %s_cards.csv', color) - start_time = pd.Timestamp.now() - try: - # Create mask for background type class_mask = tag_utils.create_type_mask(df, 'Background') - if class_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': class_mask, 'tags': ['Enchantments Matter', 'Backgrounds Matter'] }]) - logger.info('Tagged %d Background cards', class_mask.sum()) - - # Create mask for Choose a Background cares_mask = tag_utils.create_text_mask(df, 'Background') - if cares_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': cares_mask, 'tags': ['Enchantments Matter', 'Backgrounds Matter'] }]) - logger.info('Tagged %d cards that have Choose a Background', cares_mask.sum()) - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed Background tagging in %.2fs', duration) - + rules = [ + {'mask': class_mask, 'tags': ['Enchantments Matter', 'Backgrounds Matter']}, + {'mask': cares_mask, 'tags': ['Enchantments Matter', 'Backgrounds Matter']} + ] + tag_utils.tag_with_rules_and_logging( + df, rules, 'Background cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Background cards: {str(e)}') raise - - logger.info('Completed tagging Background cards') ## Shrines def tag_shrines(df: pd.DataFrame, color: str) -> None: @@ -2474,25 +2164,15 @@ def tag_shrines(df: pd.DataFrame, color: str) -> None: Raises: ValueError: if required DataFramecolumns are missing """ - logger.info('Tagging Shrine cards in %s_cards.csv', color) - start_time = pd.Timestamp.now() - try: - # Create mask for shrine type class_mask = tag_utils.create_type_mask(df, 'Shrine') - if class_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': class_mask, 'tags': ['Enchantments Matter', 'Shrines Matter'] }]) - logger.info('Tagged %d Shrine cards', class_mask.sum()) - - duration 
= (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed Shrine tagging in %.2fs', duration) - + tag_utils.tag_with_logging( + df, class_mask, ['Enchantments Matter', 'Shrines Matter'], 'Shrine cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Shrine cards: {str(e)}') raise - logger.info('Completed tagging Shrine cards') - ### Exile Matters ## Exile Matter effects, such as Impulse draw, foretell, etc... def tag_for_exile_matters(df: pd.DataFrame, color: str) -> None: @@ -2522,13 +2202,10 @@ def tag_for_exile_matters(df: pd.DataFrame, color: str) -> None: logger.info(f'Starting "Exile Matters" tagging for {color}_cards.csv') print('\n==========\n') try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags'} tag_utils.validate_dataframe_columns(df, required_cols) @@ -2573,8 +2250,6 @@ def tag_for_exile_matters(df: pd.DataFrame, color: str) -> None: tag_for_time_counters(df, color) logger.info('Completed Time Counters tagging') print('\n==========\n') - - # Log completion and performance metrics duration = pd.Timestamp.now() - start_time logger.info(f'Completed all "Exile Matters" tagging in {duration.total_seconds():.2f}s') @@ -2597,9 +2272,6 @@ def tag_for_general_exile_matters(df: pd.DataFrame, color: str) -> None: Raises: ValueError: if required DataFrame columns are missing """ - logger.info('Tagging Exile Matters cards in %s_cards.csv', color) - start_time = pd.Timestamp.now() - try: # Create exile mask text_patterns = [ @@ -2622,13 +2294,9 @@ def tag_for_general_exile_matters(df: pd.DataFrame, color: str) -> None: 'remains exiled' ] text_mask = tag_utils.create_text_mask(df, text_patterns) - if text_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': text_mask, 'tags': ['Exile Matters'] }]) - logger.info('Tagged 
%d Exile Matters cards', text_mask.sum()) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed Exile Matters tagging in %.2fs', duration) - + tag_utils.tag_with_logging( + df, text_mask, ['Exile Matters'], 'General Exile Matters cards', color=color, logger=logger + ) except Exception as e: logger.error('Error tagging Exile Matters cards: %s', str(e)) raise @@ -2644,31 +2312,18 @@ def tag_for_cascade(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - logger.info('Tagging Cascade cards in %s_cards.csv', color) - start_time = pd.Timestamp.now() - try: - # Create Cascade mask - text_patterns = [ - 'gain cascade', - 'has cascade', - 'have cascade', - 'have "cascade', - 'with cascade', - ] + text_patterns = ['gain cascade', 'has cascade', 'have cascade', 'have "cascade', 'with cascade'] text_mask = tag_utils.create_text_mask(df, text_patterns) - if text_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': text_mask, 'tags': ['Cascade', 'Exile Matters'] }]) - logger.info('Tagged %d cards relating to Cascade', text_mask.sum()) - keyword_mask = tag_utils.create_keyword_mask(df, 'Cascade') - if keyword_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': keyword_mask, 'tags': ['Cascade', 'Exile Matters'] }]) - logger.info('Tagged %d cards that have Cascade', keyword_mask.sum()) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed Cascade tagging in %.2fs', duration) - + + rules = [ + {'mask': text_mask, 'tags': ['Cascade', 'Exile Matters']}, + {'mask': keyword_mask, 'tags': ['Cascade', 'Exile Matters']} + ] + tag_utils.tag_with_rules_and_logging( + df, rules, 'Cascade cards', color=color, logger=logger + ) except Exception as e: logger.error('Error tagging Cascade cards: %s', str(e)) raise @@ -2681,26 +2336,15 @@ def tag_for_discover(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes 
""" - logger.info(f'Tagging Discover cards in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create mask for Discover keyword keyword_mask = tag_utils.create_keyword_mask(df, 'Discover') - - # Apply tags - tag_utils.apply_rules(df, [{ 'mask': keyword_mask, 'tags': ['Discover', 'Exile Matters'] }]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {keyword_mask.sum()} Discover cards in {duration:.2f}s') - + tag_utils.tag_with_logging( + df, keyword_mask, ['Discover', 'Exile Matters'], 'Discover cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Discover cards: {str(e)}') raise - logger.info('Completed tagging Discover cards') - ## Foretell cards, and cards that care about foretell def tag_for_foretell(df: pd.DataFrame, color: str) -> None: """Tag cards with Foretell using vectorized operations. @@ -2709,25 +2353,17 @@ def tag_for_foretell(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging Foretell cards in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create masks for Foretell - keyword_mask = tag_utils.create_keyword_mask(df, 'Foretell') - text_mask = tag_utils.create_text_mask(df, 'Foretell') - - final_mask = keyword_mask | text_mask - tag_utils.apply_rules(df, [{ 'mask': final_mask, 'tags': ['Foretell', 'Exile Matters'] }]) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} Foretell cards in {duration:.2f}s') + final_mask = tag_utils.build_combined_mask( + df, keyword_patterns='Foretell', text_patterns='Foretell' + ) + tag_utils.tag_with_logging( + df, final_mask, ['Foretell', 'Exile Matters'], 'Foretell cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Foretell cards: {str(e)}') raise - logger.info('Completed tagging Foretell cards') - ## Cards 
that have or care about imprint def tag_for_imprint(df: pd.DataFrame, color: str) -> None: """Tag cards with Imprint using vectorized operations. @@ -2736,25 +2372,17 @@ def tag_for_imprint(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging Imprint cards in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create masks for Imprint - keyword_mask = tag_utils.create_keyword_mask(df, 'Imprint') - text_mask = tag_utils.create_text_mask(df, 'Imprint') - - final_mask = keyword_mask | text_mask - tag_utils.apply_rules(df, [{ 'mask': final_mask, 'tags': ['Imprint', 'Exile Matters'] }]) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} Imprint cards in {duration:.2f}s') + final_mask = tag_utils.build_combined_mask( + df, keyword_patterns='Imprint', text_patterns='Imprint' + ) + tag_utils.tag_with_logging( + df, final_mask, ['Imprint', 'Exile Matters'], 'Imprint cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Imprint cards: {str(e)}') raise - logger.info('Completed tagging Imprint cards') - ## Cards that have or care about impulse def create_impulse_mask(df: pd.DataFrame) -> pd.Series: """Create a boolean mask for cards with impulse-like effects. 
@@ -2811,8 +2439,6 @@ def create_impulse_mask(df: pd.DataFrame) -> pd.Series: planeswalker_mask = df['type'].str.contains('Planeswalker', case=False, na=False) second_exclusion_mask = tag_utils.create_text_mask(df, secondary_exclusion_patterns) exclusion_mask = (~first_exclusion_mask & ~planeswalker_mask) & second_exclusion_mask - - # Combine masks impulse_mask = ((exile_mask & play_mask & ~exclusion_mask & ~tag_mask) | named_mask | junk_mask) @@ -2831,30 +2457,20 @@ def tag_for_impulse(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging Impulse effects in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create impulse mask + # Build masks impulse_mask = create_impulse_mask(df) - - # Build rules for base impulse and Junk token subtype junk_mask = tag_utils.create_text_mask(df, 'junk token') - tag_utils.apply_rules(df, [ - { 'mask': impulse_mask, 'tags': ['Exile Matters', 'Impulse'] }, - { 'mask': (impulse_mask & junk_mask), 'tags': ['Junk Tokens'] }, - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {impulse_mask.sum()} cards with Impulse effects in {duration:.2f}s') + rules = [ + {'mask': impulse_mask, 'tags': ['Exile Matters', 'Impulse']}, + {'mask': (impulse_mask & junk_mask), 'tags': ['Junk Tokens']}, + ] + tag_utils.tag_with_rules_and_logging(df, rules, 'impulse effects', color=color, logger=logger) except Exception as e: logger.error(f'Error tagging Impulse effects: {str(e)}') raise - logger.info('Completed tagging Impulse effects') - ## Cards that have or care about plotting def tag_for_plot(df: pd.DataFrame, color: str) -> None: """Tag cards with Plot using vectorized operations. 
@@ -2863,25 +2479,17 @@ def tag_for_plot(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging Plot cards in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create masks for Plot - keyword_mask = tag_utils.create_keyword_mask(df, 'Plot') - text_mask = tag_utils.create_text_mask(df, 'Plot') - - final_mask = keyword_mask | text_mask - tag_utils.apply_rules(df, [{ 'mask': final_mask, 'tags': ['Plot', 'Exile Matters'] }]) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} Plot cards in {duration:.2f}s') + final_mask = tag_utils.build_combined_mask( + df, keyword_patterns='Plot', text_patterns='Plot' + ) + tag_utils.tag_with_logging( + df, final_mask, ['Plot', 'Exile Matters'], 'Plot cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Plot cards: {str(e)}') raise - logger.info('Completed tagging Plot cards') - ## Cards that have or care about suspend def tag_for_suspend(df: pd.DataFrame, color: str) -> None: """Tag cards with Suspend using vectorized operations. 
@@ -2890,25 +2498,17 @@ def tag_for_suspend(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging Suspend cards in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create masks for Suspend - keyword_mask = tag_utils.create_keyword_mask(df, 'Suspend') - text_mask = tag_utils.create_text_mask(df, 'Suspend') - - final_mask = keyword_mask | text_mask - tag_utils.apply_rules(df, [{ 'mask': final_mask, 'tags': ['Suspend', 'Exile Matters'] }]) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} Suspend cards in {duration:.2f}s') + final_mask = tag_utils.build_combined_mask( + df, keyword_patterns='Suspend', text_patterns='Suspend' + ) + tag_utils.tag_with_logging( + df, final_mask, ['Suspend', 'Exile Matters'], 'Suspend cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Suspend cards: {str(e)}') raise - logger.info('Completed tagging Suspend cards') - ## Cards that have or care about Warp def tag_for_warp(df: pd.DataFrame, color: str) -> None: """Tag cards with Warp using vectorized operations. 
@@ -2917,25 +2517,17 @@ def tag_for_warp(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging Warp cards in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create masks for Warp - keyword_mask = tag_utils.create_keyword_mask(df, 'Warp') - text_mask = tag_utils.create_text_mask(df, 'Warp') - - final_mask = keyword_mask | text_mask - tag_utils.apply_rules(df, [{ 'mask': final_mask, 'tags': ['Warp', 'Exile Matters'] }]) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} Warp cards in {duration:.2f}s') + final_mask = tag_utils.build_combined_mask( + df, keyword_patterns='Warp', text_patterns='Warp' + ) + tag_utils.tag_with_logging( + df, final_mask, ['Warp', 'Exile Matters'], 'Warp cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Warp cards: {str(e)}') raise - logger.info('Completed tagging Warp cards') - def create_time_counters_mask(df: pd.DataFrame) -> pd.Series: """Create a boolean mask for cards that mention time counters or Time Travel. 
@@ -2975,28 +2567,21 @@ def tag_for_time_counters(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging Time Counters interactions in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: time_mask = create_time_counters_mask(df) - if not time_mask.any(): - logger.info('No Time Counters interactions found') - return - - # Always tag Time Counters - tag_utils.apply_rules(df, [{ 'mask': time_mask, 'tags': ['Time Counters'] }]) - + # Conditionally add Exile Matters if the card references exile or suspend exile_mask = tag_utils.create_text_mask(df, tag_constants.PATTERN_GROUPS['exile']) suspend_mask = tag_utils.create_keyword_mask(df, 'Suspend') | tag_utils.create_text_mask(df, 'Suspend') time_exile_mask = time_mask & (exile_mask | suspend_mask) - if time_exile_mask.any(): - tag_utils.apply_rules(df, [{ 'mask': time_exile_mask, 'tags': ['Exile Matters'] }]) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed Time Counters tagging in %.2fs', duration) - + + rules = [ + { 'mask': time_mask, 'tags': ['Time Counters'] }, + { 'mask': time_exile_mask, 'tags': ['Exile Matters'] } + ] + tag_utils.tag_with_rules_and_logging( + df, rules, 'Time Counters cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Time Counters interactions: {str(e)}') raise @@ -3011,10 +2596,7 @@ def create_creature_token_mask(df: pd.DataFrame) -> pd.Series: Returns: Boolean Series indicating which cards create creature tokens """ - # Create base pattern for token creation has_create = tag_utils.create_text_mask(df, tag_constants.CREATE_ACTION_PATTERN) - - # Create pattern for creature tokens token_patterns = [ 'artifact creature token', 'creature token', @@ -3041,7 +2623,6 @@ def create_token_modifier_mask(df: pd.DataFrame) -> pd.Series: Returns: Boolean Series indicating which cards modify token creation """ - # Create patterns 
for token modification modifier_patterns = [ 'create one or more', 'one or more creature', @@ -3052,8 +2633,6 @@ def create_token_modifier_mask(df: pd.DataFrame) -> pd.Series: 'put one or more' ] has_modifier = tag_utils.create_text_mask(df, modifier_patterns) - - # Create patterns for token effects effect_patterns = ['instead', 'plus'] has_effect = tag_utils.create_text_mask(df, effect_patterns) @@ -3076,7 +2655,6 @@ def create_tokens_matter_mask(df: pd.DataFrame) -> pd.Series: Returns: Boolean Series indicating which cards care about tokens """ - # Create patterns for token matters text_patterns = [ 'tokens.*you.*control', 'that\'s a token', @@ -3100,12 +2678,9 @@ def tag_for_tokens(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info('Tagging token-related cards in %s_cards.csv', color) print('\n==========\n') try: - # Validate required columns required_cols = {'text', 'themeTags'} tag_utils.validate_dataframe_columns(df, required_cols) @@ -3114,14 +2689,7 @@ def tag_for_tokens(df: pd.DataFrame, color: str) -> None: modifier_mask = create_token_modifier_mask(df) matters_mask = create_tokens_matter_mask(df) - # Apply via small rules engine - tag_utils.apply_rules(df, [ - { 'mask': creature_mask, 'tags': ['Creature Tokens', 'Token Creation', 'Tokens Matter'] }, - { 'mask': modifier_mask, 'tags': ['Token Modification', 'Token Creation', 'Tokens Matter'] }, - { 'mask': matters_mask, 'tags': ['Tokens Matter'] }, - ]) - - # Eldrazi Spawn/Scion special-casing: add Aristocrats and Ramp synergy tags + # Eldrazi Spawn/Scion special case spawn_patterns = [ 'eldrazi spawn creature token', 'eldrazi scion creature token', @@ -3129,21 +2697,13 @@ def tag_for_tokens(df: pd.DataFrame, color: str) -> None: 'scion creature token with "sacrifice' ] spawn_scion_mask = tag_utils.create_text_mask(df, spawn_patterns) - if spawn_scion_mask.any(): - tag_utils.apply_rules(df, [ - { 'mask': 
spawn_scion_mask, 'tags': ['Aristocrats', 'Ramp'] } - ]) - - # Logging - if creature_mask.any(): - logger.info('Tagged %d cards that create creature tokens', creature_mask.sum()) - if modifier_mask.any(): - logger.info('Tagged %d cards that modify token creation', modifier_mask.sum()) - if matters_mask.any(): - logger.info('Tagged %d cards that care about tokens', matters_mask.sum()) - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed token tagging in %.2fs', duration) + rules = [ + {'mask': creature_mask, 'tags': ['Creature Tokens', 'Token Creation', 'Tokens Matter']}, + {'mask': modifier_mask, 'tags': ['Token Modification', 'Token Creation', 'Tokens Matter']}, + {'mask': matters_mask, 'tags': ['Tokens Matter']}, + {'mask': spawn_scion_mask, 'tags': ['Aristocrats', 'Ramp']}, + ] + tag_utils.tag_with_rules_and_logging(df, rules, 'token-related cards', color=color, logger=logger) except Exception as e: logger.error('Error tagging token cards: %s', str(e)) @@ -3158,12 +2718,12 @@ def tag_for_freerunning(df: pd.DataFrame, color: str) -> None: try: required = {'text', 'themeTags'} tag_utils.validate_dataframe_columns(df, required) - mask = tag_utils.create_keyword_mask(df, 'Freerunning') | tag_utils.create_text_mask(df, ['freerunning', 'free running']) - if mask.any(): - tag_utils.apply_rules(df, [ - { 'mask': mask, 'tags': ['Cost Reduction', 'Freerunning'] } - ]) - logger.info('Tagged %d Freerunning cards', mask.sum()) + mask = tag_utils.build_combined_mask( + df, keyword_patterns='Freerunning', text_patterns=['freerunning', 'free running'] + ) + tag_utils.tag_with_logging( + df, mask, ['Cost Reduction', 'Freerunning'], 'Freerunning cards', color=color, logger=logger + ) except Exception as e: logger.error('Error tagging Freerunning: %s', str(e)) raise @@ -3172,44 +2732,39 @@ def tag_for_freerunning(df: pd.DataFrame, color: str) -> None: def tag_for_craft(df: pd.DataFrame, color: str) -> None: """Tag cards with Craft. 
Adds Transform; conditionally adds Artifacts Matter, Exile Matters, and Graveyard Matters.""" try: - required = {'text', 'themeTags'} - tag_utils.validate_dataframe_columns(df, required) craft_mask = tag_utils.create_keyword_mask(df, 'Craft') | tag_utils.create_text_mask(df, ['craft with', 'craft —', ' craft ']) - if craft_mask.any(): - rules = [{ 'mask': craft_mask, 'tags': ['Transform'] }] - # Conditionals - artifact_cond = craft_mask & tag_utils.create_text_mask(df, ['artifact', 'artifacts']) - exile_cond = craft_mask & tag_utils.create_text_mask(df, ['exile']) - gy_cond = craft_mask & tag_utils.create_text_mask(df, ['graveyard']) - if artifact_cond.any(): - rules.append({ 'mask': artifact_cond, 'tags': ['Artifacts Matter'] }) - if exile_cond.any(): - rules.append({ 'mask': exile_cond, 'tags': ['Exile Matters'] }) - if gy_cond.any(): - rules.append({ 'mask': gy_cond, 'tags': ['Graveyard Matters'] }) - tag_utils.apply_rules(df, rules) - logger.info('Tagged %d Craft cards', craft_mask.sum()) + + # Conditionals + artifact_cond = craft_mask & tag_utils.create_text_mask(df, ['artifact', 'artifacts']) + exile_cond = craft_mask & tag_utils.create_text_mask(df, ['exile']) + gy_cond = craft_mask & tag_utils.create_text_mask(df, ['graveyard']) + + rules = [ + { 'mask': craft_mask, 'tags': ['Transform'] }, + { 'mask': artifact_cond, 'tags': ['Artifacts Matter'] }, + { 'mask': exile_cond, 'tags': ['Exile Matters'] }, + { 'mask': gy_cond, 'tags': ['Graveyard Matters'] } + ] + tag_utils.tag_with_rules_and_logging( + df, rules, 'Craft cards', color=color, logger=logger + ) except Exception as e: logger.error('Error tagging Craft: %s', str(e)) raise -### Spree (modal, cost-scaling spells) def tag_for_spree(df: pd.DataFrame, color: str) -> None: """Tag Spree spells with Modal and Cost Scaling.""" try: - required = {'text', 'themeTags'} - tag_utils.validate_dataframe_columns(df, required) - mask = tag_utils.create_keyword_mask(df, 'Spree') | tag_utils.create_text_mask(df, 
['spree']) - if mask.any(): - tag_utils.apply_rules(df, [ - { 'mask': mask, 'tags': ['Modal', 'Cost Scaling'] } - ]) - logger.info('Tagged %d Spree cards', mask.sum()) + mask = tag_utils.build_combined_mask( + df, keyword_patterns='Spree', text_patterns='spree' + ) + tag_utils.tag_with_logging( + df, mask, ['Modal', 'Cost Scaling'], 'Spree cards', color=color, logger=logger + ) except Exception as e: logger.error('Error tagging Spree: %s', str(e)) raise -### Explore and Map tokens def tag_for_explore_and_map(df: pd.DataFrame, color: str) -> None: """Tag Explore and Map token interactions. @@ -3217,24 +2772,17 @@ def tag_for_explore_and_map(df: pd.DataFrame, color: str) -> None: - Map Tokens: add Card Selection and Tokens Matter """ try: - required = {'text', 'themeTags'} - tag_utils.validate_dataframe_columns(df, required) explore_mask = tag_utils.create_keyword_mask(df, 'Explore') | tag_utils.create_text_mask(df, ['explores', 'explore.']) map_mask = tag_utils.create_text_mask(df, ['map token', 'map tokens']) - rules = [] - if explore_mask.any(): - rules.append({ 'mask': explore_mask, 'tags': ['Card Selection'] }) - # If the text also references +1/+1 counters, add that theme - # Use literal match for '+1/+1 counter' to avoid regex errors on '+' at start - explore_counters = explore_mask & tag_utils.create_text_mask(df, ['+1/+1 counter'], regex=False) - if explore_counters.any(): - rules.append({ 'mask': explore_counters, 'tags': ['+1/+1 Counters'] }) - if map_mask.any(): - rules.append({ 'mask': map_mask, 'tags': ['Card Selection', 'Tokens Matter'] }) - if rules: - tag_utils.apply_rules(df, rules) - total = (explore_mask.astype(int) + map_mask.astype(int)).astype(bool).sum() - logger.info('Tagged %d Explore/Map cards', total) + explore_counters = explore_mask & tag_utils.create_text_mask(df, ['+1/+1 counter'], regex=False) + rules = [ + { 'mask': explore_mask, 'tags': ['Card Selection'] }, + { 'mask': explore_counters, 'tags': ['+1/+1 Counters'] }, + { 'mask': 
map_mask, 'tags': ['Card Selection', 'Tokens Matter'] } + ] + tag_utils.tag_with_rules_and_logging( + df, rules, 'Explore/Map cards', color=color, logger=logger + ) except Exception as e: logger.error('Error tagging Explore/Map: %s', str(e)) raise @@ -3246,9 +2794,9 @@ def tag_for_rad_counters(df: pd.DataFrame, color: str) -> None: required = {'text', 'themeTags'} tag_utils.validate_dataframe_columns(df, required) rad_mask = tag_utils.create_text_mask(df, ['rad counter', 'rad counters']) - if rad_mask.any(): - tag_utils.apply_rules(df, [ { 'mask': rad_mask, 'tags': ['Rad Counters'] } ]) - logger.info('Tagged %d Rad counter cards', rad_mask.sum()) + tag_utils.tag_with_logging( + df, rad_mask, ['Rad Counters'], 'Rad counter cards', color=color, logger=logger + ) except Exception as e: logger.error('Error tagging Rad counters: %s', str(e)) raise @@ -3263,9 +2811,6 @@ def tag_for_discard_matters(df: pd.DataFrame, color: str) -> None: Also adds Loot where applicable is handled elsewhere; this focuses on the theme surface. """ try: - required = {'text', 'themeTags'} - tag_utils.validate_dataframe_columns(df, required) - # Events where YOU discard (as part of a cost or effect). Keep generic 'discard a card' but filter out opponent/each-player cases. 
discard_action_patterns = [ r'you discard (?:a|one|two|three|x) card', @@ -3299,9 +2844,9 @@ def tag_for_discard_matters(df: pd.DataFrame, color: str) -> None: madness_mask = tag_utils.create_text_mask(df, [r'\bmadness\b']) final_mask = ((action_mask & ~exclude_mask) | trigger_mask | blood_mask | madness_mask) - if final_mask.any(): - tag_utils.apply_rules(df, [ { 'mask': final_mask, 'tags': ['Discard Matters'] } ]) - logger.info('Tagged %d cards for Discard Matters', final_mask.sum()) + tag_utils.tag_with_logging( + df, final_mask, ['Discard Matters'], 'Discard Matters cards', color=color, logger=logger + ) except Exception as e: logger.error('Error tagging Discard Matters: %s', str(e)) raise @@ -3330,13 +2875,10 @@ def tag_for_life_matters(df: pd.DataFrame, color: str) -> None: print('\n==========\n') try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags', 'type', 'creatureTypes'} tag_utils.validate_dataframe_columns(df, required_cols) @@ -3360,8 +2902,6 @@ def tag_for_life_matters(df: pd.DataFrame, color: str) -> None: tag_for_life_kindred(df, color) logger.info('Completed life kindred tagging') print('\n==========\n') - - # Log completion and performance metrics duration = pd.Timestamp.now() - start_time logger.info(f'Completed all "Life Matters" tagging in {duration.total_seconds():.2f}s') @@ -3376,11 +2916,7 @@ def tag_for_lifegain(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging lifegain effects in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create masks for different lifegain patterns gain_mask = ( tag_utils.create_numbered_phrase_mask(df, ['gain', 'gains'], 'life') | tag_utils.create_text_mask(df, ['gain life', 'gains life']) @@ -3393,21 
+2929,13 @@ def tag_for_lifegain(df: pd.DataFrame, color: str) -> None: final_mask = gain_mask & ~replacement_mask trigger_mask = tag_utils.create_text_mask(df, ['if you would gain life', 'whenever you gain life']) - # Apply via rules engine - tag_utils.apply_rules(df, [ + rules = [ { 'mask': final_mask, 'tags': ['Lifegain', 'Life Matters'] }, { 'mask': trigger_mask, 'tags': ['Lifegain', 'Lifegain Triggers', 'Life Matters'] }, - ]) - - # Logging - if final_mask.any(): - logger.info(f'Tagged {final_mask.sum()} cards with lifegain effects') - if trigger_mask.any(): - logger.info(f'Tagged {trigger_mask.sum()} cards with lifegain triggers') - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Completed lifegain tagging in {duration:.2f}s') - + ] + tag_utils.tag_with_rules_and_logging( + df, rules, 'Lifegain cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging lifegain effects: {str(e)}') raise @@ -3419,11 +2947,7 @@ def tag_for_lifelink(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging lifelink effects in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create masks for different lifelink patterns lifelink_mask = tag_utils.create_text_mask(df, 'lifelink') lifelike_mask = tag_utils.create_text_mask(df, [ 'deals damage, you gain that much life', @@ -3433,20 +2957,12 @@ def tag_for_lifelink(df: pd.DataFrame, color: str) -> None: # Exclude combat damage references for life loss conversion damage_mask = tag_utils.create_text_mask(df, 'deals damage') life_loss_mask = lifelike_mask & ~damage_mask - - # Combine masks final_mask = lifelink_mask | lifelike_mask | life_loss_mask - # Apply tags - if final_mask.any(): - tag_utils.apply_rules(df, [ - { 'mask': final_mask, 'tags': ['Lifelink', 'Lifegain', 'Life Matters'] }, - ]) - logger.info(f'Tagged {final_mask.sum()} cards with lifelink effects') - - 
duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Completed lifelink tagging in {duration:.2f}s') - + tag_utils.tag_with_logging( + df, final_mask, ['Lifelink', 'Lifegain', 'Life Matters'], + 'Lifelink cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging lifelink effects: {str(e)}') raise @@ -3458,11 +2974,7 @@ def tag_for_life_loss(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging life loss effects in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create masks for different life loss patterns text_patterns = [ 'you lost life', 'you gained and lost life', @@ -3475,16 +2987,10 @@ def tag_for_life_loss(df: pd.DataFrame, color: str) -> None: ] text_mask = tag_utils.create_text_mask(df, text_patterns) - # Apply tags - if text_mask.any(): - tag_utils.apply_rules(df, [ - { 'mask': text_mask, 'tags': ['Lifeloss', 'Lifeloss Triggers', 'Life Matters'] }, - ]) - logger.info(f'Tagged {text_mask.sum()} cards with life loss effects') - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Completed life loss tagging in {duration:.2f}s') - + tag_utils.tag_with_logging( + df, text_mask, ['Lifeloss', 'Lifeloss Triggers', 'Life Matters'], + 'Life loss cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging life loss effects: {str(e)}') raise @@ -3496,27 +3002,13 @@ def tag_for_food(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging Food token in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create masks for Food tokens - text_mask = tag_utils.create_text_mask(df, 'food') - type_mask = tag_utils.create_type_mask(df, 'food') - - # Combine masks - final_mask = text_mask | type_mask - - # Apply tags - if final_mask.any(): - 
tag_utils.apply_rules(df, [ - { 'mask': final_mask, 'tags': ['Food', 'Lifegain', 'Life Matters'] }, - ]) - logger.info(f'Tagged {final_mask.sum()} cards with Food effects') - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Completed Food tagging in {duration:.2f}s') - + final_mask = tag_utils.build_combined_mask( + df, text_patterns='food', type_patterns='food' + ) + tag_utils.tag_with_logging( + df, final_mask, ['Food', 'Lifegain', 'Life Matters'], 'Food cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Food effects: {str(e)}') raise @@ -3528,24 +3020,14 @@ def tag_for_life_kindred(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging life-related kindred effects in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create mask for life-related creature types life_tribes = ['Angel', 'Bat', 'Cleric', 'Vampire'] kindred_mask = df['creatureTypes'].apply(lambda x: any(tribe in x for tribe in life_tribes)) - - # Apply tags - if kindred_mask.any(): - tag_utils.apply_rules(df, [ - { 'mask': kindred_mask, 'tags': ['Lifegain', 'Life Matters'] }, - ]) - logger.info(f'Tagged {kindred_mask.sum()} cards with life-related kindred effects') - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Completed life kindred tagging in {duration:.2f}s') - + + tag_utils.tag_with_logging( + df, kindred_mask, ['Lifegain', 'Life Matters'], 'life-related kindred cards', + color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging life kindred effects: {str(e)}') raise @@ -3576,13 +3058,10 @@ def tag_for_counters(df: pd.DataFrame, color: str) -> None: print('\n==========\n') try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # 
Validate required columns required_cols = {'text', 'themeTags', 'name', 'creatureTypes'} tag_utils.validate_dataframe_columns(df, required_cols) @@ -3602,8 +3081,6 @@ def tag_for_counters(df: pd.DataFrame, color: str) -> None: tag_for_special_counters(df, color) logger.info('Completed special counter tagging') print('\n==========\n') - - # Log completion and performance metrics duration = pd.Timestamp.now() - start_time logger.info(f'Completed all counter-related tagging in {duration.total_seconds():.2f}s') @@ -3618,11 +3095,7 @@ def tag_for_general_counters(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging general counter effects in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create masks for different counter patterns text_patterns = [ 'choose a kind of counter', 'if it had counters', @@ -3633,25 +3106,17 @@ def tag_for_general_counters(df: pd.DataFrame, color: str) -> None: 'with counters on them' ] text_mask = tag_utils.create_text_mask(df, text_patterns) - - # Create mask for specific cards specific_cards = [ 'banner of kinship', 'damning verdict', 'ozolith' ] name_mask = tag_utils.create_name_mask(df, specific_cards) - - # Combine masks final_mask = text_mask | name_mask - # Apply tags - tag_utils.apply_tag_vectorized(df, final_mask, ['Counters Matter']) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with general counter effects in {duration:.2f}s') - + tag_utils.tag_with_logging( + df, final_mask, ['Counters Matter'], 'General counter cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging general counter effects: {str(e)}') raise @@ -3663,36 +3128,25 @@ def tag_for_plus_counters(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging 
+1/+1 counter effects in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Create text pattern mask - text_patterns = [ - r'\+1/\+1 counter', - r'if it had counters', - r'one or more counters', - r'one or more \+1/\+1 counter', - r'proliferate', - r'undying', - r'with counters on them' - ] - text_mask = tag_utils.create_text_mask(df, text_patterns) + # Create text pattern mask using compiled patterns + text_mask = ( + df['text'].str.contains(rgx.PLUS_ONE_COUNTER.pattern, case=False, na=False, regex=True) | + df['text'].str.contains(rgx.IF_HAD_COUNTERS.pattern, case=False, na=False, regex=True) | + df['text'].str.contains(rgx.ONE_OR_MORE_COUNTERS.pattern, case=False, na=False, regex=True) | + df['text'].str.contains(rgx.ONE_OR_MORE_PLUS_ONE_COUNTERS.pattern, case=False, na=False, regex=True) | + df['text'].str.contains(rgx.PROLIFERATE.pattern, case=False, na=False, regex=True) | + df['text'].str.contains(rgx.UNDYING.pattern, case=False, na=False, regex=True) | + df['text'].str.contains(rgx.WITH_COUNTERS_ON_THEM.pattern, case=False, na=False, regex=True) + ) # Create creature type mask type_mask = df['creatureTypes'].apply(lambda x: 'Hydra' in x if isinstance(x, list) else False) - - # Combine masks final_mask = text_mask | type_mask - # Apply tags via rules engine - tag_utils.apply_rules(df, [ - { 'mask': final_mask, 'tags': ['+1/+1 Counters', 'Counters Matter', 'Voltron'] }, - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with +1/+1 counter effects in {duration:.2f}s') - + tag_utils.tag_with_logging( + df, final_mask, ['+1/+1 Counters', 'Counters Matter', 'Voltron'], + '+1/+1 counter cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging +1/+1 counter effects: {str(e)}') raise @@ -3704,9 +3158,6 @@ def tag_for_minus_counters(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for 
logging purposes """ - logger.info(f'Tagging -1/-1 counter effects in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: # Create text pattern mask text_patterns = [ @@ -3721,15 +3172,10 @@ def tag_for_minus_counters(df: pd.DataFrame, color: str) -> None: ] text_mask = tag_utils.create_text_mask(df, text_patterns) - # Apply tags via rules engine - tag_utils.apply_rules(df, [ - { 'mask': text_mask, 'tags': ['-1/-1 Counters', 'Counters Matter'] }, - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {text_mask.sum()} cards with -1/-1 counter effects in {duration:.2f}s') - + tag_utils.tag_with_logging( + df, text_mask, ['-1/-1 Counters', 'Counters Matter'], + '-1/-1 counter cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging -1/-1 counter effects: {str(e)}') raise @@ -3741,31 +3187,17 @@ def tag_for_special_counters(df: pd.DataFrame, color: str) -> None: df: DataFrame containing card data color: Color identifier for logging purposes """ - logger.info(f'Tagging special counter effects in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - # Process each counter type - counter_counts = {} + rules = [] for counter_type in tag_constants.COUNTER_TYPES: - # Create pattern for this counter type pattern = f'{counter_type} counter' mask = tag_utils.create_text_mask(df, pattern) + tags = [f'{counter_type} Counters', 'Counters Matter'] + rules.append({ 'mask': mask, 'tags': tags }) - if mask.any(): - # Apply tags via rules engine - tags = [f'{counter_type} Counters', 'Counters Matter'] - tag_utils.apply_rules(df, [ { 'mask': mask, 'tags': tags } ]) - counter_counts[counter_type] = mask.sum() - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - total_cards = sum(counter_counts.values()) - logger.info(f'Tagged {total_cards} cards with special counter effects in {duration:.2f}s') - for counter_type, count in counter_counts.items(): - if 
count > 0: - logger.info(f' - {counter_type}: {count} cards') - + tag_utils.tag_with_rules_and_logging( + df, rules, 'Special counter cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging special counter effects: {str(e)}') raise @@ -3835,35 +3267,22 @@ def tag_for_voltron(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() - logger.info(f'Starting Voltron strategy tagging for {color}_cards.csv') - try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags', 'type', 'name'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different Voltron aspects commander_mask = create_voltron_commander_mask(df) support_mask = create_voltron_support_mask(df) equipment_mask = create_voltron_equipment_mask(df) aura_mask = create_voltron_aura_mask(df) - - # Combine masks final_mask = commander_mask | support_mask | equipment_mask | aura_mask - - # Apply tags - tag_utils.apply_tag_vectorized(df, final_mask, ['Voltron']) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with Voltron strategy in {duration:.2f}s') + tag_utils.tag_with_logging( + df, final_mask, ['Voltron'], + 'Voltron strategy cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_voltron: {str(e)}') @@ -3879,15 +3298,12 @@ def create_lands_matter_mask(df: pd.DataFrame) -> pd.Series: Returns: Boolean Series indicating which cards have lands matter effects """ - # Create mask for named cards name_mask = tag_utils.create_name_mask(df, tag_constants.LANDS_MATTER_SPECIFIC_CARDS) # Create text pattern masks play_mask = 
tag_utils.create_text_mask(df, tag_constants.LANDS_MATTER_PATTERNS['land_play']) search_mask = tag_utils.create_text_mask(df, tag_constants.LANDS_MATTER_PATTERNS['land_search']) state_mask = tag_utils.create_text_mask(df, tag_constants.LANDS_MATTER_PATTERNS['land_state']) - - # Combine all masks return name_mask | play_mask | search_mask | state_mask def create_domain_mask(df: pd.DataFrame) -> pd.Series: @@ -3940,8 +3356,6 @@ def create_land_types_mask(df: pd.DataFrame) -> pd.Series: """ # Create type-based mask type_mask = tag_utils.create_type_mask(df, tag_constants.LAND_TYPES) - - # Create text pattern masks for each land type text_masks = [] for land_type in tag_constants.LAND_TYPES: patterns = [ @@ -3950,8 +3364,6 @@ def create_land_types_mask(df: pd.DataFrame) -> pd.Series: f'{land_type} you control' ] text_masks.append(tag_utils.create_text_mask(df, patterns)) - - # Combine all masks return type_mask | pd.concat(text_masks, axis=1).any(axis=1) def tag_for_lands_matter(df: pd.DataFrame, color: str) -> None: @@ -3971,45 +3383,24 @@ def tag_for_lands_matter(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting lands matter tagging for {color}_cards.csv') print('\n==========\n') try: - # Validate required columns required_cols = {'text', 'themeTags', 'type', 'name'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different land effects lands_mask = create_lands_matter_mask(df) domain_mask = create_domain_mask(df) landfall_mask = create_landfall_mask(df) landwalk_mask = create_landwalk_mask(df) types_mask = create_land_types_mask(df) - - # Apply tags using centralized rules - tag_utils.apply_rules(df, [ - { 'mask': lands_mask, 'tags': ['Lands Matter'] }, - { 'mask': domain_mask, 'tags': ['Domain', 'Lands Matter'] }, - { 'mask': landfall_mask, 'tags': ['Landfall', 'Lands Matter'] }, - { 'mask': landwalk_mask, 'tags': 
['Landwalk', 'Lands Matter'] }, - { 'mask': types_mask, 'tags': ['Land Types Matter', 'Lands Matter'] }, - ]) - - # Log counts (only when any matches) - if lands_mask.any(): - logger.info(f'Tagged {lands_mask.sum()} cards with general lands matter effects') - if domain_mask.any(): - logger.info(f'Tagged {domain_mask.sum()} cards with domain effects') - if landfall_mask.any(): - logger.info(f'Tagged {landfall_mask.sum()} cards with landfall effects') - if landwalk_mask.any(): - logger.info(f'Tagged {landwalk_mask.sum()} cards with landwalk abilities') - if types_mask.any(): - logger.info(f'Tagged {types_mask.sum()} cards with specific land type effects') - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Completed lands matter tagging in {duration:.2f}s') + rules = [ + {'mask': lands_mask, 'tags': ['Lands Matter']}, + {'mask': domain_mask, 'tags': ['Domain', 'Lands Matter']}, + {'mask': landfall_mask, 'tags': ['Landfall', 'Lands Matter']}, + {'mask': landwalk_mask, 'tags': ['Landwalk', 'Lands Matter']}, + {'mask': types_mask, 'tags': ['Land Types Matter', 'Lands Matter']}, + ] + tag_utils.tag_with_rules_and_logging(df, rules, 'lands matter effects', color=color, logger=logger) except Exception as e: logger.error(f'Error in tag_for_lands_matter: {str(e)}') @@ -4120,39 +3511,27 @@ def tag_for_spellslinger(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() logger.info(f'Starting Spellslinger tagging for {color}_cards.csv') print('\n==========\n') try: - # Validate required columns required_cols = {'text', 'themeTags', 'type', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different spellslinger patterns text_mask = create_spellslinger_text_mask(df) keyword_mask = create_spellslinger_keyword_mask(df) type_mask = create_spellslinger_type_mask(df) exclusion_mask = create_spellslinger_exclusion_mask(df) - - # 
Combine masks final_mask = (text_mask | keyword_mask | type_mask) & ~exclusion_mask - - # Apply tags via rules engine - tag_utils.apply_rules(df, [ - { 'mask': final_mask, 'tags': ['Spellslinger', 'Spells Matter'] }, - ]) - logger.info(f'Tagged {final_mask.sum()} general Spellslinger cards') + tag_utils.tag_with_logging( + df, final_mask, ['Spellslinger', 'Spells Matter'], + 'general Spellslinger cards', color=color, logger=logger + ) # Run non-generalized tags tag_for_storm(df, color) tag_for_magecraft(df, color) tag_for_cantrips(df, color) tag_for_spell_copy(df, color) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Completed Spellslinger tagging in {duration:.2f}s') except Exception as e: logger.error(f'Error in tag_for_spellslinger: {str(e)}') @@ -4195,22 +3574,11 @@ def tag_for_storm(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing """ try: - # Validate required columns - required_cols = {'text', 'themeTags', 'keywords'} - tag_utils.validate_dataframe_columns(df, required_cols) - - # Create storm mask storm_mask = create_storm_mask(df) - - # Apply tags via rules engine - tag_utils.apply_rules(df, [ - { 'mask': storm_mask, 'tags': ['Storm', 'Spellslinger', 'Spells Matter'] }, - ]) - - # Log results - storm_count = storm_mask.sum() - logger.info(f'Tagged {storm_count} cards with Storm effects') - + tag_utils.tag_with_logging( + df, storm_mask, ['Storm', 'Spellslinger', 'Spells Matter'], + 'Storm cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Storm effects: {str(e)}') raise @@ -4267,8 +3635,6 @@ def tag_for_cantrips(df: pd.DataFrame, color: str) -> None: has_draw & low_cost ) - - # Apply tags via rules engine tag_utils.apply_rules(df, [ { 'mask': cantrip_mask, 'tags': tag_constants.TAG_GROUPS['Cantrips'] }, ]) @@ -4304,22 +3670,11 @@ def tag_for_magecraft(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame 
columns are missing """ try: - # Validate required columns - required_cols = {'themeTags', 'keywords'} - tag_utils.validate_dataframe_columns(df, required_cols) - - # Create magecraft mask magecraft_mask = create_magecraft_mask(df) - - # Apply tags via rules engine - tag_utils.apply_rules(df, [ - { 'mask': magecraft_mask, 'tags': ['Magecraft', 'Spellslinger', 'Spells Matter'] }, - ]) - - # Log results - magecraft_count = magecraft_mask.sum() - logger.info(f'Tagged {magecraft_count} cards with Magecraft effects') - + tag_utils.tag_with_logging( + df, magecraft_mask, ['Magecraft', 'Spellslinger', 'Spells Matter'], + 'Magecraft cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error tagging Magecraft effects: {str(e)}') raise @@ -4390,18 +3745,11 @@ def tag_for_spell_copy(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing """ try: - # Validate required columns required_cols = {'text', 'themeTags', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different spell copy patterns text_mask = create_spell_copy_text_mask(df) keyword_mask = create_spell_copy_keyword_mask(df) - - # Combine masks final_mask = text_mask | keyword_mask - - # Apply tags via rules engine tag_utils.apply_rules(df, [ { 'mask': final_mask, 'tags': ['Spell Copy', 'Spellslinger', 'Spells Matter'] }, ]) @@ -4534,38 +3882,20 @@ def tag_for_ramp(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting ramp tagging for {color}_cards.csv') print('\n==========\n') try: - # Create masks for different ramp categories dork_mask = create_mana_dork_mask(df) rock_mask = create_mana_rock_mask(df) lands_mask = create_extra_lands_mask(df) search_mask = create_land_search_mask(df) - - # Apply tags via rules engine - tag_utils.apply_rules(df, [ - { 'mask': dork_mask, 'tags': ['Mana Dork', 'Ramp'] }, - { 
'mask': rock_mask, 'tags': ['Mana Rock', 'Ramp'] }, - { 'mask': lands_mask, 'tags': ['Lands Matter', 'Ramp'] }, - { 'mask': search_mask, 'tags': ['Lands Matter', 'Ramp'] }, - ]) - - # Logging - if dork_mask.any(): - logger.info(f'Tagged {dork_mask.sum()} mana dork cards') - if rock_mask.any(): - logger.info(f'Tagged {rock_mask.sum()} mana rock cards') - if lands_mask.any(): - logger.info(f'Tagged {lands_mask.sum()} extra lands cards') - if search_mask.any(): - logger.info(f'Tagged {search_mask.sum()} land search cards') - - # Log completion - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Completed ramp tagging in {duration:.2f}s') + rules = [ + {'mask': dork_mask, 'tags': ['Mana Dork', 'Ramp']}, + {'mask': rock_mask, 'tags': ['Mana Rock', 'Ramp']}, + {'mask': lands_mask, 'tags': ['Lands Matter', 'Ramp']}, + {'mask': search_mask, 'tags': ['Lands Matter', 'Ramp']}, + ] + tag_utils.tag_with_rules_and_logging(df, rules, 'ramp effects', color=color, logger=logger) except Exception as e: logger.error(f'Error in tag_for_ramp: {str(e)}') @@ -4733,36 +4063,21 @@ def tag_for_aggro(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() - logger.info(f'Starting Aggro strategy tagging for {color}_cards.csv') - try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different aggro aspects text_mask = create_aggro_text_mask(df) keyword_mask = create_aggro_keyword_mask(df) theme_mask = create_aggro_theme_mask(df) - - # Combine masks final_mask = text_mask | keyword_mask | theme_mask - - # Apply tags via rules engine - tag_utils.apply_rules(df, [ - 
{ 'mask': final_mask, 'tags': ['Aggro', 'Combat Matters'] }, - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with Aggro strategy in {duration:.2f}s') + tag_utils.tag_with_logging( + df, final_mask, ['Aggro', 'Combat Matters'], + 'Aggro strategy cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_aggro: {str(e)}') @@ -4861,35 +4176,19 @@ def tag_for_aristocrats(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting aristocrats effect tagging for {color}_cards.csv') - try: - # Validate required columns required_cols = {'text', 'themeTags', 'name', 'type', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different aristocrat patterns text_mask = create_aristocrat_text_mask(df) name_mask = create_aristocrat_name_mask(df) self_sacrifice_mask = create_aristocrat_self_sacrifice_mask(df) keyword_mask = create_aristocrat_keyword_mask(df) exclusion_mask = create_aristocrat_exclusion_mask(df) - - # Combine masks final_mask = (text_mask | name_mask | self_sacrifice_mask | keyword_mask) & ~exclusion_mask - - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Aristocrats', 'Sacrifice Matters'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with aristocrats effects in {duration:.2f}s') + tag_utils.tag_with_logging( + df, final_mask, ['Aristocrats', 'Sacrifice Matters'], + 'aristocrats effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_aristocrats: {str(e)}') @@ -4902,40 +4201,20 @@ def tag_for_bending(df: pd.DataFrame, color: str) -> None: Looks for 'airbend', 'waterbend', 'firebend', 'earthbend' in rules text and 
applies tags accordingly. """ - logger.info(f'Tagging Bending keywords in {color}_cards.csv') - start_time = pd.Timestamp.now() - try: - rules = [] air_mask = tag_utils.create_text_mask(df, 'airbend') - if air_mask.any(): - rules.append({ 'mask': air_mask, 'tags': ['Airbending', 'Exile Matters', 'Leave the Battlefield'] }) - water_mask = tag_utils.create_text_mask(df, 'waterbend') - if water_mask.any(): - rules.append({ 'mask': water_mask, 'tags': ['Waterbending', 'Cost Reduction', 'Big Mana'] }) - fire_mask = tag_utils.create_text_mask(df, 'firebend') - if fire_mask.any(): - rules.append({ 'mask': fire_mask, 'tags': ['Aggro', 'Combat Matters', 'Firebending', 'Mana Dork', 'Ramp', 'X Spells'] }) - earth_mask = tag_utils.create_text_mask(df, 'earthbend') - if earth_mask.any(): - rules.append({ 'mask': earth_mask, 'tags': ['Earthbending', 'Lands Matter', 'Landfall'] }) - bending_mask = air_mask | water_mask | fire_mask | earth_mask - if bending_mask.any(): - rules.append({ 'mask': bending_mask, 'tags': ['Bending'] }) - - if rules: - tag_utils.apply_rules(df, rules) - total = sum(int(r['mask'].sum()) for r in rules) - logger.info('Tagged %d cards with Bending keywords', total) - else: - logger.info('No Bending keywords found') - - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info('Completed Bending tagging in %.2fs', duration) + rules = [ + {'mask': air_mask, 'tags': ['Airbending', 'Exile Matters', 'Leave the Battlefield']}, + {'mask': water_mask, 'tags': ['Waterbending', 'Cost Reduction', 'Big Mana']}, + {'mask': fire_mask, 'tags': ['Aggro', 'Combat Matters', 'Firebending', 'Mana Dork', 'Ramp', 'X Spells']}, + {'mask': earth_mask, 'tags': ['Earthbending', 'Lands Matter', 'Landfall']}, + {'mask': bending_mask, 'tags': ['Bending']}, + ] + tag_utils.tag_with_rules_and_logging(df, rules, 'bending effects', color=color, logger=logger) except Exception as e: logger.error(f'Error tagging Bending keywords: {str(e)}') @@ -4977,41 +4256,23 @@ def 
tag_for_big_mana(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() - logger.info(f'Starting big mana tagging for {color}_cards.csv') - try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags', 'manaValue', 'manaCost', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different big mana patterns text_mask = tag_utils.create_text_mask(df, tag_constants.BIG_MANA_TEXT_PATTERNS) keyword_mask = tag_utils.create_keyword_mask(df, tag_constants.BIG_MANA_KEYWORDS) cost_mask = create_big_mana_cost_mask(df) specific_mask = tag_utils.create_name_mask(df, tag_constants.BIG_MANA_SPECIFIC_CARDS) tag_mask = tag_utils.create_tag_mask(df, 'Cost Reduction') - - # Combine all masks final_mask = text_mask | keyword_mask | cost_mask | specific_mask | tag_mask - - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Big Mana'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with big mana effects in {duration:.2f}s') + tag_utils.tag_with_logging( + df, final_mask, ['Big Mana'], + 'big mana effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_big_mana: {str(e)}') @@ -5094,49 +4355,34 @@ def tag_for_blink(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() - logger.info(f'Starting blink/flicker effect tagging for {color}_cards.csv') - try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas 
DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags', 'name'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different blink patterns etb_mask = create_etb_mask(df) ltb_mask = create_ltb_mask(df) blink_mask = create_blink_text_mask(df) # Create name-based masks name_patterns = df.apply( - lambda row: f'when {row["name"]} enters|whenever {row["name"]} enters|when {row["name"]} leaves|whenever {row["name"]} leaves', + lambda row: re.compile( + f'when {row["name"]} enters|whenever {row["name"]} enters|when {row["name"]} leaves|whenever {row["name"]} leaves', + re.IGNORECASE + ), axis=1 ) name_mask = df.apply( - lambda row: bool(re.search(name_patterns[row.name], row['text'], re.IGNORECASE)) if pd.notna(row['text']) else False, + lambda row: bool(name_patterns[row.name].search(row['text'])) if pd.notna(row['text']) else False, axis=1 ) - - # Combine all masks final_mask = etb_mask | ltb_mask | blink_mask | name_mask - - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Blink', 'Enter the Battlefield', 'Leave the Battlefield'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with blink/flicker effects in {duration:.2f}s') + tag_utils.tag_with_logging( + df, final_mask, ['Blink', 'Enter the Battlefield', 'Leave the Battlefield'], + 'blink/flicker effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_blink: {str(e)}') @@ -5169,13 +4415,12 @@ def create_burn_damage_mask(df: pd.DataFrame) -> pd.Series: ] trigger_mask = tag_utils.create_text_mask(df, trigger_patterns) - # Create pinger patterns (avoid grouped regex to prevent pandas warning) - pinger_patterns = [ - r'deals\s+1\s+damage', - r'exactly\s+1\s+damage', - r'loses\s+1\s+life', - ] - pinger_mask = 
tag_utils.create_text_mask(df, pinger_patterns) + # Create pinger patterns using compiled patterns + pinger_mask = ( + df['text'].str.contains(rgx.DEALS_ONE_DAMAGE.pattern, case=False, na=False, regex=True) | + df['text'].str.contains(rgx.EXACTLY_ONE_DAMAGE.pattern, case=False, na=False, regex=True) | + df['text'].str.contains(rgx.LOSES_ONE_LIFE.pattern, case=False, na=False, regex=True) + ) return damage_mask | trigger_mask | pinger_mask @@ -5245,40 +4490,25 @@ def tag_for_burn(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting burn effect tagging for {color}_cards.csv') - try: - # Validate required columns required_cols = {'text', 'themeTags', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different burn patterns damage_mask = create_burn_damage_mask(df) life_mask = create_burn_life_loss_mask(df) keyword_mask = create_burn_keyword_mask(df) exclusion_mask = create_burn_exclusion_mask(df) - - # Combine masks burn_mask = (damage_mask | life_mask | keyword_mask) & ~exclusion_mask - pinger_mask = tag_utils.create_text_mask( - df, - [ - r'deals\s+1\s+damage', - r'exactly\s+1\s+damage', - r'loses\s+1\s+life', - ] + + # Pinger mask using compiled patterns (eliminates duplication) + pinger_mask = ( + df['text'].str.contains(rgx.DEALS_ONE_DAMAGE.pattern, case=False, na=False, regex=True) | + df['text'].str.contains(rgx.EXACTLY_ONE_DAMAGE.pattern, case=False, na=False, regex=True) | + df['text'].str.contains(rgx.LOSES_ONE_LIFE.pattern, case=False, na=False, regex=True) ) - - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ + tag_utils.tag_with_rules_and_logging(df, [ {'mask': burn_mask, 'tags': ['Burn']}, {'mask': pinger_mask & ~exclusion_mask, 'tags': ['Pingers']}, - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {burn_mask.sum()} cards with 
burn effects in {duration:.2f}s') + ], 'burn effects', color=color, logger=logger) except Exception as e: logger.error(f'Error in tag_for_burn: {str(e)}') @@ -5345,33 +4575,17 @@ def tag_for_clones(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting clone effect tagging for {color}_cards.csv') - try: - # Validate required columns required_cols = {'text', 'themeTags', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different clone patterns text_mask = create_clone_text_mask(df) keyword_mask = create_clone_keyword_mask(df) exclusion_mask = create_clone_exclusion_mask(df) - - # Combine masks final_mask = (text_mask | keyword_mask) & ~exclusion_mask - - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Clones'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with clone effects in {duration:.2f}s') + tag_utils.tag_with_logging( + df, final_mask, ['Clones'], + 'clone effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_clones: {str(e)}') @@ -5450,33 +4664,17 @@ def tag_for_control(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting control effect tagging for {color}_cards.csv') - try: - # Validate required columns required_cols = {'text', 'themeTags', 'keywords', 'name'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different control patterns text_mask = create_control_text_mask(df) keyword_mask = create_control_keyword_mask(df) specific_mask = create_control_specific_cards_mask(df) - - # Combine masks final_mask = text_mask | keyword_mask | specific_mask - - # Apply tags via rules engine - 
tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Control'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with control effects in {duration:.2f}s') + tag_utils.tag_with_logging( + df, final_mask, ['Control'], + 'control effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_control: {str(e)}') @@ -5498,30 +4696,13 @@ def tag_for_energy(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting energy counter tagging for {color}_cards.csv') - try: - # Validate required columns required_cols = {'text', 'themeTags'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create mask for energy text: literal {E} and common phrases - energy_patterns = [r'\{e\}', 'energy counter', 'energy counters'] - energy_mask = tag_utils.create_text_mask(df, energy_patterns) - - # Apply tags via rules engine (also mark as a Resource Engine per request) - tag_utils.apply_rules(df, rules=[ - { - 'mask': energy_mask, - 'tags': ['Energy', 'Resource Engine'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {energy_mask.sum()} cards with energy effects in {duration:.2f}s') - + energy_mask = tag_utils.create_text_mask(df, [r'\{e\}', 'energy counter', 'energy counters']) + tag_utils.tag_with_logging( + df, energy_mask, ['Energy', 'Resource Engine'], 'energy cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_energy: {str(e)}') raise @@ -5536,12 +4717,12 @@ def create_infect_text_mask(df: pd.DataFrame) -> pd.Series: Returns: Boolean Series indicating which cards have infect text patterns """ - text_patterns = [ - 'one or more counter', - 'poison counter', - r'toxic\s*\d+', - ] - return tag_utils.create_text_mask(df, 
text_patterns) + # Use compiled patterns for regex, plain strings for simple searches + return ( + df['text'].str.contains('one or more counter', case=False, na=False) | + df['text'].str.contains('poison counter', case=False, na=False) | + df['text'].str.contains(rgx.TOXIC.pattern, case=False, na=False, regex=True) + ) def create_infect_keyword_mask(df: pd.DataFrame) -> pd.Series: """Create a boolean mask for cards with infect-related keywords. @@ -5587,34 +4768,15 @@ def tag_for_infect(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting infect effect tagging for {color}_cards.csv') - try: - # Validate required columns - required_cols = {'text', 'themeTags', 'keywords'} - tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different infect patterns text_mask = create_infect_text_mask(df) keyword_mask = create_infect_keyword_mask(df) exclusion_mask = create_infect_exclusion_mask(df) - - # Combine masks final_mask = (text_mask | keyword_mask) & ~exclusion_mask - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Infect'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with infect effects in {duration:.2f}s') - + tag_utils.tag_with_logging( + df, final_mask, ['Infect'], 'infect cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_infect: {str(e)}') raise @@ -5680,32 +4842,18 @@ def tag_for_legends_matter(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting legendary/historic tagging for {color}_cards.csv') - try: - # Validate required columns required_cols = {'text', 'themeTags', 'type'} tag_utils.validate_dataframe_columns(df, required_cols) 
- - # Create masks for different legendary patterns text_mask = create_legends_text_mask(df) type_mask = create_legends_type_mask(df) - - # Combine masks final_mask = text_mask | type_mask - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Historics Matter', 'Legends Matter'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with legendary/historic effects in {duration:.2f}s') + # Apply tags via utility + tag_utils.tag_with_logging( + df, final_mask, ['Historics Matter', 'Legends Matter'], + 'legendary/historic effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_legends_matter: {str(e)}') @@ -5721,10 +4869,7 @@ def create_little_guys_power_mask(df: pd.DataFrame) -> pd.Series: Returns: Boolean Series indicating which cards have power 2 or less """ - # Create mask for valid power values valid_power = pd.to_numeric(df['power'], errors='coerce') - - # Create mask for power <= 2 return (valid_power <= 2) & pd.notna(valid_power) def tag_for_little_guys(df: pd.DataFrame, color: str) -> None: @@ -5746,38 +4891,20 @@ def tag_for_little_guys(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() - logger.info(f'Starting low-power creature tagging for {color}_cards.csv') - try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'power', 'text', 'themeTags'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different patterns power_mask = create_little_guys_power_mask(df) text_mask = tag_utils.create_text_mask(df, 'power 2 or less') - - # Combine masks final_mask = 
power_mask | text_mask - - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Little Fellas'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with Little Fellas in {duration:.2f}s') + tag_utils.tag_with_logging( + df, final_mask, ['Little Fellas'], + 'low-power creatures', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_little_guys: {str(e)}') @@ -5847,32 +4974,16 @@ def tag_for_mill(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting mill effect tagging for {color}_cards.csv') - try: - # Validate required columns required_cols = {'text', 'themeTags', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different mill patterns text_mask = create_mill_text_mask(df) keyword_mask = create_mill_keyword_mask(df) - - # Combine masks final_mask = text_mask | keyword_mask - - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Mill'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with mill effects in {duration:.2f}s') + tag_utils.tag_with_logging( + df, final_mask, ['Mill'], + 'mill effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_mill: {str(e)}') @@ -5898,41 +5009,21 @@ def tag_for_monarch(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() - logger.info(f'Starting monarch mechanic tagging for {color}_cards.csv') - try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not 
isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) - # Create text pattern mask using centralized phrase group - text_mask = tag_utils.create_text_mask(df, tag_constants.PHRASE_GROUPS['monarch']) - - # Create keyword mask - keyword_mask = tag_utils.create_keyword_mask(df, 'Monarch') - - # Combine masks - final_mask = text_mask | keyword_mask - - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Monarch'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with monarch effects in {duration:.2f}s') - + # Combine text and keyword masks + final_mask = tag_utils.build_combined_mask( + df, text_patterns=tag_constants.PHRASE_GROUPS['monarch'], keyword_patterns='Monarch' + ) + tag_utils.tag_with_logging( + df, final_mask, ['Monarch'], 'monarch cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_monarch: {str(e)}') raise @@ -5953,35 +5044,21 @@ def tag_for_multiple_copies(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() - logger.info(f'Starting multiple copies tagging for {color}_cards.csv') - try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'name', 'themeTags'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create mask for multiple copy cards multiple_copies_mask = tag_utils.create_name_mask(df, MULTIPLE_COPY_CARDS) - - # Apply tags via rules engine if multiple_copies_mask.any(): matching_cards = 
df[multiple_copies_mask]['name'].unique() rules = [{'mask': multiple_copies_mask, 'tags': ['Multiple Copies']}] # Add per-card rules for individual name tags rules.extend({'mask': (df['name'] == card_name), 'tags': [card_name]} for card_name in matching_cards) tag_utils.apply_rules(df, rules=rules) - logger.info(f'Tagged {multiple_copies_mask.sum()} cards with multiple copies effects') - - # Log completion - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Completed multiple copies tagging in {duration:.2f}s') + logger.info(f'Tagged {multiple_copies_mask.sum()} cards with multiple copies effects for {color}') except Exception as e: logger.error(f'Error in tag_for_multiple_copies: {str(e)}') @@ -6049,39 +5126,23 @@ def tag_for_planeswalkers(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() - logger.info(f'Starting planeswalker tagging for {color}_cards.csv') - try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags', 'type', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different planeswalker patterns text_mask = create_planeswalker_text_mask(df) type_mask = create_planeswalker_type_mask(df) keyword_mask = create_planeswalker_keyword_mask(df) - - # Combine masks final_mask = text_mask | type_mask | keyword_mask - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Planeswalkers', 'Superfriends'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with planeswalker effects in {duration:.2f}s') + # Apply tags via utility + 
tag_utils.tag_with_logging( + df, final_mask, ['Planeswalkers', 'Superfriends'], + 'planeswalker effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_planeswalkers: {str(e)}') @@ -6156,33 +5217,19 @@ def tag_for_reanimate(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting reanimator effect tagging for {color}_cards.csv') - try: - # Validate required columns required_cols = {'text', 'themeTags', 'keywords', 'creatureTypes'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different reanimator patterns text_mask = create_reanimator_text_mask(df) keyword_mask = create_reanimator_keyword_mask(df) type_mask = create_reanimator_type_mask(df) - - # Combine masks final_mask = text_mask | keyword_mask | type_mask - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Reanimate'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with reanimator effects in {duration:.2f}s') + # Apply tags via utility + tag_utils.tag_with_logging( + df, final_mask, ['Reanimate'], + 'reanimator effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_reanimate: {str(e)}') @@ -6250,46 +5297,26 @@ def tag_for_stax(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting stax effect tagging for {color}_cards.csv') - try: - # Validate required columns required_cols = {'text', 'themeTags'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different stax patterns text_mask = create_stax_text_mask(df) name_mask = create_stax_name_mask(df) tag_mask = create_stax_tag_mask(df) exclusion_mask = 
create_stax_exclusion_mask(df) - - # Combine masks final_mask = (text_mask | tag_mask | name_mask) & ~exclusion_mask - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Stax'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with stax effects in {duration:.2f}s') + # Apply tags via utility + tag_utils.tag_with_logging( + df, final_mask, ['Stax'], + 'stax effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_stax: {str(e)}') raise ## Pillowfort -def create_pillowfort_text_mask(df: pd.DataFrame) -> pd.Series: - return tag_utils.create_text_mask(df, tag_constants.PILLOWFORT_TEXT_PATTERNS) - -def create_pillowfort_name_mask(df: pd.DataFrame) -> pd.Series: - return tag_utils.create_name_mask(df, tag_constants.PILLOWFORT_SPECIFIC_CARDS) - def tag_for_pillowfort(df: pd.DataFrame, color: str) -> None: """Tag classic deterrent / taxation defensive permanents as Pillowfort. 
@@ -6300,23 +5327,18 @@ def tag_for_pillowfort(df: pd.DataFrame, color: str) -> None: try: required_cols = {'text','themeTags'} tag_utils.validate_dataframe_columns(df, required_cols) - text_mask = create_pillowfort_text_mask(df) - name_mask = create_pillowfort_name_mask(df) - final_mask = text_mask | name_mask - if final_mask.any(): - tag_utils.apply_rules(df, rules=[{'mask': final_mask, 'tags': ['Pillowfort']}]) - logger.info(f'Tagged {final_mask.sum()} cards with Pillowfort') + final_mask = tag_utils.build_combined_mask( + df, text_patterns=tag_constants.PILLOWFORT_TEXT_PATTERNS, + name_list=tag_constants.PILLOWFORT_SPECIFIC_CARDS + ) + tag_utils.tag_with_logging( + df, final_mask, ['Pillowfort'], 'Pillowfort cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_pillowfort: {e}') raise ## Politics -def create_politics_text_mask(df: pd.DataFrame) -> pd.Series: - return tag_utils.create_text_mask(df, tag_constants.POLITICS_TEXT_PATTERNS) - -def create_politics_name_mask(df: pd.DataFrame) -> pd.Series: - return tag_utils.create_name_mask(df, tag_constants.POLITICS_SPECIFIC_CARDS) - def tag_for_politics(df: pd.DataFrame, color: str) -> None: """Tag cards that promote table negotiation, shared resources, votes, or gifting. 
@@ -6326,12 +5348,13 @@ def tag_for_politics(df: pd.DataFrame, color: str) -> None: try: required_cols = {'text','themeTags'} tag_utils.validate_dataframe_columns(df, required_cols) - text_mask = create_politics_text_mask(df) - name_mask = create_politics_name_mask(df) - final_mask = text_mask | name_mask - if final_mask.any(): - tag_utils.apply_rules(df, rules=[{'mask': final_mask, 'tags': ['Politics']}]) - logger.info(f'Tagged {final_mask.sum()} cards with Politics') + final_mask = tag_utils.build_combined_mask( + df, text_patterns=tag_constants.POLITICS_TEXT_PATTERNS, + name_list=tag_constants.POLITICS_SPECIFIC_CARDS + ) + tag_utils.tag_with_logging( + df, final_mask, ['Politics'], 'Politics cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_politics: {e}') raise @@ -6340,41 +5363,35 @@ def tag_for_politics(df: pd.DataFrame, color: str) -> None: ## (Control archetype functions removed to avoid duplication; existing tag_for_control covers it) ## Midrange Archetype -def create_midrange_text_mask(df: pd.DataFrame) -> pd.Series: - return tag_utils.create_text_mask(df, tag_constants.MIDRANGE_TEXT_PATTERNS) - -def create_midrange_name_mask(df: pd.DataFrame) -> pd.Series: - return tag_utils.create_name_mask(df, tag_constants.MIDRANGE_SPECIFIC_CARDS) - def tag_for_midrange_archetype(df: pd.DataFrame, color: str) -> None: """Tag resilient, incremental value permanents for Midrange identity.""" try: required_cols = {'text','themeTags'} tag_utils.validate_dataframe_columns(df, required_cols) - mask = create_midrange_text_mask(df) | create_midrange_name_mask(df) - if mask.any(): - tag_utils.apply_rules(df, rules=[{'mask': mask, 'tags': ['Midrange']}]) - logger.info(f'Tagged {mask.sum()} cards with Midrange archetype') + mask = tag_utils.build_combined_mask( + df, text_patterns=tag_constants.MIDRANGE_TEXT_PATTERNS, + name_list=tag_constants.MIDRANGE_SPECIFIC_CARDS + ) + tag_utils.tag_with_logging( + df, mask, ['Midrange'], 'Midrange 
archetype cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_midrange_archetype: {e}') raise ## Toolbox Archetype -def create_toolbox_text_mask(df: pd.DataFrame) -> pd.Series: - return tag_utils.create_text_mask(df, tag_constants.TOOLBOX_TEXT_PATTERNS) - -def create_toolbox_name_mask(df: pd.DataFrame) -> pd.Series: - return tag_utils.create_name_mask(df, tag_constants.TOOLBOX_SPECIFIC_CARDS) - def tag_for_toolbox_archetype(df: pd.DataFrame, color: str) -> None: """Tag tutor / search engine pieces that enable a toolbox plan.""" try: required_cols = {'text','themeTags'} tag_utils.validate_dataframe_columns(df, required_cols) - mask = create_toolbox_text_mask(df) | create_toolbox_name_mask(df) - if mask.any(): - tag_utils.apply_rules(df, rules=[{'mask': mask, 'tags': ['Toolbox']}]) - logger.info(f'Tagged {mask.sum()} cards with Toolbox archetype') + mask = tag_utils.build_combined_mask( + df, text_patterns=tag_constants.TOOLBOX_TEXT_PATTERNS, + name_list=tag_constants.TOOLBOX_SPECIFIC_CARDS + ) + tag_utils.tag_with_logging( + df, mask, ['Toolbox'], 'Toolbox archetype cards', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_toolbox_archetype: {e}') raise @@ -6418,32 +5435,18 @@ def tag_for_theft(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting theft effect tagging for {color}_cards.csv') - try: - # Validate required columns required_cols = {'text', 'themeTags', 'name'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different theft patterns text_mask = create_theft_text_mask(df) name_mask = create_theft_name_mask(df) - - # Combine masks final_mask = text_mask | name_mask - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Theft'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - 
start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with theft effects in {duration:.2f}s') + # Apply tags via utility + tag_utils.tag_with_logging( + df, final_mask, ['Theft'], + 'theft effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_theft: {str(e)}') @@ -6531,33 +5534,19 @@ def tag_for_toughness(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting toughness tagging for {color}_cards.csv') - try: - # Validate required columns required_cols = {'text', 'themeTags', 'keywords', 'power', 'toughness'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different toughness patterns text_mask = create_toughness_text_mask(df) keyword_mask = create_toughness_keyword_mask(df) power_toughness_mask = create_power_toughness_mask(df) - - # Combine masks final_mask = text_mask | keyword_mask | power_toughness_mask - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Toughness Matters'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with toughness effects in {duration:.2f}s') + # Apply tags via utility + tag_utils.tag_with_logging( + df, final_mask, ['Toughness Matters'], + 'toughness effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_toughness: {str(e)}') @@ -6624,34 +5613,20 @@ def tag_for_topdeck(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting topdeck effect tagging for {color}_cards.csv') - try: - # Validate required columns required_cols = {'text', 'themeTags', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different topdeck 
patterns text_mask = create_topdeck_text_mask(df) keyword_mask = create_topdeck_keyword_mask(df) specific_mask = create_topdeck_specific_mask(df) exclusion_mask = create_topdeck_exclusion_mask(df) - - # Combine masks final_mask = (text_mask | keyword_mask | specific_mask) & ~exclusion_mask - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Topdeck'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with topdeck effects in {duration:.2f}s') + # Apply tags via utility + tag_utils.tag_with_logging( + df, final_mask, ['Topdeck'], + 'topdeck effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_topdeck: {str(e)}') @@ -6667,15 +5642,14 @@ def create_x_spells_text_mask(df: pd.DataFrame) -> pd.Series: Returns: Boolean Series indicating which cards have X spell text patterns """ - # Consolidate numeric patterns with regex - text_patterns = [ - r'cost \{[xX\d]\} less', - r"don\'t lose (?:this|unspent|unused)", - r'unused mana would empty', - r'with \{[xX]\} in (?:its|their)', - r'you cast cost \{\d\} less' - ] - return tag_utils.create_text_mask(df, text_patterns) + # Use compiled patterns for regex, plain strings for simple searches + return ( + df['text'].str.contains(rgx.COST_LESS.pattern, case=False, na=False, regex=True) | + df['text'].str.contains(r"don\'t lose (?:this|unspent|unused)", case=False, na=False, regex=True) | + df['text'].str.contains('unused mana would empty', case=False, na=False) | + df['text'].str.contains(rgx.WITH_X_IN_COST.pattern, case=False, na=False, regex=True) | + df['text'].str.contains(rgx.SPELLS_YOU_CAST_COST.pattern, case=False, na=False, regex=True) + ) def create_x_spells_mana_mask(df: pd.DataFrame) -> pd.Series: """Create a boolean mask for cards with X in their mana cost. 
@@ -6704,32 +5678,18 @@ def tag_for_x_spells(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting X spells tagging for {color}_cards.csv') - try: - # Validate required columns required_cols = {'text', 'themeTags', 'manaCost'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different X spell patterns text_mask = create_x_spells_text_mask(df) mana_mask = create_x_spells_mana_mask(df) - - # Combine masks final_mask = text_mask | mana_mask - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['X Spells'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with X spell effects in {duration:.2f}s') + # Apply tags via utility + tag_utils.tag_with_logging( + df, final_mask, ['X Spells'], + 'X spell effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_x_spells: {str(e)}') @@ -6763,13 +5723,10 @@ def tag_for_interaction(df: pd.DataFrame, color: str) -> None: print('\n==========\n') try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags', 'name', 'type', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) @@ -6803,8 +5760,6 @@ def tag_for_interaction(df: pd.DataFrame, color: str) -> None: tag_for_removal(df, color) logger.info(f'Completed removal tagging in {(pd.Timestamp.now() - sub_start).total_seconds():.2f}s') print('\n==========\n') - - # Log completion and performance metrics duration = pd.Timestamp.now() - start_time logger.info(f'Completed all interaction tagging in {duration.total_seconds():.2f}s') @@ -6862,33 +5817,19 @@ def 
tag_for_counterspells(df: pd.DataFrame, color: str) -> None: Raises: ValueError: If required DataFrame columns are missing """ - start_time = pd.Timestamp.now() - logger.info(f'Starting counterspell effect tagging for {color}_cards.csv') - try: - # Validate required columns required_cols = {'text', 'themeTags', 'name'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different counterspell patterns text_mask = create_counterspell_text_mask(df) specific_mask = create_counterspell_specific_mask(df) exclusion_mask = create_counterspell_exclusion_mask(df) - - # Combine masks final_mask = (text_mask | specific_mask) & ~exclusion_mask - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Counterspells', 'Interaction', 'Spellslinger', 'Spells Matter'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with counterspell effects in {duration:.2f}s') + # Apply tags via utility + tag_utils.tag_with_logging( + df, final_mask, ['Counterspells', 'Interaction', 'Spellslinger', 'Spells Matter'], + 'counterspell effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_counterspells: {str(e)}') @@ -6916,21 +5857,13 @@ def tag_for_board_wipes(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() - logger.info(f'Starting board wipe effect tagging for {color}_cards.csv') - try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags', 'name'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different board wipe types destroy_mask = 
tag_utils.create_mass_effect_mask(df, 'mass_destruction') exile_mask = tag_utils.create_mass_effect_mask(df, 'mass_exile') bounce_mask = tag_utils.create_mass_effect_mask(df, 'mass_bounce') @@ -6942,24 +5875,16 @@ def tag_for_board_wipes(df: pd.DataFrame, color: str) -> None: # Create specific cards mask specific_mask = tag_utils.create_name_mask(df, tag_constants.BOARD_WIPE_SPECIFIC_CARDS) - - # Combine all masks final_mask = ( destroy_mask | exile_mask | bounce_mask | sacrifice_mask | damage_mask | specific_mask ) & ~exclusion_mask - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Board Wipes', 'Interaction'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with board wipe effects in {duration:.2f}s') + # Apply tags via utility + tag_utils.tag_with_logging( + df, final_mask, ['Board Wipes', 'Interaction'], + 'board wipe effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_board_wipes: {str(e)}') @@ -7060,41 +5985,25 @@ def tag_for_combat_tricks(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() - logger.info(f'Starting combat trick tagging for {color}_cards.csv') - try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags', 'type', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different combat trick patterns text_mask = create_combat_tricks_text_mask(df) type_mask = create_combat_tricks_type_mask(df) flash_mask = create_combat_tricks_flash_mask(df) exclusion_mask = create_combat_tricks_exclusion_mask(df) - - # Combine 
masks final_mask = ((text_mask & (type_mask | flash_mask)) | (flash_mask & tag_utils.create_type_mask(df, 'Enchantment'))) & ~exclusion_mask - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Combat Tricks', 'Interaction'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with combat trick effects in {duration:.2f}s') + # Apply tags via utility + tag_utils.tag_with_logging( + df, final_mask, ['Combat Tricks', 'Interaction'], + 'combat trick effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_combat_tricks: {str(e)}') @@ -7168,6 +6077,166 @@ def create_protection_exclusion_mask(df: pd.DataFrame) -> pd.Series: ] return tag_utils.create_name_mask(df, excluded_cards) +def _identify_protection_granting_cards(df: pd.DataFrame) -> pd.Series: + """Identify cards that grant protection to other permanents. + + Args: + df: DataFrame containing card data + + Returns: + Boolean Series indicating which cards grant protection + """ + from code.tagging.protection_grant_detection import is_granting_protection + + grant_mask = df.apply( + lambda row: is_granting_protection( + str(row.get('text', '')), + str(row.get('keywords', '')) + ), + axis=1 + ) + return grant_mask + + +def _apply_kindred_protection_tags(df: pd.DataFrame, grant_mask: pd.Series) -> int: + """Apply creature-type-specific protection tags. 
+ + Args: + df: DataFrame containing card data + grant_mask: Boolean Series indicating which cards grant protection + + Returns: + Number of cards tagged with kindred protection + """ + from code.tagging.protection_grant_detection import get_kindred_protection_tags + + kindred_count = 0 + for idx, row in df[grant_mask].iterrows(): + text = str(row.get('text', '')) + kindred_tags = get_kindred_protection_tags(text) + + if kindred_tags: + current_tags = row.get('themeTags', []) + if not isinstance(current_tags, list): + current_tags = [] + + updated_tags = list(set(current_tags) | set(kindred_tags)) + df.at[idx, 'themeTags'] = updated_tags + kindred_count += 1 + + return kindred_count + + +def _apply_protection_scope_tags(df: pd.DataFrame) -> int: + """Apply scope metadata tags (Self, Your Permanents, Blanket, Opponent). + + Applies to ALL cards with protection effects, not just those that grant protection. + + Args: + df: DataFrame containing card data + + Returns: + Number of cards tagged with scope metadata + """ + from code.tagging.protection_scope_detection import get_protection_scope_tags, has_any_protection + + scope_count = 0 + for idx, row in df.iterrows(): + text = str(row.get('text', '')) + name = str(row.get('name', '')) + keywords = str(row.get('keywords', '')) + + # Check if card has ANY protection effects + if not has_any_protection(text) and not any(k in keywords.lower() for k in ['hexproof', 'shroud', 'indestructible', 'ward', 'protection', 'phasing']): + continue + + scope_tags = get_protection_scope_tags(text, name, keywords) + + if scope_tags: + current_tags = row.get('themeTags', []) + if not isinstance(current_tags, list): + current_tags = [] + + updated_tags = list(set(current_tags) | set(scope_tags)) + df.at[idx, 'themeTags'] = updated_tags + scope_count += 1 + + return scope_count + + +def _get_all_protection_mask(df: pd.DataFrame) -> pd.Series: + """Build mask for ALL cards with protection keywords (granting or inherent). 
+ + Args: + df: DataFrame containing card data + + Returns: + Boolean Series indicating which cards have protection keywords + """ + text_series = tag_utils._ensure_norm_series(df, 'text', '__text_s') + keywords_series = tag_utils._ensure_norm_series(df, 'keywords', '__keywords_s') + + all_protection_mask = ( + text_series.str.contains('hexproof|shroud|indestructible|ward|protection from|protection|phasing', case=False, regex=True, na=False) | + keywords_series.str.contains('hexproof|shroud|indestructible|ward|protection|phasing', case=False, regex=True, na=False) + ) + return all_protection_mask + + +def _apply_specific_protection_ability_tags(df: pd.DataFrame, all_protection_mask: pd.Series) -> int: + """Apply specific protection ability tags (Hexproof, Indestructible, etc.). + + Args: + df: DataFrame containing card data + all_protection_mask: Boolean Series indicating cards with protection + + Returns: + Number of cards tagged with specific abilities + """ + ability_tag_count = 0 + for idx, row in df[all_protection_mask].iterrows(): + text = str(row.get('text', '')) + keywords = str(row.get('keywords', '')) + + ability_tags = set() + text_lower = text.lower() + keywords_lower = keywords.lower() + + # Check for each protection ability + if 'hexproof' in text_lower or 'hexproof' in keywords_lower: + ability_tags.add('Hexproof') + if 'indestructible' in text_lower or 'indestructible' in keywords_lower: + ability_tags.add('Indestructible') + if 'shroud' in text_lower or 'shroud' in keywords_lower: + ability_tags.add('Shroud') + if 'ward' in text_lower or 'ward' in keywords_lower: + ability_tags.add('Ward') + + # Distinguish types of protection + if 'protection from' in text_lower or 'protection from' in keywords_lower: + # Check for color protection + if any(color in text_lower or color in keywords_lower for color in ['white', 'blue', 'black', 'red', 'green', 'multicolored', 'monocolored', 'colorless', 'each color', 'all colors', 'the chosen color', 'a color']): + 
ability_tags.add('Protection from Color') + # Check for creature type protection + elif 'protection from creatures' in text_lower or 'protection from creatures' in keywords_lower: + ability_tags.add('Protection from Creatures') + elif any(ctype.lower() in text_lower for ctype in ['Dragons', 'Zombies', 'Vampires', 'Demons', 'Humans', 'Elves', 'Goblins', 'Werewolves']): + ability_tags.add('Protection from Creature Type') + else: + ability_tags.add('Protection from Quality') + + if ability_tags: + current_tags = row.get('themeTags', []) + if not isinstance(current_tags, list): + current_tags = [] + + updated_tags = list(set(current_tags) | ability_tags) + df.at[idx, 'themeTags'] = updated_tags + ability_tag_count += 1 + + return ability_tag_count + + def tag_for_protection(df: pd.DataFrame, color: str) -> None: """Tag cards that provide or have protection effects using vectorized operations. @@ -7181,9 +6250,6 @@ def tag_for_protection(df: pd.DataFrame, color: str) -> None: With TAG_PROTECTION_GRANTS=1, only tags cards that grant protection to other permanents, filtering out cards with inherent protection. - The function uses helper functions to identify different types of protection - and applies tags consistently using vectorized operations. 
- Args: df: DataFrame containing card data color: Color identifier for logging purposes @@ -7192,17 +6258,11 @@ def tag_for_protection(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() - logger.info(f'Starting protection effect tagging for {color}_cards.csv') - try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) @@ -7211,69 +6271,16 @@ def tag_for_protection(df: pd.DataFrame, color: str) -> None: if use_grant_detection: # M2: Use grant detection to filter out inherent-only protection - from code.tagging.protection_grant_detection import is_granting_protection, get_kindred_protection_tags - - # Create a grant detection mask - grant_mask = df.apply( - lambda row: is_granting_protection( - str(row.get('text', '')), - str(row.get('keywords', '')) - ), - axis=1 - ) - - final_mask = grant_mask + final_mask = _identify_protection_granting_cards(df) logger.info('Using M2 grant detection (TAG_PROTECTION_GRANTS=1)') # Apply kindred metadata tags for creature-type-specific grants - # Note: These are added to themeTags first, then _apply_metadata_partition() - # will classify them as metadata and move them to metadataTags column - kindred_count = 0 - for idx, row in df[final_mask].iterrows(): - text = str(row.get('text', '')) - kindred_tags = get_kindred_protection_tags(text) - - if kindred_tags: - # Add to themeTags temporarily - partition will move to metadataTags - current_tags = row.get('themeTags', []) - if not isinstance(current_tags, list): - current_tags = [] - - # Add kindred tags (they'll be classified as metadata later) - updated_tags = list(set(current_tags) | 
set(kindred_tags)) - df.at[idx, 'themeTags'] = updated_tags - kindred_count += 1 - + kindred_count = _apply_kindred_protection_tags(df, final_mask) if kindred_count > 0: logger.info(f'Applied kindred protection tags to {kindred_count} cards (will be moved to metadata by partition)') - # M5: Add protection scope metadata tags (Self, Your Permanents, Blanket, Opponent) - # Apply to ALL cards with protection effects, not just those that passed grant filter - # This ensures inherent protection cards like Aysen Highway get "Self: Protection" tags - from code.tagging.protection_scope_detection import get_protection_scope_tags, has_any_protection - - scope_count = 0 - for idx, row in df.iterrows(): - text = str(row.get('text', '')) - name = str(row.get('name', '')) - keywords = str(row.get('keywords', '')) - - # Check if card has ANY protection effects (text or keywords) - if not has_any_protection(text) and not any(k in keywords.lower() for k in ['hexproof', 'shroud', 'indestructible', 'ward', 'protection', 'phasing']): - continue - - scope_tags = get_protection_scope_tags(text, name) - - if scope_tags: - current_tags = row.get('themeTags', []) - if not isinstance(current_tags, list): - current_tags = [] - - # Add scope tags to themeTags (partition will move to metadataTags) - updated_tags = list(set(current_tags) | set(scope_tags)) - df.at[idx, 'themeTags'] = updated_tags - scope_count += 1 - + # M5: Add protection scope metadata tags + scope_count = _apply_protection_scope_tags(df) if scope_count > 0: logger.info(f'Applied protection scope tags to {scope_count} cards (will be moved to metadata by partition)') else: @@ -7283,54 +6290,26 @@ def tag_for_protection(df: pd.DataFrame, color: str) -> None: exclusion_mask = create_protection_exclusion_mask(df) final_mask = (text_mask | keyword_mask) & ~exclusion_mask - # Apply generic protection tags first + # Build comprehensive mask for ALL cards with protection keywords + all_protection_mask = _get_all_protection_mask(df) + 
+ # Apply generic 'Protective Effects' tag to ALL cards with protection tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Protection', 'Interaction'] - } + {'mask': all_protection_mask, 'tags': ['Protective Effects']} ]) - # Apply specific protection ability tags (Hexproof, Indestructible, etc.) - # These are theme tags indicating which specific protections the card provides - ability_tag_count = 0 - for idx, row in df[final_mask].iterrows(): - text = str(row.get('text', '')) - keywords = str(row.get('keywords', '')) - - # Detect which specific abilities are present - ability_tags = set() - text_lower = text.lower() - keywords_lower = keywords.lower() - - # Check for each protection ability - if 'hexproof' in text_lower or 'hexproof' in keywords_lower: - ability_tags.add('Hexproof') - if 'indestructible' in text_lower or 'indestructible' in keywords_lower: - ability_tags.add('Indestructible') - if 'shroud' in text_lower or 'shroud' in keywords_lower: - ability_tags.add('Shroud') - if 'ward' in text_lower or 'ward' in keywords_lower: - ability_tags.add('Ward') - if 'protection from' in text_lower or 'protection from' in keywords_lower: - ability_tags.add('Protection from Color') - - if ability_tags: - current_tags = row.get('themeTags', []) - if not isinstance(current_tags, list): - current_tags = [] - - # Add ability tags to themeTags - updated_tags = list(set(current_tags) | ability_tags) - df.at[idx, 'themeTags'] = updated_tags - ability_tag_count += 1 + # Apply 'Interaction' tag ONLY to cards that GRANT protection + tag_utils.apply_rules(df, rules=[ + {'mask': final_mask, 'tags': ['Interaction']} + ]) + # Apply specific protection ability tags + ability_tag_count = _apply_specific_protection_ability_tags(df, all_protection_mask) if ability_tag_count > 0: logger.info(f'Applied specific protection ability tags to {ability_tag_count} cards') # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged 
{final_mask.sum()} cards with protection effects in {duration:.2f}s') + logger.info(f'Tagged {final_mask.sum()} cards with protection effects for {color}') except Exception as e: logger.error(f'Error in tag_for_protection: {str(e)}') @@ -7357,21 +6336,13 @@ def tag_for_phasing(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() - logger.info(f'Starting phasing effect tagging for {color}_cards.csv') - try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a string") - - # Validate required columns required_cols = {'text', 'themeTags', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create mask for cards with phasing from code.tagging.phasing_scope_detection import has_phasing, get_phasing_scope_tags, is_removal_phasing phasing_mask = df.apply( @@ -7424,8 +6395,7 @@ def tag_for_phasing(df: pd.DataFrame, color: str) -> None: logger.info(f'Applied Removal tag to {removal_count} cards with opponent-targeting phasing') # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {phasing_mask.sum()} cards with phasing effects in {duration:.2f}s') + logger.info(f'Tagged {phasing_mask.sum()} cards with phasing effects for {color}') except Exception as e: logger.error(f'Error in tag_for_phasing: {str(e)}') @@ -7475,38 +6445,24 @@ def tag_for_removal(df: pd.DataFrame, color: str) -> None: ValueError: If required DataFrame columns are missing TypeError: If inputs are not of correct type """ - start_time = pd.Timestamp.now() - logger.info(f'Starting removal effect tagging for {color}_cards.csv') - try: - # Validate inputs if not isinstance(df, pd.DataFrame): raise TypeError("df must be a pandas DataFrame") if not isinstance(color, str): raise TypeError("color must be a 
string") - - # Validate required columns required_cols = {'text', 'themeTags', 'keywords'} tag_utils.validate_dataframe_columns(df, required_cols) - - # Create masks for different removal patterns text_mask = create_removal_text_mask(df) exclude_mask = create_removal_exclusion_mask(df) # Combine masks (and exclude self-targeting effects like 'target permanent you control') final_mask = text_mask & (~exclude_mask) - # Apply tags via rules engine - tag_utils.apply_rules(df, rules=[ - { - 'mask': final_mask, - 'tags': ['Removal', 'Interaction'] - } - ]) - - # Log results - duration = (pd.Timestamp.now() - start_time).total_seconds() - logger.info(f'Tagged {final_mask.sum()} cards with removal effects in {duration:.2f}s') + # Apply tags via utility + tag_utils.tag_with_logging( + df, final_mask, ['Removal', 'Interaction'], + 'removal effects', color=color, logger=logger + ) except Exception as e: logger.error(f'Error in tag_for_removal: {str(e)}') diff --git a/code/web/services/orchestrator.py b/code/web/services/orchestrator.py index 9b9f8b4..2179178 100644 --- a/code/web/services/orchestrator.py +++ b/code/web/services/orchestrator.py @@ -900,7 +900,7 @@ def ideal_labels() -> Dict[str, str]: 'removal': 'Spot Removal', 'wipes': 'Board Wipes', 'card_advantage': 'Card Advantage', - 'protection': 'Protection', + 'protection': 'Protective Effects', } @@ -1911,7 +1911,7 @@ def _make_stages(b: DeckBuilder) -> List[Dict[str, Any]]: ("removal", "Confirm Removal", "add_removal"), ("wipes", "Confirm Board Wipes", "add_board_wipes"), ("card_advantage", "Confirm Card Advantage", "add_card_advantage"), - ("protection", "Confirm Protection", "add_protection"), + ("protection", "Confirm Protective Effects", "add_protection"), ] any_granular = any(callable(getattr(b, rn, None)) for _key, _label, rn in spell_categories) if any_granular: diff --git a/config/themes/theme_list.json b/config/themes/theme_list.json index 3d76807..482c9aa 100644 --- a/config/themes/theme_list.json +++ 
b/config/themes/theme_list.json @@ -36,7 +36,7 @@ ], "popularity_bucket": "Very Common", "editorial_quality": "draft", - "description": "+1/+1 counters build across the board then get doubled, proliferated, or redistributed for exponential scaling. Synergies like Proliferate and Counters Matter reinforce the plan." + "description": "+1/+1 counters build across the board then get doubled, proliferated, or redistributed for exponential scaling. Synergies like Adapt and Evolve reinforce the plan." }, { "id": "0-1-counters", @@ -319,10 +319,10 @@ "theme": "Affinity", "synergies": [ "Cost Reduction", + "X Spells", "Artifacts Matter", "Big Mana", - "Flying", - "Stax" + "Flying" ], "primary_color": "Blue", "secondary_color": "Red", @@ -346,9 +346,9 @@ "synergy_commanders": [ "Ghalta, Primal Hunger - Synergy (Cost Reduction)", "Goreclaw, Terror of Qal Sisma - Synergy (Cost Reduction)", - "Ragavan, Nimble Pilferer - Synergy (Artifacts Matter)", - "Loran of the Third Path - Synergy (Artifacts Matter)", - "Syr Konrad, the Grim - Synergy (Big Mana)" + "Birgi, God of Storytelling // Harnfel, Horn of Bounty - Synergy (X Spells)", + "Danitha Capashen, Paragon - Synergy (X Spells)", + "Ragavan, Nimble Pilferer - Synergy (Artifacts Matter)" ], "popularity_bucket": "Niche", "editorial_quality": "draft", @@ -753,7 +753,7 @@ "Lifegain", "Life Matters", "Flying", - "Lifelink" + "Protection" ], "primary_color": "White", "secondary_color": "Black", @@ -2324,9 +2324,9 @@ "synergies": [ "Flying", "Soldier Kindred", + "Protection", "Morph", - "Little Fellas", - "Landfall" + "Protection from Color" ], "primary_color": "White", "secondary_color": "Blue", @@ -2353,7 +2353,7 @@ "Old Gnawbone - Synergy (Flying)", "Boromir, Warden of the Tower - Synergy (Soldier Kindred)", "Anim Pakal, Thousandth Moon - Synergy (Soldier Kindred)", - "Akroma, Angel of Fury - Synergy (Morph)" + "Yawgmoth, Thran Physician - Synergy (Protection)" ], "popularity_bucket": "Common", "editorial_quality": "draft", @@ 
-3361,9 +3361,9 @@ "synergies": [ "Spirit Kindred", "Cost Reduction", + "X Spells", "Lands Matter", - "Artifacts Matter", - "Enchantments Matter" + "Artifacts Matter" ], "primary_color": "Green", "secondary_color": "Blue", @@ -3388,7 +3388,7 @@ "Junji, the Midnight Sky - Synergy (Spirit Kindred)", "Ghalta, Primal Hunger - Synergy (Cost Reduction)", "Emry, Lurker of the Loch - Synergy (Cost Reduction)", - "Azusa, Lost but Seeking - Synergy (Lands Matter)" + "Birgi, God of Storytelling // Harnfel, Horn of Bounty - Synergy (X Spells)" ], "popularity_bucket": "Rare", "editorial_quality": "draft", @@ -3713,12 +3713,18 @@ { "id": "cloak", "theme": "Cloak", - "synergies": [], + "synergies": [ + "Ward", + "Protective Effects" + ], "primary_color": "Blue", "secondary_color": "White", "example_commanders": [ "Etrata, Deadly Fugitive", - "Vannifar, Evolved Enigma" + "Vannifar, Evolved Enigma", + "Adrix and Nev, Twincasters - Synergy (Ward)", + "Miirym, Sentinel Wyrm - Synergy (Ward)", + "Daxos, Blessed by the Sun - Synergy (Ward)" ], "example_cards": [ "Unexplained Absence", @@ -3730,6 +3736,10 @@ "Cryptic Coat", "Veiled Ascension" ], + "synergy_commanders": [ + "Toski, Bearer of Secrets - Synergy (Protective Effects)", + "Mondrak, Glory Dominus - Synergy (Protective Effects)" + ], "popularity_bucket": "Rare", "editorial_quality": "draft", "description": "Builds around the Cloak theme and its supporting synergies." 
@@ -4423,7 +4433,7 @@ "Freerunning", "Waterbending", "Undaunted", - "Leech Kindred" + "X Spells" ], "primary_color": "Blue", "secondary_color": "Red", @@ -4676,10 +4686,10 @@ "theme": "Crab Kindred", "synergies": [ "Toughness Matters", + "Protective Effects", "Mill", "Blink", - "Enter the Battlefield", - "Leave the Battlefield" + "Enter the Battlefield" ], "primary_color": "Blue", "secondary_color": "Black", @@ -4701,9 +4711,9 @@ "Hard Evidence" ], "synergy_commanders": [ - "Syr Konrad, the Grim - Synergy (Mill)", - "Emry, Lurker of the Loch - Synergy (Mill)", - "Selvala, Heart of the Wilds - Synergy (Blink)" + "Toski, Bearer of Secrets - Synergy (Protective Effects)", + "Mondrak, Glory Dominus - Synergy (Protective Effects)", + "Syr Konrad, the Grim - Synergy (Mill)" ], "popularity_bucket": "Rare", "editorial_quality": "draft", @@ -5320,6 +5330,8 @@ "id": "demigod-kindred", "theme": "Demigod Kindred", "synergies": [ + "Ward", + "Protective Effects", "Enchantments Matter" ], "primary_color": "Black", @@ -5342,9 +5354,11 @@ "Tymaret, Chosen from Death" ], "synergy_commanders": [ - "Sram, Senior Edificer - Synergy (Enchantments Matter)", - "Purphoros, God of the Forge - Synergy (Enchantments Matter)", - "Jaheira, Friend of the Forest - Synergy (Enchantments Matter)" + "Adrix and Nev, Twincasters - Synergy (Ward)", + "Miirym, Sentinel Wyrm - Synergy (Ward)", + "Toski, Bearer of Secrets - Synergy (Protective Effects)", + "Mondrak, Glory Dominus - Synergy (Protective Effects)", + "Sram, Senior Edificer - Synergy (Enchantments Matter)" ], "popularity_bucket": "Rare", "editorial_quality": "draft", @@ -5741,7 +5755,7 @@ "Elder Kindred", "Fight", "Trample", - "Ward" + "Cycling" ], "primary_color": "Green", "secondary_color": "Red", @@ -5855,11 +5869,11 @@ "id": "disguise", "theme": "Disguise", "synergies": [ + "Ward", "Detective Kindred", + "Protective Effects", "Flying", - "+1/+1 Counters", - "Lifegain", - "Life Matters" + "+1/+1 Counters" ], "primary_color": "Green", 
"secondary_color": "White", @@ -5867,8 +5881,8 @@ "Bayek of Siwa", "Arno Dorian", "Aveline de Grandpré", - "Kellan, Inquisitive Prodigy // Tail the Suspect - Synergy (Detective Kindred)", - "Nelly Borca, Impulsive Accuser - Synergy (Detective Kindred)" + "Adrix and Nev, Twincasters - Synergy (Ward)", + "Miirym, Sentinel Wyrm - Synergy (Ward)" ], "example_cards": [ "Bayek of Siwa", @@ -5881,10 +5895,10 @@ "Boltbender" ], "synergy_commanders": [ - "Piper Wright, Publick Reporter - Synergy (Detective Kindred)", - "Niv-Mizzet, Parun - Synergy (Flying)", - "Avacyn, Angel of Hope - Synergy (Flying)", - "Rishkar, Peema Renegade - Synergy (+1/+1 Counters)" + "Daxos, Blessed by the Sun - Synergy (Ward)", + "Kellan, Inquisitive Prodigy // Tail the Suspect - Synergy (Detective Kindred)", + "Nelly Borca, Impulsive Accuser - Synergy (Detective Kindred)", + "Toski, Bearer of Secrets - Synergy (Protective Effects)" ], "popularity_bucket": "Rare", "editorial_quality": "draft", @@ -5932,7 +5946,9 @@ "id": "divinity-counters", "theme": "Divinity Counters", "synergies": [ + "Indestructible", "Spirit Kindred", + "Protective Effects", "Counters Matter", "Big Mana" ], @@ -5955,12 +5971,12 @@ "Myojin of Infinite Rage" ], "synergy_commanders": [ + "Toski, Bearer of Secrets - Synergy (Indestructible)", + "Mondrak, Glory Dominus - Synergy (Indestructible)", + "Purphoros, God of the Forge - Synergy (Indestructible)", "Kodama of the West Tree - Synergy (Spirit Kindred)", "Kodama of the East Tree - Synergy (Spirit Kindred)", - "Junji, the Midnight Sky - Synergy (Spirit Kindred)", - "Etali, Primal Conqueror // Etali, Primal Sickness - Synergy (Counters Matter)", - "Rishkar, Peema Renegade - Synergy (Counters Matter)", - "Syr Konrad, the Grim - Synergy (Big Mana)" + "Etali, Primal Conqueror // Etali, Primal Sickness - Synergy (Protective Effects)" ], "popularity_bucket": "Rare", "editorial_quality": "draft", @@ -7045,7 +7061,7 @@ "Auras", "Disturb", "Enchantments Matter", - "Protection from 
Color" + "Voltron" ], "primary_color": "Blue", "secondary_color": "White", @@ -8380,7 +8396,7 @@ "Sagas Matter", "Dinosaur Kindred", "Ore Counters", - "Burn" + "Indestructible" ], "primary_color": "Green", "secondary_color": "Red", @@ -8486,7 +8502,7 @@ "Aang, Airbending Master - Synergy (Bending)", "Birgi, God of Storytelling // Harnfel, Horn of Bounty - Synergy (Mana Dork)", "Selvala, Heart of the Wilds - Synergy (Mana Dork)", - "Goreclaw, Terror of Qal Sisma - Synergy (X Spells)" + "Ghalta, Primal Hunger - Synergy (X Spells)" ], "popularity_bucket": "Rare", "editorial_quality": "draft", @@ -8499,8 +8515,8 @@ "Banding", "Kithkin Kindred", "Knight Kindred", - "Partner", - "Minotaur Kindred" + "Protection", + "Partner" ], "primary_color": "White", "secondary_color": "Red", @@ -9743,10 +9759,10 @@ "theme": "God Kindred", "synergies": [ "Indestructible", + "Ward", + "Protective Effects", "Transform", - "Midrange", - "Exile Matters", - "Sacrifice Matters" + "Midrange" ], "primary_color": "Black", "secondary_color": "White", @@ -9769,9 +9785,10 @@ ], "synergy_commanders": [ "Toski, Bearer of Secrets - Synergy (Indestructible)", - "Etali, Primal Conqueror // Etali, Primal Sickness - Synergy (Indestructible)", - "Veyran, Voice of Duality - Synergy (Transform)", - "Rishkar, Peema Renegade - Synergy (Midrange)" + "Mondrak, Glory Dominus - Synergy (Indestructible)", + "Adrix and Nev, Twincasters - Synergy (Ward)", + "Miirym, Sentinel Wyrm - Synergy (Ward)", + "Etali, Primal Conqueror // Etali, Primal Sickness - Synergy (Protective Effects)" ], "popularity_bucket": "Niche", "editorial_quality": "draft", @@ -10555,10 +10572,10 @@ "theme": "Hexproof", "synergies": [ "Hexproof from", - "Protection", + "Protective Effects", "Indestructible", "Flash", - "Combat Tricks" + "Stax" ], "primary_color": "Green", "secondary_color": "Blue", @@ -10572,20 +10589,20 @@ "example_cards": [ "Swiftfoot Boots", "Heroic Intervention", + "Shadowspear", "Tamiyo's Safekeeping", "Dawn's Truce", 
"Plaza of Heroes", "Veil of Summer", - "Snakeskin Veil", - "Tyvar's Stand" + "Snakeskin Veil" ], "synergy_commanders": [ "General Ferrous Rokiric - Synergy (Hexproof from)", "Elenda, Saint of Dusk - Synergy (Hexproof from)", "Niv-Mizzet, Guildpact - Synergy (Hexproof from)", - "Boromir, Warden of the Tower - Synergy (Protection)", - "Avacyn, Angel of Hope - Synergy (Protection)", - "Toski, Bearer of Secrets - Synergy (Indestructible)" + "Toski, Bearer of Secrets - Synergy (Protective Effects)", + "Mondrak, Glory Dominus - Synergy (Protective Effects)", + "Purphoros, God of the Forge - Synergy (Indestructible)" ], "popularity_bucket": "Niche", "editorial_quality": "draft", @@ -10595,7 +10612,8 @@ "id": "hexproof-from", "theme": "Hexproof from", "synergies": [ - "Hexproof" + "Hexproof", + "Protective Effects" ], "primary_color": "Black", "secondary_color": "Green", @@ -10619,7 +10637,9 @@ "synergy_commanders": [ "Padeem, Consul of Innovation - Synergy (Hexproof)", "Skrelv, Defector Mite - Synergy (Hexproof)", - "Shalai, Voice of Plenty - Synergy (Hexproof)" + "Shalai, Voice of Plenty - Synergy (Hexproof)", + "Toski, Bearer of Secrets - Synergy (Protective Effects)", + "Mondrak, Glory Dominus - Synergy (Protective Effects)" ], "popularity_bucket": "Rare", "editorial_quality": "draft", @@ -11033,11 +11053,11 @@ "Ulvenwald Hydra" ], "synergy_commanders": [ + "Ghalta, Primal Hunger - Synergy (X Spells)", "Birgi, God of Storytelling // Harnfel, Horn of Bounty - Synergy (X Spells)", - "Goreclaw, Terror of Qal Sisma - Synergy (X Spells)", - "Danitha Capashen, Paragon - Synergy (X Spells)", - "Ghalta, Primal Hunger - Synergy (Trample)", + "Emry, Lurker of the Loch - Synergy (X Spells)", "Etali, Primal Conqueror // Etali, Primal Sickness - Synergy (Trample)", + "Ghalta, Stampede Tyrant - Synergy (Trample)", "Rishkar, Peema Renegade - Synergy (+1/+1 Counters)" ], "popularity_bucket": "Niche", @@ -11408,20 +11428,20 @@ "id": "indestructible", "theme": "Indestructible", 
"synergies": [ + "Divinity Counters", "God Kindred", - "Protection", + "Protective Effects", "Hexproof", - "Lifelink", - "Interaction" + "Lifelink" ], "primary_color": "White", "secondary_color": "Black", "example_commanders": [ "Toski, Bearer of Secrets", + "Mondrak, Glory Dominus", "Purphoros, God of the Forge", "Etali, Primal Conqueror // Etali, Primal Sickness", - "Boromir, Warden of the Tower", - "Avacyn, Angel of Hope" + "Boromir, Warden of the Tower" ], "example_cards": [ "Heroic Intervention", @@ -11434,9 +11454,10 @@ "Darksteel Citadel" ], "synergy_commanders": [ - "Birgi, God of Storytelling // Harnfel, Horn of Bounty - Synergy (God Kindred)", - "Ojer Taq, Deepest Foundation // Temple of Civilization - Synergy (God Kindred)", - "Padeem, Consul of Innovation - Synergy (Hexproof)" + "Myojin of Life's Web - Synergy (Divinity Counters)", + "Myojin of Night's Reach - Synergy (Divinity Counters)", + "Myojin of Seeing Winds - Synergy (Divinity Counters)", + "Birgi, God of Storytelling // Harnfel, Horn of Bounty - Synergy (God Kindred)" ], "popularity_bucket": "Rare", "editorial_quality": "draft", @@ -11556,10 +11577,10 @@ "theme": "Insect Kindred", "synergies": [ "Landfall", + "Shroud", "Poison Counters", "Druid Kindred", - "Horror Kindred", - "Time Counters" + "Horror Kindred" ], "primary_color": "Green", "secondary_color": "Black", @@ -11584,9 +11605,9 @@ "Tatyova, Benthic Druid - Synergy (Landfall)", "Aesi, Tyrant of Gyre Strait - Synergy (Landfall)", "Bristly Bill, Spine Sower - Synergy (Landfall)", - "Etali, Primal Conqueror // Etali, Primal Sickness - Synergy (Poison Counters)", - "Skrelv, Defector Mite - Synergy (Poison Counters)", - "Rishkar, Peema Renegade - Synergy (Druid Kindred)" + "Shay Cormac - Synergy (Shroud)", + "Eladamri, Lord of Leaves - Synergy (Shroud)", + "Etali, Primal Conqueror // Etali, Primal Sickness - Synergy (Poison Counters)" ], "popularity_bucket": "Common", "editorial_quality": "draft", @@ -11637,7 +11658,7 @@ "Combat Tricks", 
"Board Wipes", "Counterspells", - "Protection from Color" + "Phasing" ], "primary_color": "White", "secondary_color": "Black", @@ -12344,9 +12365,9 @@ "synergies": [ "Flanking", "Adamant", + "Protection", "First strike", - "Double strike", - "Kithkin Kindred" + "Protection from Color" ], "primary_color": "White", "secondary_color": "Black", @@ -12370,7 +12391,8 @@ "synergy_commanders": [ "Sidar Kondo of Jamuraa - Synergy (Flanking)", "Sidar Jabari - Synergy (Flanking)", - "Telim'Tor - Synergy (Flanking)" + "Telim'Tor - Synergy (Flanking)", + "Yawgmoth, Thran Physician - Synergy (Protection)" ], "popularity_bucket": "Common", "editorial_quality": "draft", @@ -12462,9 +12484,9 @@ "synergies": [ "Draw Triggers", "Wheels", + "Protective Effects", "Creature Tokens", - "Stax", - "Big Mana" + "Stax" ], "primary_color": "Blue", "secondary_color": "Black", @@ -12491,7 +12513,7 @@ "Sheoldred, the Apocalypse - Synergy (Draw Triggers)", "Selvala, Heart of the Wilds - Synergy (Wheels)", "Niv-Mizzet, Parun - Synergy (Wheels)", - "Adeline, Resplendent Cathar - Synergy (Creature Tokens)" + "Toski, Bearer of Secrets - Synergy (Protective Effects)" ], "popularity_bucket": "Rare", "editorial_quality": "draft", @@ -12780,10 +12802,10 @@ "theme": "Leech Kindred", "synergies": [ "Cost Reduction", + "X Spells", "Lifegain", "Life Matters", - "Burn", - "Little Fellas" + "Burn" ], "primary_color": "Black", "secondary_color": "Green", @@ -12792,7 +12814,7 @@ "Ghalta, Primal Hunger - Synergy (Cost Reduction)", "Emry, Lurker of the Loch - Synergy (Cost Reduction)", "Goreclaw, Terror of Qal Sisma - Synergy (Cost Reduction)", - "Tatyova, Benthic Druid - Synergy (Lifegain)" + "Birgi, God of Storytelling // Harnfel, Horn of Bounty - Synergy (X Spells)" ], "example_cards": [ "Fumulus, the Infestation", @@ -12805,8 +12827,8 @@ "Monstrous War-Leech" ], "synergy_commanders": [ - "Sheoldred, the Apocalypse - Synergy (Lifegain)", - "Vito, Thorn of the Dusk Rose - Synergy (Life Matters)" + "Danitha 
Capashen, Paragon - Synergy (X Spells)", + "Tatyova, Benthic Druid - Synergy (Lifegain)" ], "popularity_bucket": "Rare", "editorial_quality": "draft", @@ -16791,8 +16813,8 @@ "id": "phasing", "theme": "Phasing", "synergies": [ - "Protection", "Equipment Matters", + "Protective Effects", "Interaction", "Removal", "Artifacts Matter" @@ -16804,7 +16826,7 @@ "King of the Oathbreakers", "Cyclonus, the Saboteur // Cyclonus, Cybertronian Fighter", "Taniwha", - "Boromir, Warden of the Tower - Synergy (Protection)" + "Sram, Senior Edificer - Synergy (Equipment Matters)" ], "example_cards": [ "Teferi's Protection", @@ -16817,10 +16839,10 @@ "Teferi, Master of Time" ], "synergy_commanders": [ - "Avacyn, Angel of Hope - Synergy (Protection)", - "Yawgmoth, Thran Physician - Synergy (Protection)", - "Sram, Senior Edificer - Synergy (Equipment Matters)", "Kodama of the West Tree - Synergy (Equipment Matters)", + "Danitha Capashen, Paragon - Synergy (Equipment Matters)", + "Toski, Bearer of Secrets - Synergy (Protective Effects)", + "Mondrak, Glory Dominus - Synergy (Protective Effects)", "Syr Konrad, the Grim - Synergy (Interaction)" ], "popularity_bucket": "Rare", @@ -17553,38 +17575,34 @@ "id": "protection", "theme": "Protection", "synergies": [ + "Protection from Creature Type", "Protection from Color", - "Indestructible", - "Phasing", - "Hexproof", - "Shroud" + "Protection from Quality", + "Protective Effects", + "Knight Kindred" ], "primary_color": "White", "secondary_color": "Green", "example_commanders": [ - "Boromir, Warden of the Tower", - "Avacyn, Angel of Hope", "Yawgmoth, Thran Physician", - "Padeem, Consul of Innovation", - "Skrelv, Defector Mite" + "Emrakul, the Promised End", + "Greensleeves, Maro-Sorcerer", + "Animar, Soul of Elements", + "Emrakul, the World Anew" ], "example_cards": [ - "Swiftfoot Boots", - "Lightning Greaves", - "Heroic Intervention", - "The One Ring", - "Teferi's Protection", - "Boros Charm", - "Flawless Maneuver", - "Akroma's Will" + 
"Yawgmoth, Thran Physician", + "Karmic Guide", + "Stonecoil Serpent", + "Emrakul, the Promised End", + "Greensleeves, Maro-Sorcerer", + "Animar, Soul of Elements", + "Emrakul, the World Anew", + "Reaver Titan" ], "synergy_commanders": [ - "Emrakul, the World Anew - Synergy (Protection from Color)", - "Éowyn, Fearless Knight - Synergy (Protection from Color)", - "Lord of the Nazgûl - Synergy (Protection from Color)", - "Toski, Bearer of Secrets - Synergy (Indestructible)", - "Purphoros, God of the Forge - Synergy (Indestructible)", - "The War Doctor - Synergy (Phasing)" + "Katilda, Dawnhart Prime - Synergy (Protection from Creature Type)", + "Katilda, Dawnhart Martyr // Katilda's Rising Dawn - Synergy (Protection from Creature Type)" ], "popularity_bucket": "Common", "editorial_quality": "draft", @@ -17595,41 +17613,184 @@ "theme": "Protection from Color", "synergies": [ "Protection", - "Enchant", - "Interaction", - "Auras", - "Cleric Kindred" + "Protective Effects", + "Threshold", + "Knight Kindred", + "Angel Kindred" ], "primary_color": "White", "secondary_color": "Green", "example_commanders": [ - "Emrakul, the World Anew", - "Éowyn, Fearless Knight", + "Greensleeves, Maro-Sorcerer", + "Animar, Soul of Elements", "Lord of the Nazgûl", - "Katilda, Dawnhart Martyr // Katilda's Rising Dawn", - "Eight-and-a-Half-Tails" + "Akroma, Angel of Wrath", + "Teysa, Envoy of Ghosts" ], "example_cards": [ - "The One Ring", - "Teferi's Protection", "Akroma's Will", - "Mother of Runes", "Commander's Plate", "Sword of Feast and Famine", "Sword of Hearth and Home", - "Sejiri Shelter // Sejiri Glacier" + "Karmic Guide", + "Akroma's Memorial", + "Sword of Fire and Ice", + "Giver of Runes" ], "synergy_commanders": [ - "Boromir, Warden of the Tower - Synergy (Protection)", - "Avacyn, Angel of Hope - Synergy (Protection)", "Yawgmoth, Thran Physician - Synergy (Protection)", - "Dorothea, Vengeful Victim // Dorothea's Retribution - Synergy (Enchant)", - "Syr Konrad, the Grim - Synergy 
(Interaction)" + "Emrakul, the Promised End - Synergy (Protection)", + "Toski, Bearer of Secrets - Synergy (Protective Effects)", + "Mondrak, Glory Dominus - Synergy (Protective Effects)", + "Kiora, the Rising Tide - Synergy (Threshold)" ], "popularity_bucket": "Niche", "editorial_quality": "draft", "description": "Builds around Protection from Color leveraging synergies with Protection and Enchant." }, + { + "id": "protection-from-creature-type", + "theme": "Protection from Creature Type", + "synergies": [ + "Protection", + "Protective Effects", + "Human Kindred", + "Little Fellas" + ], + "primary_color": "White", + "secondary_color": "Black", + "example_commanders": [ + "Yawgmoth, Thran Physician", + "Katilda, Dawnhart Prime", + "Katilda, Dawnhart Martyr // Katilda's Rising Dawn", + "Emrakul, the Promised End - Synergy (Protection)", + "Greensleeves, Maro-Sorcerer - Synergy (Protection)" + ], + "example_cards": [ + "Yawgmoth, Thran Physician", + "Katilda, Dawnhart Prime", + "Baneslayer Angel", + "Katilda, Dawnhart Martyr // Katilda's Rising Dawn", + "Elite Inquisitor", + "Dragon Hunter", + "Dragonstalker", + "Grave Bramble" + ], + "synergy_commanders": [ + "Toski, Bearer of Secrets - Synergy (Protective Effects)", + "Mondrak, Glory Dominus - Synergy (Protective Effects)", + "Syr Konrad, the Grim - Synergy (Human Kindred)" + ], + "popularity_bucket": "Rare", + "editorial_quality": "draft", + "description": "Builds around Protection from Creature Type leveraging synergies with Protection and Protective Effects." 
+ }, + { + "id": "protection-from-creatures", + "theme": "Protection from Creatures", + "synergies": [ + "Protective Effects" + ], + "primary_color": "White", + "secondary_color": "Blue", + "example_commanders": [ + "Commander Eesha", + "Toski, Bearer of Secrets - Synergy (Protective Effects)", + "Mondrak, Glory Dominus - Synergy (Protective Effects)", + "Purphoros, God of the Forge - Synergy (Protective Effects)" + ], + "example_cards": [ + "Spirit Mantle", + "Unquestioned Authority", + "Seasoned Dungeoneer", + "Holy Mantle", + "Riders of Gavony", + "Commander Eesha", + "Crypsis", + "Beloved Chaplain" + ], + "popularity_bucket": "Rare", + "editorial_quality": "draft", + "description": "Builds around Protection from Creatures leveraging synergies with Protective Effects." + }, + { + "id": "protection-from-quality", + "theme": "Protection from Quality", + "synergies": [ + "Protection", + "Protective Effects", + "Cleric Kindred", + "Interaction", + "Spirit Kindred" + ], + "primary_color": "White", + "secondary_color": "Green", + "example_commanders": [ + "Emrakul, the Promised End", + "Emrakul, the World Anew", + "Pippin, Guard of the Citadel", + "Éowyn, Fearless Knight", + "Progenitus" + ], + "example_cards": [ + "The One Ring", + "Teferi's Protection", + "Mother of Runes", + "Sejiri Shelter // Sejiri Glacier", + "Emrakul, the Promised End", + "Sword of Wealth and Power", + "Alseid of Life's Bounty", + "Serra's Emissary" + ], + "synergy_commanders": [ + "Yawgmoth, Thran Physician - Synergy (Protection)", + "Greensleeves, Maro-Sorcerer - Synergy (Protection)", + "Toski, Bearer of Secrets - Synergy (Protective Effects)", + "Mondrak, Glory Dominus - Synergy (Protective Effects)", + "Vito, Thorn of the Dusk Rose - Synergy (Cleric Kindred)" + ], + "popularity_bucket": "Niche", + "editorial_quality": "draft", + "description": "Builds around Protection from Quality leveraging synergies with Protection and Protective Effects." 
+ }, + { + "id": "protective-effects", + "theme": "Protective Effects", + "synergies": [ + "Indestructible", + "Ward", + "Hexproof", + "Protection from Color", + "Protection" + ], + "primary_color": "White", + "secondary_color": "Green", + "example_commanders": [ + "Toski, Bearer of Secrets", + "Mondrak, Glory Dominus", + "Purphoros, God of the Forge", + "Etali, Primal Conqueror // Etali, Primal Sickness", + "Boromir, Warden of the Tower" + ], + "example_cards": [ + "Swiftfoot Boots", + "Lightning Greaves", + "Heroic Intervention", + "The One Ring", + "Teferi's Protection", + "Roaming Throne", + "Boros Charm", + "Flawless Maneuver" + ], + "synergy_commanders": [ + "Adrix and Nev, Twincasters - Synergy (Ward)", + "Miirym, Sentinel Wyrm - Synergy (Ward)", + "Padeem, Consul of Innovation - Synergy (Hexproof)" + ], + "popularity_bucket": "Very Common", + "editorial_quality": "draft", + "description": "Builds around Protective Effects leveraging synergies with Indestructible and Ward." + }, { "id": "prototype", "theme": "Prototype", @@ -18202,8 +18363,8 @@ "Ghyrson Starn, Kelermorph - Synergy (Tyranid Kindred)", "Old One Eye - Synergy (Tyranid Kindred)", "Magus Lucea Kane - Synergy (Tyranid Kindred)", - "Birgi, God of Storytelling // Harnfel, Horn of Bounty - Synergy (X Spells)", - "Goreclaw, Terror of Qal Sisma - Synergy (X Spells)" + "Ghalta, Primal Hunger - Synergy (X Spells)", + "Birgi, God of Storytelling // Harnfel, Horn of Bounty - Synergy (X Spells)" ], "example_cards": [ "Jacked Rabbit", @@ -19742,7 +19903,7 @@ "Cost Reduction", "Stax", "Loot", - "Big Mana" + "X Spells" ], "primary_color": "Blue", "secondary_color": "Red", @@ -20086,37 +20247,38 @@ "id": "shroud", "theme": "Shroud", "synergies": [ - "Protection", - "Interaction", + "Protective Effects", + "Insect Kindred", + "Beast Kindred", "Enchant", - "Auras", - "Enchantments Matter" + "Auras" ], "primary_color": "Green", "secondary_color": "Blue", "example_commanders": [ + "Shay Cormac", "Eladamri, Lord of 
Leaves", "Multani, Maro-Sorcerer", "Kodama of the North Tree", - "Autumn Willow", - "Boromir, Warden of the Tower - Synergy (Protection)" + "Autumn Willow" ], "example_cards": [ "Lightning Greaves", + "Whispersilk Cloak", "Sylvan Safekeeper", "Sterling Grove", + "Arcane Lighthouse", "Silver Shroud Costume", "Greater Auramancy", - "Pemmin's Aura", - "Argothian Enchantress", - "Scion of Oona" + "Pemmin's Aura" ], "synergy_commanders": [ - "Avacyn, Angel of Hope - Synergy (Protection)", - "Yawgmoth, Thran Physician - Synergy (Protection)", - "Syr Konrad, the Grim - Synergy (Interaction)", - "Purphoros, God of the Forge - Synergy (Interaction)", - "Katilda, Dawnhart Martyr // Katilda's Rising Dawn - Synergy (Enchant)" + "Toski, Bearer of Secrets - Synergy (Protective Effects)", + "Mondrak, Glory Dominus - Synergy (Protective Effects)", + "Purphoros, God of the Forge - Synergy (Protective Effects)", + "The Locust God - Synergy (Insect Kindred)", + "Mazirek, Kraul Death Priest - Synergy (Insect Kindred)", + "Loot, Exuberant Explorer - Synergy (Beast Kindred)" ], "popularity_bucket": "Rare", "editorial_quality": "draft", @@ -22184,7 +22346,7 @@ "Minion Kindred", "Rat Kindred", "Reanimate", - "Mill" + "Protection from Color" ], "primary_color": "Black", "secondary_color": "Green", @@ -23006,8 +23168,8 @@ "Shaman Kindred", "Trample", "Warrior Kindred", - "+1/+1 Counters", - "Counters Matter" + "Protective Effects", + "+1/+1 Counters" ], "primary_color": "Green", "secondary_color": "Black", @@ -23045,10 +23207,10 @@ "theme": "Turtle Kindred", "synergies": [ "Ward", + "Protective Effects", "Toughness Matters", "Stax", - "Little Fellas", - "Big Mana" + "Little Fellas" ], "primary_color": "Blue", "secondary_color": "Green", @@ -23072,10 +23234,10 @@ "synergy_commanders": [ "Adrix and Nev, Twincasters - Synergy (Ward)", "Miirym, Sentinel Wyrm - Synergy (Ward)", - "Codsworth, Handy Helper - Synergy (Ward)", - "Azusa, Lost but Seeking - Synergy (Toughness Matters)", - "Sheoldred, 
the Apocalypse - Synergy (Toughness Matters)", - "Kutzil, Malamet Exemplar - Synergy (Stax)" + "Daxos, Blessed by the Sun - Synergy (Ward)", + "Toski, Bearer of Secrets - Synergy (Protective Effects)", + "Mondrak, Glory Dominus - Synergy (Protective Effects)", + "Azusa, Lost but Seeking - Synergy (Toughness Matters)" ], "popularity_bucket": "Rare", "editorial_quality": "draft", @@ -23111,8 +23273,8 @@ "Aberrant" ], "synergy_commanders": [ + "Ghalta, Primal Hunger - Synergy (X Spells)", "Birgi, God of Storytelling // Harnfel, Horn of Bounty - Synergy (X Spells)", - "Goreclaw, Terror of Qal Sisma - Synergy (X Spells)", "Azusa, Lost but Seeking - Synergy (Ramp)" ], "popularity_bucket": "Rare", @@ -23197,9 +23359,9 @@ "synergies": [ "Politics", "Cost Reduction", + "X Spells", "Big Mana", - "Spells Matter", - "Spellslinger" + "Spells Matter" ], "primary_color": "White", "secondary_color": "Black", @@ -23219,7 +23381,7 @@ "Elspeth's Devotee" ], "synergy_commanders": [ - "Syr Konrad, the Grim - Synergy (Big Mana)" + "Birgi, God of Storytelling // Harnfel, Horn of Bounty - Synergy (X Spells)" ], "popularity_bucket": "Rare", "editorial_quality": "draft", @@ -23270,7 +23432,7 @@ "Aristocrats", "+1/+1 Counters", "Zombie Kindred", - "Counters Matter" + "Blink" ], "primary_color": "Black", "secondary_color": "Red", @@ -23849,38 +24011,37 @@ "id": "ward", "theme": "Ward", "synergies": [ - "Turtle Kindred", - "Protection", - "Equipment", - "Equip", - "Dinosaur Kindred" + "Demigod Kindred", + "Disguise", + "Cloak", + "Protective Effects", + "Turtle Kindred" ], "primary_color": "Blue", "secondary_color": "Green", "example_commanders": [ "Adrix and Nev, Twincasters", "Miirym, Sentinel Wyrm", + "Daxos, Blessed by the Sun", "Codsworth, Handy Helper", - "Ulamog, the Defiler", - "Valgavoth, Terror Eater" + "Ulamog, the Defiler" ], "example_cards": [ "Roaming Throne", + "Gray Merchant of Asphodel", + "Thassa's Oracle", "Flowering of the White Tree", "Brotherhood Regalia", "Innkeeper's 
Talent", "Kappa Cannoneer", - "Adrix and Nev, Twincasters", - "Thran Power Suit", - "Miirym, Sentinel Wyrm" + "Adrix and Nev, Twincasters" ], "synergy_commanders": [ - "Kogla and Yidaro - Synergy (Turtle Kindred)", - "The Pride of Hull Clade - Synergy (Turtle Kindred)", - "Archelos, Lagoon Mystic - Synergy (Turtle Kindred)", - "Boromir, Warden of the Tower - Synergy (Protection)", - "Avacyn, Angel of Hope - Synergy (Protection)", - "The Reality Chip - Synergy (Equipment)" + "Renata, Called to the Hunt - Synergy (Demigod Kindred)", + "Anikthea, Hand of Erebos - Synergy (Demigod Kindred)", + "Bayek of Siwa - Synergy (Disguise)", + "Arno Dorian - Synergy (Disguise)", + "Etrata, Deadly Fugitive - Synergy (Cloak)" ], "popularity_bucket": "Niche", "editorial_quality": "draft", @@ -24605,35 +24766,35 @@ "id": "x-spells", "theme": "X Spells", "synergies": [ + "Affinity", "Ravenous", "Firebending", - "Hydra Kindred", - "Tyranid Kindred", - "Cost Reduction" + "Cost Reduction", + "Undaunted" ], "primary_color": "Red", "secondary_color": "Green", "example_commanders": [ + "Ghalta, Primal Hunger", "Birgi, God of Storytelling // Harnfel, Horn of Bounty", + "Emry, Lurker of the Loch", "Goreclaw, Terror of Qal Sisma", - "Danitha Capashen, Paragon", - "Baral, Chief of Compliance", - "Mikaeus, the Lunarch" + "Danitha Capashen, Paragon" ], "example_cards": [ + "Blasphemous Act", + "Boseiju, Who Endures", + "Otawara, Soaring City", "Herald's Horn", + "Takenuma, Abandoned Mire", + "The Great Henge", "Foundry Inspector", - "Finale of Devastation", - "Jet Medallion", - "Urza's Incubator", - "Exsanguinate", - "Ruby Medallion", - "Etherium Sculptor" + "Finale of Devastation" ], "synergy_commanders": [ - "Fire Lord Zuko - Synergy (Firebending)", - "Zuko, Exiled Prince - Synergy (Firebending)", - "The Goose Mother - Synergy (Hydra Kindred)" + "Urza, Chief Artificer - Synergy (Affinity)", + "Nahiri, Forged in Fury - Synergy (Affinity)", + "Fire Lord Zuko - Synergy (Firebending)" ], 
"popularity_bucket": "Very Common", "editorial_quality": "draft", @@ -24844,7 +25005,7 @@ "Cantrips": 88, "Card Draw": 309, "Combat Tricks": 214, - "Interaction": 949, + "Interaction": 935, "Unconditional Draw": 133, "Bending": 5, "Cost Reduction": 68, @@ -24870,7 +25031,9 @@ "Sloth Kindred": 3, "Lands Matter": 169, "Gargoyle Kindred": 11, - "Protection": 276, + "Protection": 65, + "Protection from Color": 95, + "Protective Effects": 375, "Griffin Kindred": 43, "Cleric Kindred": 365, "Backgrounds Matter": 11, @@ -24879,8 +25042,7 @@ "Warrior Kindred": 155, "Control": 221, "Toolbox": 90, - "Removal": 409, - "Protection from Color": 69, + "Removal": 412, "Aristocrats": 155, "Haunt": 4, "Sacrifice Matters": 155, @@ -24889,7 +25051,7 @@ "Stax": 449, "+1/+1 Counters": 462, "Spirit Kindred": 223, - "X Spells": 60, + "X Spells": 100, "Cat Kindred": 132, "Entwine": 6, "Bolster": 13, @@ -24898,13 +25060,14 @@ "Knight Kindred": 237, "Battle Cry": 5, "Burn": 216, + "Ward": 39, "Survival": 5, "Survivor Kindred": 5, "Artifact Tokens": 132, "Charge Counters": 11, "Clones": 40, "Station": 5, - "Indestructible": 98, + "Indestructible": 140, "Vampire Kindred": 35, "Gnome Kindred": 13, "Angel Kindred": 218, @@ -24923,7 +25086,7 @@ "First strike": 126, "Scout Kindred": 54, "Construct Kindred": 15, - "Hexproof": 35, + "Hexproof": 40, "Convoke": 25, "Vehicles": 64, "Dwarf Kindred": 45, @@ -24985,6 +25148,7 @@ "Replacement Draw": 2, "Wheels": 39, "Nymph Kindred": 4, + "Protection from Quality": 49, "Coven": 10, "Peasant Kindred": 19, "Transform": 65, @@ -25045,7 +25209,6 @@ "Dinosaur Kindred": 29, "Sliver Kindred": 21, "Armadillo Kindred": 1, - "Ward": 22, "Horse Kindred": 11, "Celebration": 5, "Mouse Kindred": 13, @@ -25080,6 +25243,7 @@ "Shapeshifter Kindred": 9, "Boast": 4, "Detain": 5, + "Protection from Creature Type": 7, "Miracle": 6, "Doctor Kindred": 10, "Doctor's Companion": 8, @@ -25098,6 +25262,7 @@ "Bat Kindred": 11, "Enrage": 3, "Disturb": 10, + "Protection from Creatures": 
7, "Flanking": 15, "Banding": 19, "Unicorn Kindred": 25, @@ -25398,16 +25563,16 @@ "Bird Kindred": 148, "Flying": 771, "Toughness Matters": 908, - "Aggro": 898, + "Aggro": 897, "Aristocrats": 119, "Auras": 347, - "Combat Matters": 898, + "Combat Matters": 897, "Enchant": 305, "Enchantments Matter": 735, "Midrange": 54, "Sacrifice Matters": 110, "Theft": 114, - "Voltron": 598, + "Voltron": 597, "Big Mana": 1224, "Elf Kindred": 11, "Mill": 564, @@ -25421,19 +25586,19 @@ "Manifest dread": 9, "Control": 666, "Counterspells": 348, - "Interaction": 827, + "Interaction": 824, "Stax": 915, "Fish Kindred": 43, "Flash": 169, - "Ward": 44, - "Protection": 88, - "Shroud": 24, + "Protective Effects": 198, + "Ward": 58, + "Shroud": 34, "Threshold": 9, "Historics Matter": 292, "Legends Matter": 292, "Noble Kindred": 13, "Octopus Kindred": 42, - "Removal": 257, + "Removal": 258, "Creature Tokens": 191, "Devoid": 34, "Eldrazi Kindred": 42, @@ -25441,7 +25606,7 @@ "Scion Kindred": 6, "Token Creation": 271, "Tokens Matter": 272, - "+1/+1 Counters": 223, + "+1/+1 Counters": 222, "Counters Matter": 478, "Drake Kindred": 75, "Kicker": 29, @@ -25450,6 +25615,7 @@ "Loot": 246, "Wizard Kindred": 526, "Cost Reduction": 144, + "X Spells": 194, "Artifacts Matter": 621, "Equipment Matters": 90, "Lands Matter": 198, @@ -25503,7 +25669,6 @@ "Max speed": 4, "Start your engines!": 4, "Scry": 138, - "X Spells": 109, "Shapeshifter Kindred": 58, "Evoke": 6, "Leviathan Kindred": 21, @@ -25547,7 +25712,7 @@ "Crab Kindred": 35, "Dragon Kindred": 45, "Elder Kindred": 4, - "Hexproof": 46, + "Hexproof": 66, "Faerie Kindred": 81, "Mana Dork": 47, "Morph": 43, @@ -25589,6 +25754,8 @@ "Amass": 13, "Army Kindred": 13, "Embalm": 5, + "Protection": 14, + "Protection from Color": 12, "Scout Kindred": 29, "Cycling": 74, "Jellyfish Kindred": 21, @@ -25599,6 +25766,7 @@ "Peasant Kindred": 3, "Griffin Kindred": 3, "Beeble Kindred": 3, + "Protection from Quality": 7, "Venture into the dungeon": 7, "Improvise": 8, 
"Cloak": 2, @@ -25704,6 +25872,7 @@ "Umbra armor": 4, "Dinosaur Kindred": 7, "Emerge": 6, + "Protection from Creatures": 1, "Worm Kindred": 2, "Processor Kindred": 4, "Bestow": 7, @@ -25823,7 +25992,7 @@ "Praetor Kindred": 3, "Experience Counters": 1, "Exhaust": 6, - "Indestructible": 4, + "Indestructible": 9, "Kithkin Kindred": 1, "Flanking": 1, "Minotaur Kindred": 1, @@ -25947,7 +26116,7 @@ "Big Mana": 1197, "Spells Matter": 1373, "Spellslinger": 1373, - "X Spells": 81, + "X Spells": 129, "Aggro": 1210, "Aristocrats": 658, "Combat Matters": 1210, @@ -25960,8 +26129,10 @@ "Demon Kindred": 164, "Flying": 476, "Harpy Kindred": 11, + "Protective Effects": 129, "Token Creation": 415, "Tokens Matter": 416, + "Ward": 35, "Combat Tricks": 174, "Interaction": 808, "Midrange": 69, @@ -25992,8 +26163,7 @@ "Trample": 54, "Specter Kindred": 21, "Centaur Kindred": 3, - "Indestructible": 32, - "Protection": 54, + "Indestructible": 57, "Warrior Kindred": 168, "Intimidate": 13, "Spirit Kindred": 145, @@ -26066,13 +26236,13 @@ "Disguise": 4, "Madness": 29, "Void": 10, - "Ward": 17, "Warp": 14, "Skeleton Kindred": 66, "Charge Counters": 9, "Mana Rock": 12, "Craft": 4, "Graveyard Matters": 5, + "Hexproof": 9, "Fabricate": 5, "Construct Kindred": 10, "Insect Kindred": 79, @@ -26171,6 +26341,8 @@ "Horse Kindred": 9, "Cat Kindred": 16, "Land Types Matter": 36, + "Protection": 26, + "Protection from Color": 27, "Equip": 32, "Equipment": 35, "Job select": 4, @@ -26237,6 +26409,7 @@ "Undying": 8, "Flanking": 4, "Changeling": 8, + "Shroud": 3, "Horsemanship": 7, "Council's dilemma": 1, "Alien Kindred": 5, @@ -26356,7 +26529,6 @@ "Glimmer Kindred": 2, "Miracle": 2, "Station": 4, - "Hexproof": 5, "Hexproof from": 2, "Fox Kindred": 1, "Defense Counters": 1, @@ -26369,6 +26541,7 @@ "Goad": 5, "Learn": 3, "Inkling Kindred": 2, + "Protection from Quality": 1, "Map Token": 1, "Skulk": 5, "Revolt": 3, @@ -26444,7 +26617,6 @@ "Plant Kindred": 2, "Manticore Kindred": 1, "Hit Counters": 2, - 
"Protection from Color": 2, "Cipher": 5, "Hour Counters": 1, "Processor Kindred": 2, @@ -26454,6 +26626,7 @@ "Astartes Kindred": 4, "Primarch Kindred": 1, "Divinity Counters": 1, + "Protection from Creature Type": 2, "Feeding Counters": 1, "Multiple Copies": 4, "Nazgûl": 1, @@ -26532,11 +26705,11 @@ "Little Fellas": 1255, "Mana Dork": 57, "Ramp": 98, - "Aggro": 1406, - "Combat Matters": 1406, + "Aggro": 1405, + "Combat Matters": 1405, "Combat Tricks": 160, "Discard Matters": 303, - "Interaction": 630, + "Interaction": 631, "Madness": 18, "Mill": 341, "Reanimate": 261, @@ -26560,10 +26733,10 @@ "Draw Triggers": 54, "Tyranid Kindred": 4, "Wheels": 58, - "+1/+1 Counters": 248, - "Counters Matter": 435, + "+1/+1 Counters": 247, + "Counters Matter": 434, "Renown": 5, - "Voltron": 536, + "Voltron": 535, "Auras": 196, "Enchant": 159, "Goad": 29, @@ -26622,7 +26795,9 @@ "Equipment Matters": 141, "Samurai Kindred": 20, "Shaman Kindred": 175, - "Protection": 19, + "Protection": 15, + "Protection from Color": 18, + "Protective Effects": 58, "Conditional Draw": 42, "Phyrexian Kindred": 44, "Ally Kindred": 19, @@ -26649,7 +26824,7 @@ "Planeswalkers": 67, "Superfriends": 67, "Vampire Kindred": 54, - "X Spells": 136, + "X Spells": 187, "Land Types Matter": 31, "Backgrounds Matter": 13, "Choose a background": 7, @@ -26665,6 +26840,7 @@ "Time Counters": 24, "Demigod Kindred": 1, "Satyr Kindred": 14, + "Ward": 22, "Elder Kindred": 2, "Fade Counters": 1, "Fading": 1, @@ -26795,7 +26971,7 @@ "Cumulative upkeep": 7, "Shark Kindred": 4, "Mouse Kindred": 9, - "Indestructible": 8, + "Indestructible": 17, "Discover": 9, "Card Selection": 2, "Explore": 1, @@ -26856,7 +27032,6 @@ "Encore": 4, "Domain": 6, "Multikicker": 4, - "Ward": 4, "Manticore Kindred": 9, "Treefolk Kindred": 1, "Licid Kindred": 2, @@ -26972,7 +27147,6 @@ "Exalted": 1, "Islandwalk": 1, "Battle Cry": 5, - "Protection from Color": 1, "Troll Kindred": 3, "Meld": 1, "Aim Counters": 1, @@ -27036,6 +27210,7 @@ "Bargain": 3, 
"Fish Kindred": 2, "Job select": 3, + "Protection from Quality": 1, "Ice Counters": 1, "Shell Counters": 1, "Badger Kindred": 2, @@ -27098,13 +27273,13 @@ "Token Creation": 520, "Tokens Matter": 529, "Artifacts Matter": 449, - "Interaction": 549, + "Interaction": 548, "Little Fellas": 1380, "Mutant Kindred": 27, "Ravenous": 7, "Removal": 248, "Tyranid Kindred": 16, - "X Spells": 119, + "X Spells": 171, "-1/-1 Counters": 66, "Age Counters": 19, "Cumulative upkeep": 15, @@ -27187,21 +27362,21 @@ "Elephant Kindred": 43, "Cycling": 52, "Discard Matters": 87, - "Indestructible": 41, + "Indestructible": 65, "Loot": 52, - "Protection": 105, + "Protective Effects": 247, "Vehicles": 25, "Revolt": 6, "Scout Kindred": 97, "Stax": 271, - "Hexproof": 67, + "Hexproof": 80, "Faerie Kindred": 13, "Soldier Kindred": 37, "Mount Kindred": 14, "Saddle": 9, "Troll Kindred": 29, "Crocodile Kindred": 11, - "Shroud": 28, + "Shroud": 32, "Brushwagg Kindred": 4, "Exile Matters": 87, "Outlaw Kindred": 31, @@ -27223,7 +27398,7 @@ "Treasure": 26, "Treasure Token": 25, "Turtle Kindred": 12, - "Ward": 31, + "Ward": 51, "Elder Kindred": 3, "Flying": 48, "Mana Rock": 16, @@ -27236,6 +27411,8 @@ "Rooms Matter": 5, "Frog Kindred": 26, "Threshold": 22, + "Protection": 28, + "Protection from Color": 20, "Enrage": 10, "Chimera Kindred": 4, "Hydra Kindred": 45, @@ -27448,7 +27625,6 @@ "Pilot Kindred": 4, "Sand Kindred": 2, "Egg Kindred": 2, - "Protection from Color": 4, "Soulbond": 8, "Employee Kindred": 3, "Robot Kindred": 5, @@ -27480,6 +27656,7 @@ "Melee": 2, "Overload": 2, "Nightmare Kindred": 1, + "Protection from Quality": 11, "Fox Kindred": 2, "Learn": 3, "Encore": 1, @@ -27513,6 +27690,7 @@ "Skeleton Kindred": 1, "Undergrowth": 6, "Paradox": 2, + "Protection from Creature Type": 2, "Crab Kindred": 1, "Riot": 3, "Kithkin Kindred": 3, @@ -27638,12 +27816,12 @@ "generated_from": "merge (analytics + curated YAML + whitelist)", "metadata_info": { "mode": "merge", - "generated_at": 
"2025-10-09T23:48:28", - "curated_yaml_files": 735, + "generated_at": "2025-10-13T04:26:36", + "curated_yaml_files": 739, "synergy_cap": 5, "inference": "pmi", "version": "phase-b-merge-v1", - "catalog_hash": "fd3931305d6c86535e5e9c5ec4e95b5a05bc7b125b9dd6557600a4b3cf610ce4" + "catalog_hash": "a6ca486659ada6088f6cba7e5aab4f5dd64cf66e8d2eb31280e4fbd5b67167b8" }, "description_fallback_summary": null } \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 5470410..fe54379 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -98,7 +98,7 @@ services: WEB_AUTO_SETUP: "1" # 1=auto-run setup/tagging when needed WEB_AUTO_REFRESH_DAYS: "7" # Refresh cards.csv if older than N days; 0=never WEB_TAG_PARALLEL: "1" # 1=parallelize tagging - WEB_TAG_WORKERS: "8" # Worker count when parallel tagging + WEB_TAG_WORKERS: "4" # Worker count when parallel tagging # Tagging Refinement Feature Flags TAG_NORMALIZE_KEYWORDS: "1" # 1=normalize keywords & filter specialty mechanics (recommended)