mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-16 23:50:12 +01:00
feat: add keyword normalization and protection grant detection, fix template syntax and polling issues
This commit is contained in:
parent
86ec68acb4
commit
06d8796316
17 changed files with 1692 additions and 611 deletions
203
code/scripts/audit_protection_full_v2.py
Normal file
@@ -0,0 +1,203 @@
"""
Full audit of Protection-tagged cards with kindred metadata support (M2 Phase 2).

Created: October 8, 2025
Purpose: Audit and validate Protection tag precision after implementing grant detection.
Can be re-run periodically to check tagging quality.

This script audits ALL Protection-tagged cards and categorizes them:
- Grant: Gives broad protection to other permanents YOU control
- Kindred: Gives protection to specific creature types (metadata tags)
- Mixed: Both broad and kindred/inherent
- Inherent: Only has protection itself
- ConditionalSelf: Only conditionally grants to itself
- Opponent: Grants to opponent's permanents
- Neither: False positive

Outputs:
- m2_audit_v2.json: Full analysis with summary
- m2_audit_v2_grant.csv: Cards for main Protection tag
- m2_audit_v2_kindred.csv: Cards for kindred metadata tags
- m2_audit_v2_mixed.csv: Cards with both broad and kindred grants
- m2_audit_v2_conditional.csv: Conditional self-grants (exclude)
- m2_audit_v2_inherent.csv: Inherent protection only (exclude)
- m2_audit_v2_opponent.csv: Opponent grants (exclude)
- m2_audit_v2_neither.csv: False positives (exclude)
- m2_audit_v2_all.csv: All cards combined
"""

import sys
from pathlib import Path
import pandas as pd
import json

# Add project root to path
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

from code.tagging.protection_grant_detection import (
    categorize_protection_card,
    get_kindred_protection_tags,
    is_granting_protection,
)

def load_all_cards():
    """Load all cards from color/identity CSV files."""
    csv_dir = project_root / 'csv_files'

    # Get all color/identity CSVs (not the raw cards.csv)
    csv_files = list(csv_dir.glob('*_cards.csv'))
    csv_files = [f for f in csv_files if f.stem not in ['cards', 'testdata']]

    all_cards = []
    for csv_file in csv_files:
        try:
            df = pd.read_csv(csv_file)
            all_cards.append(df)
        except Exception as e:
            print(f"Warning: Could not load {csv_file.name}: {e}")

    # Combine all DataFrames
    combined = pd.concat(all_cards, ignore_index=True)

    # Drop duplicates (cards appear in multiple color files)
    combined = combined.drop_duplicates(subset=['name'], keep='first')

    return combined

def audit_all_protection_cards():
    """Audit all Protection-tagged cards."""
    print("Loading all cards...")
    df = load_all_cards()

    print(f"Total cards loaded: {len(df)}")

    # Filter to Protection-tagged cards (column is 'themeTags' in color CSVs)
    df_prot = df[df['themeTags'].str.contains('Protection', case=False, na=False)].copy()

    print(f"Protection-tagged cards: {len(df_prot)}")

    # Categorize each card
    categories = []
    grants_list = []
    kindred_tags_list = []

    for idx, row in df_prot.iterrows():
        name = row['name']
        text = str(row.get('text', '')).replace('\\n', '\n')  # Convert escaped newlines to real newlines
        keywords = str(row.get('keywords', ''))
        card_type = str(row.get('type', ''))

        # Categorize with kindred exclusion enabled
        category = categorize_protection_card(name, text, keywords, card_type, exclude_kindred=True)

        # Check if it grants broadly
        grants_broad = is_granting_protection(text, keywords, exclude_kindred=True)

        # Get kindred tags
        kindred_tags = get_kindred_protection_tags(text)

        categories.append(category)
        grants_list.append(grants_broad)
        kindred_tags_list.append(', '.join(sorted(kindred_tags)) if kindred_tags else '')

    df_prot['category'] = categories
    df_prot['grants_broad'] = grants_list
    df_prot['kindred_tags'] = kindred_tags_list

    # Generate summary (convert numpy types to native Python for JSON serialization)
    summary = {
        'total': int(len(df_prot)),
        'categories': {k: int(v) for k, v in df_prot['category'].value_counts().to_dict().items()},
        'grants_broad_count': int(df_prot['grants_broad'].sum()),
        'kindred_cards_count': int((df_prot['kindred_tags'] != '').sum()),
    }

    # Calculate keep vs remove
    keep_categories = {'Grant', 'Mixed'}
    kindred_only = df_prot[df_prot['category'] == 'Kindred']
    keep_count = len(df_prot[df_prot['category'].isin(keep_categories)])
    remove_count = len(df_prot[~df_prot['category'].isin(keep_categories | {'Kindred'})])

    summary['keep_main_tag'] = keep_count
    summary['kindred_metadata'] = len(kindred_only)
    summary['remove'] = remove_count
    summary['precision_estimate'] = round((keep_count / len(df_prot)) * 100, 1) if len(df_prot) > 0 else 0

    # Print summary
    print(f"\n{'='*60}")
    print("AUDIT SUMMARY")
    print(f"{'='*60}")
    print(f"Total Protection-tagged cards: {summary['total']}")
    print(f"\nCategories:")
    for cat, count in sorted(summary['categories'].items()):
        pct = (count / summary['total']) * 100
        print(f" {cat:20s} {count:4d} ({pct:5.1f}%)")

    print(f"\n{'='*60}")
    print(f"Main Protection tag: {keep_count:4d} ({keep_count/len(df_prot)*100:5.1f}%)")
    print(f"Kindred metadata only: {len(kindred_only):4d} ({len(kindred_only)/len(df_prot)*100:5.1f}%)")
    print(f"Remove: {remove_count:4d} ({remove_count/len(df_prot)*100:5.1f}%)")
    print(f"{'='*60}")
    print(f"Precision estimate: {summary['precision_estimate']}%")
    print(f"{'='*60}\n")

    # Export results
    output_dir = project_root / 'logs' / 'roadmaps' / 'source' / 'tagging_refinement'
    output_dir.mkdir(parents=True, exist_ok=True)

    # Export JSON summary
    with open(output_dir / 'm2_audit_v2.json', 'w') as f:
        json.dump({
            'summary': summary,
            'cards': df_prot[['name', 'type', 'category', 'grants_broad', 'kindred_tags', 'keywords', 'text']].to_dict(orient='records')
        }, f, indent=2)

    # Export CSVs by category
    export_cols = ['name', 'type', 'category', 'grants_broad', 'kindred_tags', 'keywords', 'text']

    # Grant category
    df_grant = df_prot[df_prot['category'] == 'Grant']
    df_grant[export_cols].to_csv(output_dir / 'm2_audit_v2_grant.csv', index=False)
    print(f"Exported {len(df_grant)} Grant cards to m2_audit_v2_grant.csv")

    # Kindred category
    df_kindred = df_prot[df_prot['category'] == 'Kindred']
    df_kindred[export_cols].to_csv(output_dir / 'm2_audit_v2_kindred.csv', index=False)
    print(f"Exported {len(df_kindred)} Kindred cards to m2_audit_v2_kindred.csv")

    # Mixed category
    df_mixed = df_prot[df_prot['category'] == 'Mixed']
    df_mixed[export_cols].to_csv(output_dir / 'm2_audit_v2_mixed.csv', index=False)
    print(f"Exported {len(df_mixed)} Mixed cards to m2_audit_v2_mixed.csv")

    # ConditionalSelf category
    df_conditional = df_prot[df_prot['category'] == 'ConditionalSelf']
    df_conditional[export_cols].to_csv(output_dir / 'm2_audit_v2_conditional.csv', index=False)
    print(f"Exported {len(df_conditional)} ConditionalSelf cards to m2_audit_v2_conditional.csv")

    # Inherent category
    df_inherent = df_prot[df_prot['category'] == 'Inherent']
    df_inherent[export_cols].to_csv(output_dir / 'm2_audit_v2_inherent.csv', index=False)
    print(f"Exported {len(df_inherent)} Inherent cards to m2_audit_v2_inherent.csv")

    # Opponent category
    df_opponent = df_prot[df_prot['category'] == 'Opponent']
    df_opponent[export_cols].to_csv(output_dir / 'm2_audit_v2_opponent.csv', index=False)
    print(f"Exported {len(df_opponent)} Opponent cards to m2_audit_v2_opponent.csv")

    # Neither category
    df_neither = df_prot[df_prot['category'] == 'Neither']
    df_neither[export_cols].to_csv(output_dir / 'm2_audit_v2_neither.csv', index=False)
    print(f"Exported {len(df_neither)} Neither cards to m2_audit_v2_neither.csv")

    # All cards
    df_prot[export_cols].to_csv(output_dir / 'm2_audit_v2_all.csv', index=False)
    print(f"Exported {len(df_prot)} total cards to m2_audit_v2_all.csv")

    print(f"\nAll files saved to: {output_dir}")

    return df_prot, summary

if __name__ == '__main__':
    df_results, summary = audit_all_protection_cards()
@@ -1,6 +1,7 @@
from __future__ import annotations

# Standard library imports
import os
from typing import Dict, List, Optional

# ----------------------------------------------------------------------------------
@@ -98,4 +99,17 @@ CSV_DIRECTORY: str = 'csv_files'
FILL_NA_COLUMNS: Dict[str, Optional[str]] = {
    'colorIdentity': 'Colorless',  # Default color identity for cards without one
    'faceName': None  # Use card's name column value when face name is not available
}

# ----------------------------------------------------------------------------------
# TAGGING REFINEMENT FEATURE FLAGS (M1-M3)
# ----------------------------------------------------------------------------------

# M1: Enable keyword normalization and singleton pruning
TAG_NORMALIZE_KEYWORDS = os.getenv('TAG_NORMALIZE_KEYWORDS', '1').lower() not in ('0', 'false', 'off', 'disabled')

# M2: Enable protection grant detection (planned)
TAG_PROTECTION_GRANTS = os.getenv('TAG_PROTECTION_GRANTS', '0').lower() not in ('0', 'false', 'off', 'disabled')

# M3: Enable metadata/theme partition (planned)
TAG_METADATA_SPLIT = os.getenv('TAG_METADATA_SPLIT', '0').lower() not in ('0', 'false', 'off', 'disabled')
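All three flags share the same parsing rule: the environment value is lowercased and the flag stays on unless it is one of the explicit "off" strings. A minimal standalone sketch of that behaviour (the helper name flag_enabled and OFF_VALUES are illustrative, not part of the codebase):

import os

OFF_VALUES = ('0', 'false', 'off', 'disabled')

def flag_enabled(name: str, default: str) -> bool:
    # Mirrors the settings expression: enabled unless explicitly disabled.
    return os.getenv(name, default).lower() not in OFF_VALUES

os.environ['TAG_NORMALIZE_KEYWORDS'] = 'off'
print(flag_enabled('TAG_NORMALIZE_KEYWORDS', '1'))  # False - explicitly disabled
print(flag_enabled('TAG_PROTECTION_GRANTS', '0'))   # False - default '0' counts as off
print(flag_enabled('TAG_METADATA_SPLIT', 'yes'))    # True - any non-off value enables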
493
code/tagging/protection_grant_detection.py
Normal file
@@ -0,0 +1,493 @@
"""
Protection grant detection implementation for M2.

This module provides helpers to distinguish cards that grant protection effects
from cards that have inherent protection effects.

Usage in tagger.py:
    from code.tagging.protection_grant_detection import is_granting_protection

    if is_granting_protection(text, keywords):
        # Tag as Protection
"""

import re
from typing import Set, List, Pattern

from code.tagging.tag_constants import CREATURE_TYPES


# Pre-compile kindred detection patterns at module load for performance
# Pattern: (compiled_regex, tag_name_template)
KINDRED_PATTERNS: List[tuple[Pattern, str]] = []

def _init_kindred_patterns():
    """Initialize pre-compiled kindred patterns for all creature types."""
    global KINDRED_PATTERNS
    if KINDRED_PATTERNS:
        return  # Already initialized

    for creature_type in CREATURE_TYPES:
        creature_lower = creature_type.lower()
        creature_escaped = re.escape(creature_lower)
        tag_name = f"{creature_type}s Gain Protection"

        # Create 3 patterns per type
        patterns_to_compile = [
            (rf'\bother {creature_escaped}s?\b.*\b(have|gain)\b', tag_name),
            (rf'\b{creature_escaped} creatures?\b.*\b(have|gain)\b', tag_name),
            (rf'\btarget {creature_escaped}\b.*\bgains?\b', tag_name),
        ]

        for pattern_str, tag in patterns_to_compile:
            try:
                compiled = re.compile(pattern_str, re.IGNORECASE)
                KINDRED_PATTERNS.append((compiled, tag))
            except re.error:
                # Skip patterns that fail to compile
                pass


# Grant verb patterns - cards that give protection to other permanents
# These patterns look for grant verbs that affect OTHER permanents, not self
GRANT_VERB_PATTERNS = [
    r'\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'\bgive[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'\bgrant[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'\bget[s]?\b.*\+.*\b(hexproof|shroud|indestructible|ward|protection)\b',  # "gets +X/+X and has" pattern
]

# Self-reference patterns that should NOT count as granting
# Reminder text and keyword lines only
SELF_REFERENCE_PATTERNS = [
    r'^\s*(hexproof|shroud|indestructible|ward|protection)',  # Start of text (keyword ability)
    r'\([^)]*\b(hexproof|shroud|indestructible|ward|protection)[^)]*\)',  # Reminder text in parens
]

# Conditional self-grant patterns - activated/triggered abilities that grant to self
CONDITIONAL_SELF_GRANT_PATTERNS = [
    # Activated abilities
    r'\{[^}]*\}.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'discard.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b',
    r'\{t\}.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b',
    r'sacrifice.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b',
    r'pay.*life.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b',
    # Triggered abilities that grant to self only
    r'whenever.*\b(this creature|this permanent|it)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'whenever you (cast|play|attack|cycle|discard|commit).*\b(this creature|this permanent|it)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'at the beginning.*\b(this creature|this permanent|it)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'whenever.*\b(this creature|this permanent)\b (attacks|enters|becomes).*\b(this creature|this permanent|it)\b.*\bgain[s]?\b',
    # Named self-references (e.g., "Pristine Skywise gains")
    r'whenever you cast.*[A-Z][a-z]+.*gains.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'whenever you.*[A-Z][a-z]+.*gains.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    # Static conditional abilities (as long as, if you control X)
    r'as long as.*\b(this creature|this permanent|it|has)\b.*(has|gains?).*\b(hexproof|shroud|indestructible|ward|protection)\b',
]

# Mass grant patterns - affects multiple creatures YOU control
MASS_GRANT_PATTERNS = [
    r'creatures you control (have|gain|get)',
    r'other .* you control (have|gain|get)',
    r'(artifacts?|enchantments?|permanents?) you control (have|gain|get)',  # Artifacts you control have...
    r'other (creatures?|artifacts?|enchantments?) (have|gain|get)',  # Other creatures have...
    r'all (creatures?|slivers?|permanents?) (have|gain|get)',  # All creatures/slivers have...
]

# Targeted grant patterns - must specify "you control"
TARGETED_GRANT_PATTERNS = [
    r'target .* you control (gains?|gets?|has)',
    r'equipped creature (gains?|gets?|has)',
    r'enchanted creature (gains?|gets?|has)',
]

# Exclusion patterns - cards that remove or prevent protection
EXCLUSION_PATTERNS = [
    r"can't have (hexproof|indestructible|ward|shroud)",
    r"lose[s]? (hexproof|indestructible|ward|shroud|protection)",
    r"without (hexproof|indestructible|ward|shroud)",
    r"protection from.*can't",
]

# Opponent grant patterns - grants to opponent's permanents (EXCLUDE these)
OPPONENT_GRANT_PATTERNS = [
    r'target opponent',
    r'each opponent',
    r'all creatures',  # "all creatures" without "you control"
    r'all permanents',  # "all permanents" without "you control"
    r'each player',
    r'each creature',  # "each creature" without "you control"
]

# Kindred-specific grant patterns for metadata tagging
KINDRED_GRANT_PATTERNS = {
    'Knights Gain Protection': [
        r'knight[s]? you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other knight[s]?.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Merfolk Gain Protection': [
        r'merfolk you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other merfolk.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Zombies Gain Protection': [
        r'zombie[s]? you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other zombie[s]?.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'target.*zombie.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Vampires Gain Protection': [
        r'vampire[s]? you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other vampire[s]?.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Elves Gain Protection': [
        r'el(f|ves) you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other el(f|ves).*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Dragons Gain Protection': [
        r'dragon[s]? you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other dragon[s]?.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Goblins Gain Protection': [
        r'goblin[s]? you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other goblin[s]?.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Slivers Gain Protection': [
        r'sliver[s]? you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'all sliver[s]?.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other sliver[s]?.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Artifacts Gain Protection': [
        r'artifact[s]? you control (have|gain).*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other artifact[s]? (have|gain).*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Enchantments Gain Protection': [
        r'enchantment[s]? you control (have|gain).*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other enchantment[s]? (have|gain).*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
}

# Protection keyword patterns for inherent check
PROTECTION_KEYWORDS = {
    'hexproof',
    'shroud',
    'indestructible',
    'ward',
    'protection from',
    'protection',
}


def get_kindred_protection_tags(text: str) -> Set[str]:
    """
    Identify kindred-specific protection grants for metadata tagging.

    Returns a set of metadata tag names like "Knights Gain Protection".

    Uses both predefined patterns and dynamic creature type detection.
    """
    if not text:
        return set()

    # Initialize pre-compiled patterns if needed
    _init_kindred_patterns()

    text_lower = text.lower()
    tags = set()

    # Check predefined patterns (specific kindred types we track)
    for tag_name, patterns in KINDRED_GRANT_PATTERNS.items():
        for pattern in patterns:
            if re.search(pattern, text_lower, re.IGNORECASE):
                tags.add(tag_name)
                break  # Found match for this kindred type, move to next

    # Only check dynamic patterns if protection keywords present (performance optimization)
    if not any(keyword in text_lower for keyword in ['hexproof', 'shroud', 'indestructible', 'ward', 'protection']):
        return tags

    # Use pre-compiled patterns for all creature types
    for compiled_pattern, tag_name in KINDRED_PATTERNS:
        if compiled_pattern.search(text_lower):
            tags.add(tag_name)
            # Don't break - a card could grant to multiple creature types

    return tags


def is_opponent_grant(text: str) -> bool:
    """
    Check if card grants protection to opponent's permanents or all permanents.

    Returns True if this grants to opponents (should be excluded from Protection tag).
    """
    if not text:
        return False

    text_lower = text.lower()

    # Check for opponent grant patterns
    for pattern in OPPONENT_GRANT_PATTERNS:
        if re.search(pattern, text_lower, re.IGNORECASE):
            # Make sure it's not "target opponent" for a different effect
            # Must be in context of granting protection
            if any(prot in text_lower for prot in ['hexproof', 'shroud', 'indestructible', 'ward', 'protection']):
                # Check if "you control" appears in same sentence
                if 'you control' not in text_lower.split('.')[0]:
                    return True

    return False


def has_conditional_self_grant(text: str) -> bool:
    """
    Check if card has any conditional self-grant patterns.
    This does NOT check if it ALSO grants to others.
    """
    if not text:
        return False

    text_lower = text.lower()

    # Check for conditional self-grant patterns (activated/triggered abilities)
    for pattern in CONDITIONAL_SELF_GRANT_PATTERNS:
        if re.search(pattern, text_lower, re.IGNORECASE):
            return True

    return False


def is_conditional_self_grant(text: str) -> bool:
    """
    Check if card only conditionally grants protection to itself.

    Examples:
    - "{B}, Discard a card: This creature gains hexproof until end of turn."
    - "Whenever you cast a noncreature spell, untap this creature. It gains protection..."
    - "Whenever this creature attacks, it gains indestructible until end of turn."

    These should be excluded as they don't provide protection to OTHER permanents.
    """
    if not text:
        return False

    text_lower = text.lower()

    # Check if it has conditional self-grant patterns
    found_conditional_self = has_conditional_self_grant(text)

    if not found_conditional_self:
        return False

    # If we found a conditional self-grant, check if there's ALSO a grant to others
    # Look for patterns that grant to creatures besides itself
    has_other_grant = any(re.search(pattern, text_lower, re.IGNORECASE) for pattern in [
        r'other creatures',
        r'creatures you control (have|gain)',
        r'target (creature|permanent) you control gains',
        r'another target (creature|permanent)',
        r'equipped creature (has|gains)',
        r'enchanted creature (has|gains)',
        r'target legendary',
        r'permanents you control gain',
    ])

    # Return True only if it's ONLY conditional self-grants (no other grants)
    return not has_other_grant


def is_granting_protection(text: str, keywords: str, exclude_kindred: bool = False) -> bool:
    """
    Determine if a card grants protection effects to other permanents.

    Returns True if the card gives/grants protection to other cards unconditionally.
    Returns False if:
    - Card only has inherent protection
    - Card only conditionally grants to itself
    - Card grants to opponent's permanents
    - Card grants only to specific kindred types (when exclude_kindred=True)
    - Card creates tokens with protection (not granting to existing permanents)
    - Card only modifies non-protection stats of other permanents

    Args:
        text: Card text to analyze
        keywords: Card keywords (comma-separated)
        exclude_kindred: If True, exclude kindred-specific grants

    Returns:
        True if card grants broad protection, False otherwise
    """
    if not text:
        return False

    text_lower = text.lower()

    # EXCLUDE: Opponent grants
    if is_opponent_grant(text):
        return False

    # EXCLUDE: Conditional self-grants only
    if is_conditional_self_grant(text):
        return False

    # EXCLUDE: Cards that remove protection
    for pattern in EXCLUSION_PATTERNS:
        if re.search(pattern, text_lower, re.IGNORECASE):
            return False

    # EXCLUDE: Token creation with protection (not granting to existing permanents)
    if re.search(r'create.*token.*with.*(hexproof|shroud|indestructible|ward|protection)', text_lower, re.IGNORECASE):
        # Check if there's ALSO granting to other permanents
        has_grant_to_others = any(re.search(pattern, text_lower, re.IGNORECASE) for pattern in MASS_GRANT_PATTERNS)
        if not has_grant_to_others:
            return False

    # EXCLUDE: Kindred-specific grants if requested
    if exclude_kindred:
        kindred_tags = get_kindred_protection_tags(text)
        if kindred_tags:
            # If we detected kindred tags, check if there's ALSO a non-kindred grant
            # Look for grant patterns that explicitly grant to ALL creatures/permanents broadly
            has_broad_grant = False

            # Patterns that indicate truly broad grants (not type-specific)
            broad_only_patterns = [
                r'\bcreatures you control (have|gain)\b(?!.*(knight|merfolk|zombie|elf|dragon|goblin|sliver))',  # Only if not followed by type
                r'\bpermanents you control (have|gain)\b',
                r'\beach (creature|permanent) you control',
                r'\ball (creatures?|permanents?)',
            ]

            for pattern in broad_only_patterns:
                if re.search(pattern, text_lower, re.IGNORECASE):
                    has_broad_grant = True
                    break

            if not has_broad_grant:
                return False  # Only kindred grants, exclude

    # Check if card has inherent protection keywords
    has_inherent = False
    if keywords:
        keywords_lower = keywords.lower()
        has_inherent = any(k in keywords_lower for k in PROTECTION_KEYWORDS)

    # Check for explicit grants with protection keywords
    found_grant = False

    # Mass grant patterns (creatures you control have/gain)
    for pattern in MASS_GRANT_PATTERNS:
        match = re.search(pattern, text_lower, re.IGNORECASE)
        if match:
            # Check if protection keyword appears in the same sentence or nearby (within 70 chars AFTER the match)
            # This ensures we're looking at "creatures you control HAVE hexproof" not just having both phrases
            context_start = match.start()
            context_end = min(len(text_lower), match.end() + 70)
            context = text_lower[context_start:context_end]

            if any(prot in context for prot in PROTECTION_KEYWORDS):
                found_grant = True
                break

    # Targeted grant patterns (target creature gains)
    if not found_grant:
        for pattern in TARGETED_GRANT_PATTERNS:
            match = re.search(pattern, text_lower, re.IGNORECASE)
            if match:
                # Check if protection keyword appears after the grant verb (within 70 chars)
                context_start = match.start()
                context_end = min(len(text_lower), match.end() + 70)
                context = text_lower[context_start:context_end]

                if any(prot in context for prot in PROTECTION_KEYWORDS):
                    found_grant = True
                    break

    # Grant verb patterns (creature gains/gets hexproof)
    if not found_grant:
        for pattern in GRANT_VERB_PATTERNS:
            if re.search(pattern, text_lower, re.IGNORECASE):
                found_grant = True
                break

    # If we have inherent protection and the ONLY text is about stats (no grant words), exclude
    if has_inherent and not found_grant:
        # Check if text only talks about other stats (power/toughness, +X/+X)
        has_stat_only = bool(re.search(r'(get[s]?|gain[s]?)\s+[+\-][0-9X]+/[+\-][0-9X]+', text_lower))
        # Check if text mentions "other" without protection keywords
        mentions_other_without_prot = 'other' in text_lower and not any(prot in text_lower for prot in PROTECTION_KEYWORDS if prot in text_lower[text_lower.find('other'):])

        if has_stat_only or mentions_other_without_prot:
            return False

    return found_grant


def categorize_protection_card(name: str, text: str, keywords: str, card_type: str, exclude_kindred: bool = False) -> str:
    """
    Categorize a Protection-tagged card for audit purposes.

    Args:
        name: Card name
        text: Card text
        keywords: Card keywords
        card_type: Card type line
        exclude_kindred: If True, kindred-specific grants are categorized as metadata, not Grant

    Returns:
        'Grant' - gives broad protection to others
        'Kindred' - gives kindred-specific protection (metadata tag)
        'Inherent' - has protection itself
        'ConditionalSelf' - only conditionally grants to itself
        'Opponent' - grants to opponent's permanents
        'Neither' - false positive
    """
    keywords_lower = keywords.lower() if keywords else ''

    # Check for opponent grants first
    if is_opponent_grant(text):
        return 'Opponent'

    # Check for conditional self-grants (ONLY self, no other grants)
    if is_conditional_self_grant(text):
        return 'ConditionalSelf'

    # Check if it has conditional self-grant (may also have other grants)
    has_cond_self = has_conditional_self_grant(text)

    # Check if it has inherent protection
    has_inherent = any(k in keywords_lower for k in PROTECTION_KEYWORDS)

    # Check for kindred-specific grants
    kindred_tags = get_kindred_protection_tags(text)
    if kindred_tags and exclude_kindred:
        # Check if there's ALSO a broad grant (excluding kindred)
        grants_broad = is_granting_protection(text, keywords, exclude_kindred=True)

        if grants_broad and has_inherent:
            # Has inherent + kindred + broad grants
            return 'Mixed'
        elif grants_broad:
            # Has kindred + broad grants (but no inherent)
            # This is just Grant with kindred metadata tags
            return 'Grant'
        elif has_inherent:
            # Has inherent + kindred only (not broad)
            # This is still just Kindred category (inherent is separate from granting)
            return 'Kindred'
        else:
            # Only kindred grants, no inherent or broad
            return 'Kindred'

    # Check if it grants protection broadly (not kindred-specific)
    grants_protection = is_granting_protection(text, keywords, exclude_kindred=exclude_kindred)

    # Categorize based on what it does
    if grants_protection and has_cond_self:
        # Has conditional self-grant + grants to others = Mixed
        return 'Mixed'
    elif grants_protection and has_inherent:
        return 'Mixed'  # Has inherent + grants broadly
    elif grants_protection:
        return 'Grant'  # Only grants broadly
    elif has_inherent:
        return 'Inherent'  # Only has inherent
    else:
        return 'Neither'  # False positive
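A short sketch of how these helpers combine (the card texts are illustrative strings rather than rows from the generated CSVs, and it assumes the repository root is importable):

from code.tagging.protection_grant_detection import (
    categorize_protection_card,
    get_kindred_protection_tags,
    is_granting_protection,
)

# Broad grant to your own board: qualifies for the main Protection tag.
print(is_granting_protection("Creatures you control have hexproof.", ""))  # True

# Kindred-only grant: excluded from the broad tag, surfaced as metadata instead.
text = "Other Zombies you control have indestructible."
print(is_granting_protection(text, "", exclude_kindred=True))  # False
print(get_kindred_protection_tags(text))                       # {'Zombies Gain Protection'}

# Inherent keyword only: the audit files this under 'Inherent'.
print(categorize_protection_card("Example Golem", "Indestructible", "Indestructible", "Artifact Creature"))  # 'Inherent'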
@@ -849,4 +849,89 @@ TOPDECK_EXCLUSION_PATTERNS: List[str] = [
    'from the top of their library',
    'look at the top card of target player\'s library',
    'reveal the top card of target player\'s library'
]

# ==============================================================================
# Keyword Normalization (M1 - Tagging Refinement)
# ==============================================================================

# Keyword normalization map: variant -> canonical
# Maps Commander-specific and variant keywords to their canonical forms
KEYWORD_NORMALIZATION_MAP: Dict[str, str] = {
    # Commander variants
    'Commander ninjutsu': 'Ninjutsu',
    'Commander Ninjutsu': 'Ninjutsu',

    # Partner variants (already excluded but mapped for reference)
    'Partner with': 'Partner',
    'Choose a Background': 'Choose a Background',  # Keep distinct
    "Doctor's Companion": "Doctor's Companion",  # Keep distinct

    # Case normalization for common keywords (most are already correct)
    'flying': 'Flying',
    'trample': 'Trample',
    'vigilance': 'Vigilance',
    'haste': 'Haste',
    'deathtouch': 'Deathtouch',
    'lifelink': 'Lifelink',
    'menace': 'Menace',
    'reach': 'Reach',
}

# Keywords that should never appear in theme tags
# Already excluded during keyword tagging, but documented here
KEYWORD_EXCLUSION_SET: set[str] = {
    'partner',  # Already excluded in tag_for_keywords
}

# Keyword allowlist - keywords that should survive singleton pruning
# Seeded from top keywords and theme whitelist
KEYWORD_ALLOWLIST: set[str] = {
    # Evergreen keywords (top 50 from baseline)
    'Flying', 'Enchant', 'Trample', 'Vigilance', 'Haste', 'Equip', 'Flash',
    'Mill', 'Scry', 'Transform', 'Cycling', 'First strike', 'Reach', 'Menace',
    'Lifelink', 'Treasure', 'Defender', 'Deathtouch', 'Kicker', 'Flashback',
    'Protection', 'Surveil', 'Landfall', 'Crew', 'Ward', 'Morph', 'Devoid',
    'Investigate', 'Fight', 'Food', 'Partner', 'Double strike', 'Indestructible',
    'Threshold', 'Proliferate', 'Convoke', 'Hexproof', 'Cumulative upkeep',
    'Goad', 'Delirium', 'Prowess', 'Suspend', 'Affinity', 'Madness', 'Manifest',
    'Amass', 'Domain', 'Unearth', 'Explore', 'Changeling',

    # Additional important mechanics
    'Myriad', 'Cascade', 'Storm', 'Dredge', 'Delve', 'Escape', 'Mutate',
    'Ninjutsu', 'Overload', 'Rebound', 'Retrace', 'Bloodrush', 'Cipher',
    'Extort', 'Evolve', 'Undying', 'Persist', 'Wither', 'Infect', 'Annihilator',
    'Exalted', 'Phasing', 'Shadow', 'Horsemanship', 'Banding', 'Rampage',
    'Shroud', 'Split second', 'Totem armor', 'Living weapon', 'Undaunted',
    'Improvise', 'Surge', 'Emerge', 'Escalate', 'Meld', 'Partner', 'Afflict',
    'Aftermath', 'Embalm', 'Eternalize', 'Exert', 'Fabricate', 'Improvise',
    'Assist', 'Jump-start', 'Mentor', 'Riot', 'Spectacle', 'Addendum',
    'Afterlife', 'Adapt', 'Enrage', 'Ascend', 'Learn', 'Boast', 'Foretell',
    'Squad', 'Encore', 'Daybound', 'Nightbound', 'Disturb', 'Cleave', 'Training',
    'Reconfigure', 'Blitz', 'Casualty', 'Connive', 'Hideaway', 'Prototype',
    'Read ahead', 'Living metal', 'More than meets the eye', 'Ravenous',
    'Squad', 'Toxic', 'For Mirrodin!', 'Backup', 'Bargain', 'Craft', 'Freerunning',
    'Plot', 'Spree', 'Offspring', 'Bestow', 'Monstrosity', 'Tribute',

    # Partner mechanics (distinct types)
    'Choose a Background', "Doctor's Companion",

    # Token types (frequently used)
    'Blood', 'Clue', 'Food', 'Gold', 'Treasure', 'Powerstone',

    # Common ability words
    'Landfall', 'Raid', 'Revolt', 'Threshold', 'Metalcraft', 'Morbid',
    'Bloodthirst', 'Battalion', 'Channel', 'Grandeur', 'Kinship', 'Sweep',
    'Radiance', 'Join forces', 'Fateful hour', 'Inspired', 'Heroic',
    'Constellation', 'Strive', 'Prowess', 'Ferocious', 'Formidable', 'Renown',
    'Tempting offer', 'Will of the council', 'Parley', 'Adamant', 'Devotion',
}

# Metadata tag prefixes (for M3 - metadata partition)
# Tags matching these patterns should be classified as metadata, not themes
METADATA_TAG_PREFIXES: List[str] = [
    'Applied:',
    'Bracket:',
    'Diagnostic:',
    'Internal:',
]
@@ -509,4 +509,77 @@ def create_mass_damage_mask(df: pd.DataFrame) -> pd.Series[bool]:
    damage_mask = create_text_mask(df, number_patterns)
    target_mask = create_text_mask(df, target_patterns)

    return damage_mask & target_mask


# ==============================================================================
# Keyword Normalization (M1 - Tagging Refinement)
# ==============================================================================

def normalize_keywords(
    raw: Union[List[str], Set[str], Tuple[str, ...]],
    allowlist: Set[str],
    frequency_map: dict[str, int]
) -> list[str]:
    """Normalize keyword strings for theme tagging.

    Applies normalization rules:
    1. Case normalization (via normalization map)
    2. Canonical mapping (e.g., "Commander Ninjutsu" -> "Ninjutsu")
    3. Singleton pruning (unless allowlisted)
    4. Deduplication
    5. Exclusion of blacklisted keywords

    Args:
        raw: Iterable of raw keyword strings
        allowlist: Set of keywords that should survive singleton pruning
        frequency_map: Dict mapping keywords to their occurrence count

    Returns:
        Deduplicated list of normalized keywords

    Raises:
        ValueError: If raw is not iterable

    Examples:
        >>> normalize_keywords(
        ...     ['Commander Ninjutsu', 'Flying', 'Allons-y!'],
        ...     {'Flying', 'Ninjutsu'},
        ...     {'Commander Ninjutsu': 2, 'Flying': 100, 'Allons-y!': 1}
        ... )
        ['Flying', 'Ninjutsu']  # 'Allons-y!' pruned as singleton
    """
    if not hasattr(raw, '__iter__') or isinstance(raw, (str, bytes)):
        raise ValueError(f"raw must be iterable, got {type(raw)}")

    normalized_keywords: set[str] = set()

    for keyword in raw:
        # Skip non-string entries
        if not isinstance(keyword, str):
            continue

        # Skip empty strings
        keyword = keyword.strip()
        if not keyword:
            continue

        # Skip excluded keywords
        if keyword.lower() in tag_constants.KEYWORD_EXCLUSION_SET:
            continue

        # Apply normalization map
        normalized = tag_constants.KEYWORD_NORMALIZATION_MAP.get(keyword, keyword)

        # Check if singleton (unless allowlisted)
        frequency = frequency_map.get(keyword, 0)
        is_singleton = frequency == 1
        is_allowlisted = normalized in allowlist or keyword in allowlist

        # Prune singletons that aren't allowlisted
        if is_singleton and not is_allowlisted:
            continue

        normalized_keywords.add(normalized)

    return sorted(list(normalized_keywords))
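Called directly with the constants added above, the helper behaves like this (a hedged example; the frequency counts are invented for illustration):

from code.tagging import tag_constants, tag_utils

raw = ['Commander Ninjutsu', 'flying', 'Allons-y!']
freq = {'Commander Ninjutsu': 2, 'flying': 120, 'Allons-y!': 1}

print(tag_utils.normalize_keywords(raw, tag_constants.KEYWORD_ALLOWLIST, freq))
# ['Flying', 'Ninjutsu'] - 'Allons-y!' is dropped as a non-allowlisted singleton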
@@ -580,6 +580,11 @@ def add_creatures_to_tags(df: pd.DataFrame, color: str) -> None:
## Add keywords to theme tags
def tag_for_keywords(df: pd.DataFrame, color: str) -> None:
    """Tag cards based on their keywords using vectorized operations.

    When TAG_NORMALIZE_KEYWORDS is enabled, applies normalization:
    - Canonical mapping (e.g., "Commander Ninjutsu" -> "Ninjutsu")
    - Singleton pruning (unless allowlisted)
    - Case normalization

    Args:
        df: DataFrame containing card data
@@ -589,6 +594,20 @@ def tag_for_keywords(df: pd.DataFrame, color: str) -> None:
    start_time = pd.Timestamp.now()

    try:
        from settings import TAG_NORMALIZE_KEYWORDS

        # Load frequency map if normalization is enabled
        frequency_map: dict[str, int] = {}
        if TAG_NORMALIZE_KEYWORDS:
            freq_map_path = Path(__file__).parent / 'keyword_frequency_map.json'
            if freq_map_path.exists():
                with open(freq_map_path, 'r', encoding='utf-8') as f:
                    frequency_map = json.load(f)
                logger.info('Loaded keyword frequency map with %d entries', len(frequency_map))
            else:
                logger.warning('Keyword frequency map not found, normalization disabled for this run')
                TAG_NORMALIZE_KEYWORDS = False

        # Create mask for valid keywords
        has_keywords = pd.notna(df['keywords'])
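The frequency map is read as a plain keyword-to-count mapping. A hypothetical sketch of the shape keyword_frequency_map.json is expected to have (the entries and counts below are invented, only the structure is taken from the loading code above):

import json

example_map = {"Flying": 14210, "Commander Ninjutsu": 2, "Allons-y!": 1}
print(json.dumps(example_map, indent=2))  # keyword -> occurrence count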
@@ -608,17 +627,29 @@
            else:
                keywords_iterable = []

            filtered_keywords = [
                kw for kw in keywords_iterable
                if kw and kw.lower() not in exclusion_keywords
            ]

            return sorted(list(set(base_tags + filtered_keywords)))
            # Apply normalization if enabled
            if TAG_NORMALIZE_KEYWORDS and frequency_map:
                normalized_keywords = tag_utils.normalize_keywords(
                    keywords_iterable,
                    tag_constants.KEYWORD_ALLOWLIST,
                    frequency_map
                )
                return sorted(list(set(base_tags + normalized_keywords)))
            else:
                # Legacy behavior: simple exclusion filter
                filtered_keywords = [
                    kw for kw in keywords_iterable
                    if kw and kw.lower() not in exclusion_keywords
                ]
                return sorted(list(set(base_tags + filtered_keywords)))

        df.loc[has_keywords, 'themeTags'] = keywords_df.apply(_merge_keywords, axis=1)

        duration = (pd.Timestamp.now() - start_time).total_seconds()
        logger.info('Tagged %d cards with keywords in %.2f seconds', has_keywords.sum(), duration)

        if TAG_NORMALIZE_KEYWORDS:
            logger.info('Keyword normalization enabled for %s', color)

    except Exception as e:
        logger.error('Error tagging keywords: %s', str(e))
@@ -7000,6 +7031,9 @@ def tag_for_protection(df: pd.DataFrame, color: str) -> None:
    - Ward
    - Phase out

    With TAG_PROTECTION_GRANTS=1, only tags cards that grant protection to other
    permanents, filtering out cards with inherent protection.

    The function uses helper functions to identify different types of protection
    and applies tags consistently using vectorized operations.
@@ -7025,13 +7059,47 @@
    required_cols = {'text', 'themeTags', 'keywords'}
    tag_utils.validate_dataframe_columns(df, required_cols)

    # Create masks for different protection patterns
    text_mask = create_protection_text_mask(df)
    keyword_mask = create_protection_keyword_mask(df)
    exclusion_mask = create_protection_exclusion_mask(df)
    # Check if grant detection is enabled (M2 feature flag)
    use_grant_detection = os.getenv('TAG_PROTECTION_GRANTS', '1').lower() in ('1', 'true', 'yes')

    # Combine masks
    final_mask = (text_mask | keyword_mask) & ~exclusion_mask
    if use_grant_detection:
        # M2: Use grant detection to filter out inherent-only protection
        from code.tagging.protection_grant_detection import is_granting_protection, get_kindred_protection_tags

        # Create a grant detection mask
        grant_mask = df.apply(
            lambda row: is_granting_protection(
                str(row.get('text', '')),
                str(row.get('keywords', ''))
            ),
            axis=1
        )

        final_mask = grant_mask
        logger.info(f'Using M2 grant detection (TAG_PROTECTION_GRANTS=1)')

        # Apply kindred metadata tags for creature-type-specific grants
        kindred_count = 0
        for idx, row in df[final_mask].iterrows():
            text = str(row.get('text', ''))
            kindred_tags = get_kindred_protection_tags(text)

            if kindred_tags:
                # Add kindred-specific metadata tags
                current_tags = str(row.get('metadataTags', ''))
                existing = set(t.strip() for t in current_tags.split(',') if t.strip())
                existing.update(kindred_tags)
                df.at[idx, 'metadataTags'] = ', '.join(sorted(existing))
                kindred_count += 1

        if kindred_count > 0:
            logger.info(f'Applied kindred metadata tags to {kindred_count} cards')
    else:
        # Legacy: Use original text/keyword patterns
        text_mask = create_protection_text_mask(df)
        keyword_mask = create_protection_keyword_mask(df)
        exclusion_mask = create_protection_exclusion_mask(df)
        final_mask = (text_mask | keyword_mask) & ~exclusion_mask

    # Apply tags via rules engine
    tag_utils.apply_rules(df, rules=[
182
code/tests/test_keyword_normalization.py
Normal file
@@ -0,0 +1,182 @@
"""Tests for keyword normalization (M1 - Tagging Refinement)."""
from __future__ import annotations

import pytest

from code.tagging import tag_utils, tag_constants


class TestKeywordNormalization:
    """Test suite for normalize_keywords function."""

    def test_canonical_mappings(self):
        """Test that variant keywords map to canonical forms."""
        raw = ['Commander Ninjutsu', 'Flying', 'Trample']
        allowlist = tag_constants.KEYWORD_ALLOWLIST
        frequency_map = {
            'Commander Ninjutsu': 2,
            'Flying': 100,
            'Trample': 50
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        assert 'Ninjutsu' in result
        assert 'Flying' in result
        assert 'Trample' in result
        assert 'Commander Ninjutsu' not in result

    def test_singleton_pruning(self):
        """Test that singleton keywords are pruned unless allowlisted."""
        raw = ['Allons-y!', 'Flying', 'Take 59 Flights of Stairs']
        allowlist = {'Flying'}  # Only Flying is allowlisted
        frequency_map = {
            'Allons-y!': 1,
            'Flying': 100,
            'Take 59 Flights of Stairs': 1
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        assert 'Flying' in result
        assert 'Allons-y!' not in result
        assert 'Take 59 Flights of Stairs' not in result

    def test_case_normalization(self):
        """Test that keywords are normalized to proper case."""
        raw = ['flying', 'TRAMPLE', 'vigilance']
        allowlist = {'Flying', 'Trample', 'Vigilance'}
        frequency_map = {
            'flying': 100,
            'TRAMPLE': 50,
            'vigilance': 75
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        # Case normalization happens via the map
        # If not in map, original case is preserved
        assert len(result) == 3

    def test_partner_exclusion(self):
        """Test that partner keywords remain excluded."""
        raw = ['Partner', 'Flying', 'Trample']
        allowlist = {'Flying', 'Trample'}
        frequency_map = {
            'Partner': 50,
            'Flying': 100,
            'Trample': 50
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        assert 'Flying' in result
        assert 'Trample' in result
        assert 'Partner' not in result  # Excluded
        assert 'partner' not in result

    def test_empty_input(self):
        """Test that empty input returns empty list."""
        result = tag_utils.normalize_keywords([], set(), {})
        assert result == []

    def test_whitespace_handling(self):
        """Test that whitespace is properly stripped."""
        raw = [' Flying ', 'Trample ', ' Vigilance']
        allowlist = {'Flying', 'Trample', 'Vigilance'}
        frequency_map = {
            'Flying': 100,
            'Trample': 50,
            'Vigilance': 75
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        assert 'Flying' in result
        assert 'Trample' in result
        assert 'Vigilance' in result

    def test_deduplication(self):
        """Test that duplicate keywords are deduplicated."""
        raw = ['Flying', 'Flying', 'Trample', 'Flying']
        allowlist = {'Flying', 'Trample'}
        frequency_map = {
            'Flying': 100,
            'Trample': 50
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        assert result.count('Flying') == 1
        assert result.count('Trample') == 1

    def test_non_string_entries_skipped(self):
        """Test that non-string entries are safely skipped."""
        raw = ['Flying', None, 123, 'Trample', '']
        allowlist = {'Flying', 'Trample'}
        frequency_map = {
            'Flying': 100,
            'Trample': 50
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        assert 'Flying' in result
        assert 'Trample' in result
        assert len(result) == 2

    def test_invalid_input_raises_error(self):
        """Test that non-iterable input raises ValueError."""
        with pytest.raises(ValueError, match="raw must be iterable"):
            tag_utils.normalize_keywords("not-a-list", set(), {})

    def test_allowlist_preserves_singletons(self):
        """Test that allowlisted keywords survive even if they're singletons."""
        raw = ['Myriad', 'Flying', 'Cascade']
        allowlist = {'Flying', 'Myriad', 'Cascade'}  # All allowlisted
        frequency_map = {
            'Myriad': 1,  # Singleton
            'Flying': 100,
            'Cascade': 1  # Singleton
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        assert 'Myriad' in result  # Preserved despite being singleton
        assert 'Flying' in result
        assert 'Cascade' in result  # Preserved despite being singleton


class TestKeywordIntegration:
    """Integration tests for keyword normalization in tagging flow."""

    def test_normalization_preserves_evergreen_keywords(self):
        """Test that common evergreen keywords are always preserved."""
        evergreen = ['Flying', 'Trample', 'Vigilance', 'Haste', 'Deathtouch', 'Lifelink']
        allowlist = tag_constants.KEYWORD_ALLOWLIST
        frequency_map = {kw: 100 for kw in evergreen}  # All common

        result = tag_utils.normalize_keywords(evergreen, allowlist, frequency_map)

        for kw in evergreen:
            assert kw in result

    def test_crossover_keywords_pruned(self):
        """Test that crossover-specific singletons are pruned."""
        crossover_singletons = [
            'Gae Bolg',  # Final Fantasy
            'Psychic Defense',  # Warhammer 40K
            'Allons-y!',  # Doctor Who
            'Flying'  # Evergreen (control)
        ]
        allowlist = {'Flying'}  # Only Flying allowed
        frequency_map = {
            'Gae Bolg': 1,
            'Psychic Defense': 1,
            'Allons-y!': 1,
            'Flying': 100
        }

        result = tag_utils.normalize_keywords(crossover_singletons, allowlist, frequency_map)

        assert result == ['Flying']  # Only evergreen survived
169
code/tests/test_protection_grant_detection.py
Normal file
@@ -0,0 +1,169 @@
"""
Tests for protection grant detection (M2).

Tests the ability to distinguish between cards that grant protection
and cards that have inherent protection.
"""

import pytest
from code.tagging.protection_grant_detection import (
    is_granting_protection,
    categorize_protection_card
)


class TestGrantDetection:
    """Test grant verb detection."""

    def test_gains_hexproof(self):
        """Cards with 'gains hexproof' should be detected as granting."""
        text = "Target creature gains hexproof until end of turn."
        assert is_granting_protection(text, "")

    def test_gives_indestructible(self):
        """Cards with 'gives indestructible' should be detected as granting."""
        text = "This creature gives target creature indestructible."
        assert is_granting_protection(text, "")

    def test_creatures_you_control_have(self):
        """Mass grant pattern should be detected."""
        text = "Creatures you control have hexproof."
        assert is_granting_protection(text, "")

    def test_equipped_creature_gets(self):
        """Equipment grant pattern should be detected."""
        text = "Equipped creature gets +2/+2 and has indestructible."
        assert is_granting_protection(text, "")


class TestInherentDetection:
    """Test inherent protection detection."""

    def test_creature_with_hexproof_keyword(self):
        """Creature with hexproof keyword should not be detected as granting."""
        text = "Hexproof (This creature can't be the target of spells or abilities.)"
        keywords = "Hexproof"
        assert not is_granting_protection(text, keywords)

    def test_indestructible_artifact(self):
        """Artifact with indestructible keyword should not be detected as granting."""
        text = "Indestructible"
        keywords = "Indestructible"
        assert not is_granting_protection(text, keywords)

    def test_ward_creature(self):
        """Creature with Ward should not be detected as granting (unless it grants to others)."""
        text = "Ward {2}"
        keywords = "Ward"
        assert not is_granting_protection(text, keywords)


class TestMixedCases:
    """Test cards that both grant and have protection."""

    def test_creature_with_self_grant(self):
        """Creature that grants itself protection should be detected."""
        text = "This creature gains indestructible until end of turn."
        keywords = ""
        assert is_granting_protection(text, keywords)

    def test_equipment_with_inherent_and_grant(self):
        """Equipment with indestructible that grants protection."""
        text = "Indestructible. Equipped creature has hexproof."
        keywords = "Indestructible"
        # Should be detected as granting because of "has hexproof"
        assert is_granting_protection(text, keywords)


class TestExclusions:
    """Test exclusion patterns."""

    def test_cant_have_hexproof(self):
        """Cards that prevent protection should not be tagged."""
        text = "Creatures your opponents control can't have hexproof."
        assert not is_granting_protection(text, "")

    def test_loses_indestructible(self):
        """Cards that remove protection should not be tagged."""
        text = "Target creature loses indestructible until end of turn."
        assert not is_granting_protection(text, "")


class TestEdgeCases:
    """Test edge cases and special patterns."""

    def test_protection_from_color(self):
        """Protection from [quality] in keywords without grant text."""
        text = "Protection from red"
        keywords = "Protection from red"
        assert not is_granting_protection(text, keywords)

    def test_empty_text(self):
        """Empty text should return False."""
        assert not is_granting_protection("", "")

    def test_none_text(self):
        """None text should return False."""
        assert not is_granting_protection(None, "")


class TestCategorization:
    """Test full card categorization."""

    def test_shell_shield_is_grant(self):
        """Shell Shield grants hexproof - should be Grant."""
        text = "Target creature gets +0/+3 and gains hexproof until end of turn."
        cat = categorize_protection_card("Shell Shield", text, "", "Instant")
        assert cat == "Grant"

    def test_geist_of_saint_traft_is_mixed(self):
        """Geist has hexproof and creates tokens - Mixed."""
        text = "Hexproof. Whenever this attacks, create a token."
        keywords = "Hexproof"
        cat = categorize_protection_card("Geist", text, keywords, "Creature")
        # Has hexproof keyword, so inherent
        assert cat in ("Inherent", "Mixed")

    def test_darksteel_brute_is_inherent(self):
        """Darksteel Brute has indestructible - should be Inherent."""
        text = "Indestructible"
        keywords = "Indestructible"
        cat = categorize_protection_card("Darksteel Brute", text, keywords, "Artifact")
        assert cat == "Inherent"

    def test_scion_of_oona_is_grant(self):
        """Scion of Oona grants shroud to other faeries - should be Grant."""
        text = "Other Faeries you control have shroud."
        keywords = "Flying, Flash"
        cat = categorize_protection_card("Scion of Oona", text, keywords, "Creature")
        assert cat == "Grant"


class TestRealWorldCards:
    """Test against actual card samples from baseline audit."""

    def test_bulwark_ox(self):
        """Bulwark Ox - grants hexproof and indestructible."""
        text = "Sacrifice: Creatures you control with counters gain hexproof and indestructible"
        assert is_granting_protection(text, "")

    def test_bloodsworn_squire(self):
        """Bloodsworn Squire - grants itself indestructible."""
        text = "This creature gains indestructible until end of turn"
        assert is_granting_protection(text, "")

    def test_kaldra_compleat(self):
        """Kaldra Compleat - equipment with indestructible that grants."""
        text = "Indestructible. Equipped creature gets +5/+5 and has indestructible"
        keywords = "Indestructible"
        assert is_granting_protection(text, keywords)

    def test_ward_sliver(self):
        """Ward Sliver - grants protection to all slivers."""
        text = "All Slivers have protection from the chosen color"
        assert is_granting_protection(text, "")

    def test_rebbec(self):
        """Rebbec - grants protection to artifacts."""
        text = "Artifacts you control have protection from each mana value"
        assert is_granting_protection(text, "")
@@ -170,7 +170,7 @@ def _step5_summary_placeholder_html(token: int, *, message: str | None = None) -
    return (
        f'<div id="deck-summary" data-summary '
        f'hx-get="/build/step5/summary?token={token}" '
        'hx-trigger="load, step5:refresh from:body" hx-swap="outerHTML">'
        'hx-trigger="step5:refresh from:body" hx-swap="outerHTML">'
        f'<div class="muted" style="margin-top:1rem;">{_esc(text)}</div>'
        '</div>'
    )
@@ -1181,6 +1181,9 @@ def _ensure_setup_ready(out, force: bool = False) -> None:
            # Only flip phase if previous run finished
            if st.get('phase') in {'themes','themes-fast'}:
                st['phase'] = 'done'
            # Also ensure percent is 100 when done
            if st.get('finished_at'):
                st['percent'] = 100
            with open(status_path, 'w', encoding='utf-8') as _wf:
                json.dump(st, _wf)
        except Exception:
@@ -1463,16 +1466,17 @@
        except Exception:
            pass

    # Unconditional fallback: if (for any reason) no theme export ran above, perform a fast-path export now.
    # This guarantees that clicking Run Setup/Tagging always leaves themes current even when tagging wasn't needed.
    # Conditional fallback: only run theme export if refresh_needed was True but somehow no export performed.
    # This avoids repeated exports when setup is already complete and _ensure_setup_ready is called again.
    try:
        if not theme_export_performed:
        if not theme_export_performed and refresh_needed:
            _refresh_theme_catalog(out, force=False, fast_path=True)
    except Exception:
        pass
    else:  # If export just ran (either earlier or via fallback), ensure enrichment ran (safety double-call guard inside helper)
        try:
            _run_theme_metadata_enrichment(out)
            if theme_export_performed or refresh_needed:
                _run_theme_metadata_enrichment(out)
        except Exception:
            pass
@@ -309,7 +309,8 @@
        .catch(function(){ /* noop */ });
      } catch(e) {}
    }
    setInterval(pollStatus, 3000);
    // Poll every 10 seconds instead of 3 to reduce server load (only for header indicator)
    setInterval(pollStatus, 10000);
    pollStatus();

    // Health indicator poller
@@ -462,11 +462,12 @@
      <!-- controls now above -->

      {% if allow_must_haves %}
        {% include "partials/include_exclude_summary.html" with oob=False %}
        {% set oob = False %}
        {% include "partials/include_exclude_summary.html" %}
      {% endif %}
      <div id="deck-summary" data-summary
           hx-get="/build/step5/summary?token={{ summary_token }}"
           hx-trigger="load, step5:refresh from:body"
           hx-trigger="load once, step5:refresh from:body"
           hx-swap="outerHTML">
        <div class="muted" style="margin-top:1rem;">
          {% if summary_ready %}Loading deck summary…{% else %}Deck summary will appear after the build completes.{% endif %}
@@ -127,7 +127,8 @@
        .then(update)
        .catch(function(){});
    }
    setInterval(poll, 3000);
    // Poll every 5 seconds instead of 3 to reduce server load
    setInterval(poll, 5000);
    poll();
  })();
</script>