mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-16 23:50:12 +01:00
feat: add keyword normalization and protection grant detection, fix template syntax and polling issues
This commit is contained in:
parent
86ec68acb4
commit
06d8796316
17 changed files with 1692 additions and 611 deletions
203
code/scripts/audit_protection_full_v2.py
Normal file
@@ -0,0 +1,203 @@
"""
Full audit of Protection-tagged cards with kindred metadata support (M2 Phase 2).

Created: October 8, 2025
Purpose: Audit and validate Protection tag precision after implementing grant detection.
Can be re-run periodically to check tagging quality.

This script audits ALL Protection-tagged cards and categorizes them:
- Grant: Gives broad protection to other permanents YOU control
- Kindred: Gives protection to specific creature types (metadata tags)
- Mixed: Both broad and kindred/inherent
- Inherent: Only has protection itself
- ConditionalSelf: Only conditionally grants to itself
- Opponent: Grants to opponent's permanents
- Neither: False positive

Outputs:
- m2_audit_v2.json: Full analysis with summary
- m2_audit_v2_grant.csv: Cards for main Protection tag
- m2_audit_v2_kindred.csv: Cards for kindred metadata tags
- m2_audit_v2_mixed.csv: Cards with both broad and kindred grants
- m2_audit_v2_conditional.csv: Conditional self-grants (exclude)
- m2_audit_v2_inherent.csv: Inherent protection only (exclude)
- m2_audit_v2_opponent.csv: Opponent grants (exclude)
- m2_audit_v2_neither.csv: False positives (exclude)
- m2_audit_v2_all.csv: All cards combined
"""

import sys
from pathlib import Path
import pandas as pd
import json

# Add project root to path
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

from code.tagging.protection_grant_detection import (
    categorize_protection_card,
    get_kindred_protection_tags,
    is_granting_protection,
)

def load_all_cards():
    """Load all cards from color/identity CSV files."""
    csv_dir = project_root / 'csv_files'

    # Get all color/identity CSVs (not the raw cards.csv)
    csv_files = list(csv_dir.glob('*_cards.csv'))
    csv_files = [f for f in csv_files if f.stem not in ['cards', 'testdata']]

    all_cards = []
    for csv_file in csv_files:
        try:
            df = pd.read_csv(csv_file)
            all_cards.append(df)
        except Exception as e:
            print(f"Warning: Could not load {csv_file.name}: {e}")

    # Combine all DataFrames
    combined = pd.concat(all_cards, ignore_index=True)

    # Drop duplicates (cards appear in multiple color files)
    combined = combined.drop_duplicates(subset=['name'], keep='first')

    return combined

def audit_all_protection_cards():
    """Audit all Protection-tagged cards."""
    print("Loading all cards...")
    df = load_all_cards()

    print(f"Total cards loaded: {len(df)}")

    # Filter to Protection-tagged cards (column is 'themeTags' in color CSVs)
    df_prot = df[df['themeTags'].str.contains('Protection', case=False, na=False)].copy()

    print(f"Protection-tagged cards: {len(df_prot)}")

    # Categorize each card
    categories = []
    grants_list = []
    kindred_tags_list = []

    for idx, row in df_prot.iterrows():
        name = row['name']
        text = str(row.get('text', '')).replace('\\n', '\n')  # Convert escaped newlines to real newlines
        keywords = str(row.get('keywords', ''))
        card_type = str(row.get('type', ''))

        # Categorize with kindred exclusion enabled
        category = categorize_protection_card(name, text, keywords, card_type, exclude_kindred=True)

        # Check if it grants broadly
        grants_broad = is_granting_protection(text, keywords, exclude_kindred=True)

        # Get kindred tags
        kindred_tags = get_kindred_protection_tags(text)

        categories.append(category)
        grants_list.append(grants_broad)
        kindred_tags_list.append(', '.join(sorted(kindred_tags)) if kindred_tags else '')

    df_prot['category'] = categories
    df_prot['grants_broad'] = grants_list
    df_prot['kindred_tags'] = kindred_tags_list

    # Generate summary (convert numpy types to native Python for JSON serialization)
    summary = {
        'total': int(len(df_prot)),
        'categories': {k: int(v) for k, v in df_prot['category'].value_counts().to_dict().items()},
        'grants_broad_count': int(df_prot['grants_broad'].sum()),
        'kindred_cards_count': int((df_prot['kindred_tags'] != '').sum()),
    }

    # Calculate keep vs remove
    keep_categories = {'Grant', 'Mixed'}
    kindred_only = df_prot[df_prot['category'] == 'Kindred']
    keep_count = len(df_prot[df_prot['category'].isin(keep_categories)])
    remove_count = len(df_prot[~df_prot['category'].isin(keep_categories | {'Kindred'})])

    summary['keep_main_tag'] = keep_count
    summary['kindred_metadata'] = len(kindred_only)
    summary['remove'] = remove_count
    summary['precision_estimate'] = round((keep_count / len(df_prot)) * 100, 1) if len(df_prot) > 0 else 0

    # Print summary
    print(f"\n{'='*60}")
    print("AUDIT SUMMARY")
    print(f"{'='*60}")
    print(f"Total Protection-tagged cards: {summary['total']}")
    print(f"\nCategories:")
    for cat, count in sorted(summary['categories'].items()):
        pct = (count / summary['total']) * 100
        print(f" {cat:20s} {count:4d} ({pct:5.1f}%)")

    print(f"\n{'='*60}")
    print(f"Main Protection tag: {keep_count:4d} ({keep_count/len(df_prot)*100:5.1f}%)")
    print(f"Kindred metadata only: {len(kindred_only):4d} ({len(kindred_only)/len(df_prot)*100:5.1f}%)")
    print(f"Remove: {remove_count:4d} ({remove_count/len(df_prot)*100:5.1f}%)")
    print(f"{'='*60}")
    print(f"Precision estimate: {summary['precision_estimate']}%")
    print(f"{'='*60}\n")

    # Export results
    output_dir = project_root / 'logs' / 'roadmaps' / 'source' / 'tagging_refinement'
    output_dir.mkdir(parents=True, exist_ok=True)

    # Export JSON summary
    with open(output_dir / 'm2_audit_v2.json', 'w') as f:
        json.dump({
            'summary': summary,
            'cards': df_prot[['name', 'type', 'category', 'grants_broad', 'kindred_tags', 'keywords', 'text']].to_dict(orient='records')
        }, f, indent=2)

    # Export CSVs by category
    export_cols = ['name', 'type', 'category', 'grants_broad', 'kindred_tags', 'keywords', 'text']

    # Grant category
    df_grant = df_prot[df_prot['category'] == 'Grant']
    df_grant[export_cols].to_csv(output_dir / 'm2_audit_v2_grant.csv', index=False)
    print(f"Exported {len(df_grant)} Grant cards to m2_audit_v2_grant.csv")

    # Kindred category
    df_kindred = df_prot[df_prot['category'] == 'Kindred']
    df_kindred[export_cols].to_csv(output_dir / 'm2_audit_v2_kindred.csv', index=False)
    print(f"Exported {len(df_kindred)} Kindred cards to m2_audit_v2_kindred.csv")

    # Mixed category
    df_mixed = df_prot[df_prot['category'] == 'Mixed']
    df_mixed[export_cols].to_csv(output_dir / 'm2_audit_v2_mixed.csv', index=False)
    print(f"Exported {len(df_mixed)} Mixed cards to m2_audit_v2_mixed.csv")

    # ConditionalSelf category
    df_conditional = df_prot[df_prot['category'] == 'ConditionalSelf']
    df_conditional[export_cols].to_csv(output_dir / 'm2_audit_v2_conditional.csv', index=False)
    print(f"Exported {len(df_conditional)} ConditionalSelf cards to m2_audit_v2_conditional.csv")

    # Inherent category
    df_inherent = df_prot[df_prot['category'] == 'Inherent']
    df_inherent[export_cols].to_csv(output_dir / 'm2_audit_v2_inherent.csv', index=False)
    print(f"Exported {len(df_inherent)} Inherent cards to m2_audit_v2_inherent.csv")

    # Opponent category
    df_opponent = df_prot[df_prot['category'] == 'Opponent']
    df_opponent[export_cols].to_csv(output_dir / 'm2_audit_v2_opponent.csv', index=False)
    print(f"Exported {len(df_opponent)} Opponent cards to m2_audit_v2_opponent.csv")

    # Neither category
    df_neither = df_prot[df_prot['category'] == 'Neither']
    df_neither[export_cols].to_csv(output_dir / 'm2_audit_v2_neither.csv', index=False)
    print(f"Exported {len(df_neither)} Neither cards to m2_audit_v2_neither.csv")

    # All cards
    df_prot[export_cols].to_csv(output_dir / 'm2_audit_v2_all.csv', index=False)
    print(f"Exported {len(df_prot)} total cards to m2_audit_v2_all.csv")

    print(f"\nAll files saved to: {output_dir}")

    return df_prot, summary

if __name__ == '__main__':
    df_results, summary = audit_all_protection_cards()
@@ -1,6 +1,7 @@
from __future__ import annotations

# Standard library imports
import os
from typing import Dict, List, Optional

# ----------------------------------------------------------------------------------
@@ -98,4 +99,17 @@ CSV_DIRECTORY: str = 'csv_files'
FILL_NA_COLUMNS: Dict[str, Optional[str]] = {
    'colorIdentity': 'Colorless',  # Default color identity for cards without one
    'faceName': None  # Use card's name column value when face name is not available
}

# ----------------------------------------------------------------------------------
# TAGGING REFINEMENT FEATURE FLAGS (M1-M3)
# ----------------------------------------------------------------------------------

# M1: Enable keyword normalization and singleton pruning
TAG_NORMALIZE_KEYWORDS = os.getenv('TAG_NORMALIZE_KEYWORDS', '1').lower() not in ('0', 'false', 'off', 'disabled')

# M2: Enable protection grant detection (planned)
TAG_PROTECTION_GRANTS = os.getenv('TAG_PROTECTION_GRANTS', '0').lower() not in ('0', 'false', 'off', 'disabled')

# M3: Enable metadata/theme partition (planned)
TAG_METADATA_SPLIT = os.getenv('TAG_METADATA_SPLIT', '0').lower() not in ('0', 'false', 'off', 'disabled')
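All three flags share the same parsing rule: the environment value is lowercased and the flag stays on unless it is one of the explicit "off" strings. A minimal standalone sketch of that behaviour (the helper name flag_enabled and OFF_VALUES are illustrative, not part of the codebase):

import os

OFF_VALUES = ('0', 'false', 'off', 'disabled')

def flag_enabled(name: str, default: str) -> bool:
    # Mirrors the settings expression: enabled unless explicitly disabled.
    return os.getenv(name, default).lower() not in OFF_VALUES

os.environ['TAG_NORMALIZE_KEYWORDS'] = 'off'
print(flag_enabled('TAG_NORMALIZE_KEYWORDS', '1'))  # False - explicitly disabled
print(flag_enabled('TAG_PROTECTION_GRANTS', '0'))   # False - default '0' counts as off
print(flag_enabled('TAG_METADATA_SPLIT', 'yes'))    # True - any non-off value enables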
493
code/tagging/protection_grant_detection.py
Normal file
@@ -0,0 +1,493 @@
"""
Protection grant detection implementation for M2.

This module provides helpers to distinguish cards that grant protection effects
from cards that have inherent protection effects.

Usage in tagger.py:
    from code.tagging.protection_grant_detection import is_granting_protection

    if is_granting_protection(text, keywords):
        # Tag as Protection
"""

import re
from typing import Set, List, Pattern

from code.tagging.tag_constants import CREATURE_TYPES


# Pre-compile kindred detection patterns at module load for performance
# Pattern: (compiled_regex, tag_name_template)
KINDRED_PATTERNS: List[tuple[Pattern, str]] = []

def _init_kindred_patterns():
    """Initialize pre-compiled kindred patterns for all creature types."""
    global KINDRED_PATTERNS
    if KINDRED_PATTERNS:
        return  # Already initialized

    for creature_type in CREATURE_TYPES:
        creature_lower = creature_type.lower()
        creature_escaped = re.escape(creature_lower)
        tag_name = f"{creature_type}s Gain Protection"

        # Create 3 patterns per type
        patterns_to_compile = [
            (rf'\bother {creature_escaped}s?\b.*\b(have|gain)\b', tag_name),
            (rf'\b{creature_escaped} creatures?\b.*\b(have|gain)\b', tag_name),
            (rf'\btarget {creature_escaped}\b.*\bgains?\b', tag_name),
        ]

        for pattern_str, tag in patterns_to_compile:
            try:
                compiled = re.compile(pattern_str, re.IGNORECASE)
                KINDRED_PATTERNS.append((compiled, tag))
            except re.error:
                # Skip patterns that fail to compile
                pass


# Grant verb patterns - cards that give protection to other permanents
# These patterns look for grant verbs that affect OTHER permanents, not self
GRANT_VERB_PATTERNS = [
    r'\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'\bgive[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'\bgrant[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'\bget[s]?\b.*\+.*\b(hexproof|shroud|indestructible|ward|protection)\b',  # "gets +X/+X and has" pattern
]

# Self-reference patterns that should NOT count as granting
# Reminder text and keyword lines only
SELF_REFERENCE_PATTERNS = [
    r'^\s*(hexproof|shroud|indestructible|ward|protection)',  # Start of text (keyword ability)
    r'\([^)]*\b(hexproof|shroud|indestructible|ward|protection)[^)]*\)',  # Reminder text in parens
]

# Conditional self-grant patterns - activated/triggered abilities that grant to self
CONDITIONAL_SELF_GRANT_PATTERNS = [
    # Activated abilities
    r'\{[^}]*\}.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'discard.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b',
    r'\{t\}.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b',
    r'sacrifice.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b',
    r'pay.*life.*:.*\bthis (creature|permanent|artifact|enchantment)\b.*\bgain[s]?\b',
    # Triggered abilities that grant to self only
    r'whenever.*\b(this creature|this permanent|it)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'whenever you (cast|play|attack|cycle|discard|commit).*\b(this creature|this permanent|it)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'at the beginning.*\b(this creature|this permanent|it)\b.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'whenever.*\b(this creature|this permanent)\b (attacks|enters|becomes).*\b(this creature|this permanent|it)\b.*\bgain[s]?\b',
    # Named self-references (e.g., "Pristine Skywise gains")
    r'whenever you cast.*[A-Z][a-z]+.*gains.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    r'whenever you.*[A-Z][a-z]+.*gains.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    # Static conditional abilities (as long as, if you control X)
    r'as long as.*\b(this creature|this permanent|it|has)\b.*(has|gains?).*\b(hexproof|shroud|indestructible|ward|protection)\b',
]

# Mass grant patterns - affects multiple creatures YOU control
MASS_GRANT_PATTERNS = [
    r'creatures you control (have|gain|get)',
    r'other .* you control (have|gain|get)',
    r'(artifacts?|enchantments?|permanents?) you control (have|gain|get)',  # Artifacts you control have...
    r'other (creatures?|artifacts?|enchantments?) (have|gain|get)',  # Other creatures have...
    r'all (creatures?|slivers?|permanents?) (have|gain|get)',  # All creatures/slivers have...
]

# Targeted grant patterns - must specify "you control"
TARGETED_GRANT_PATTERNS = [
    r'target .* you control (gains?|gets?|has)',
    r'equipped creature (gains?|gets?|has)',
    r'enchanted creature (gains?|gets?|has)',
]

# Exclusion patterns - cards that remove or prevent protection
EXCLUSION_PATTERNS = [
    r"can't have (hexproof|indestructible|ward|shroud)",
    r"lose[s]? (hexproof|indestructible|ward|shroud|protection)",
    r"without (hexproof|indestructible|ward|shroud)",
    r"protection from.*can't",
]

# Opponent grant patterns - grants to opponent's permanents (EXCLUDE these)
OPPONENT_GRANT_PATTERNS = [
    r'target opponent',
    r'each opponent',
    r'all creatures',  # "all creatures" without "you control"
    r'all permanents',  # "all permanents" without "you control"
    r'each player',
    r'each creature',  # "each creature" without "you control"
]

# Kindred-specific grant patterns for metadata tagging
KINDRED_GRANT_PATTERNS = {
    'Knights Gain Protection': [
        r'knight[s]? you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other knight[s]?.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Merfolk Gain Protection': [
        r'merfolk you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other merfolk.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Zombies Gain Protection': [
        r'zombie[s]? you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other zombie[s]?.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'target.*zombie.*\bgain[s]?\b.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Vampires Gain Protection': [
        r'vampire[s]? you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other vampire[s]?.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Elves Gain Protection': [
        r'el(f|ves) you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other el(f|ves).*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Dragons Gain Protection': [
        r'dragon[s]? you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other dragon[s]?.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Goblins Gain Protection': [
        r'goblin[s]? you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other goblin[s]?.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Slivers Gain Protection': [
        r'sliver[s]? you control.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'all sliver[s]?.*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other sliver[s]?.*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Artifacts Gain Protection': [
        r'artifact[s]? you control (have|gain).*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other artifact[s]? (have|gain).*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
    'Enchantments Gain Protection': [
        r'enchantment[s]? you control (have|gain).*\b(hexproof|shroud|indestructible|ward|protection)\b',
        r'other enchantment[s]? (have|gain).*\b(hexproof|shroud|indestructible|ward|protection)\b',
    ],
}

# Protection keyword patterns for inherent check
PROTECTION_KEYWORDS = {
    'hexproof',
    'shroud',
    'indestructible',
    'ward',
    'protection from',
    'protection',
}


def get_kindred_protection_tags(text: str) -> Set[str]:
    """
    Identify kindred-specific protection grants for metadata tagging.

    Returns a set of metadata tag names like "Knights Gain Protection".

    Uses both predefined patterns and dynamic creature type detection.
    """
    if not text:
        return set()

    # Initialize pre-compiled patterns if needed
    _init_kindred_patterns()

    text_lower = text.lower()
    tags = set()

    # Check predefined patterns (specific kindred types we track)
    for tag_name, patterns in KINDRED_GRANT_PATTERNS.items():
        for pattern in patterns:
            if re.search(pattern, text_lower, re.IGNORECASE):
                tags.add(tag_name)
                break  # Found match for this kindred type, move to next

    # Only check dynamic patterns if protection keywords present (performance optimization)
    if not any(keyword in text_lower for keyword in ['hexproof', 'shroud', 'indestructible', 'ward', 'protection']):
        return tags

    # Use pre-compiled patterns for all creature types
    for compiled_pattern, tag_name in KINDRED_PATTERNS:
        if compiled_pattern.search(text_lower):
            tags.add(tag_name)
            # Don't break - a card could grant to multiple creature types

    return tags


def is_opponent_grant(text: str) -> bool:
    """
    Check if card grants protection to opponent's permanents or all permanents.

    Returns True if this grants to opponents (should be excluded from Protection tag).
    """
    if not text:
        return False

    text_lower = text.lower()

    # Check for opponent grant patterns
    for pattern in OPPONENT_GRANT_PATTERNS:
        if re.search(pattern, text_lower, re.IGNORECASE):
            # Make sure it's not "target opponent" for a different effect
            # Must be in context of granting protection
            if any(prot in text_lower for prot in ['hexproof', 'shroud', 'indestructible', 'ward', 'protection']):
                # Check if "you control" appears in same sentence
                if 'you control' not in text_lower.split('.')[0]:
                    return True

    return False


def has_conditional_self_grant(text: str) -> bool:
    """
    Check if card has any conditional self-grant patterns.
    This does NOT check if it ALSO grants to others.
    """
    if not text:
        return False

    text_lower = text.lower()

    # Check for conditional self-grant patterns (activated/triggered abilities)
    for pattern in CONDITIONAL_SELF_GRANT_PATTERNS:
        if re.search(pattern, text_lower, re.IGNORECASE):
            return True

    return False


def is_conditional_self_grant(text: str) -> bool:
    """
    Check if card only conditionally grants protection to itself.

    Examples:
    - "{B}, Discard a card: This creature gains hexproof until end of turn."
    - "Whenever you cast a noncreature spell, untap this creature. It gains protection..."
    - "Whenever this creature attacks, it gains indestructible until end of turn."

    These should be excluded as they don't provide protection to OTHER permanents.
    """
    if not text:
        return False

    text_lower = text.lower()

    # Check if it has conditional self-grant patterns
    found_conditional_self = has_conditional_self_grant(text)

    if not found_conditional_self:
        return False

    # If we found a conditional self-grant, check if there's ALSO a grant to others
    # Look for patterns that grant to creatures besides itself
    has_other_grant = any(re.search(pattern, text_lower, re.IGNORECASE) for pattern in [
        r'other creatures',
        r'creatures you control (have|gain)',
        r'target (creature|permanent) you control gains',
        r'another target (creature|permanent)',
        r'equipped creature (has|gains)',
        r'enchanted creature (has|gains)',
        r'target legendary',
        r'permanents you control gain',
    ])

    # Return True only if it's ONLY conditional self-grants (no other grants)
    return not has_other_grant


def is_granting_protection(text: str, keywords: str, exclude_kindred: bool = False) -> bool:
    """
    Determine if a card grants protection effects to other permanents.

    Returns True if the card gives/grants protection to other cards unconditionally.
    Returns False if:
    - Card only has inherent protection
    - Card only conditionally grants to itself
    - Card grants to opponent's permanents
    - Card grants only to specific kindred types (when exclude_kindred=True)
    - Card creates tokens with protection (not granting to existing permanents)
    - Card only modifies non-protection stats of other permanents

    Args:
        text: Card text to analyze
        keywords: Card keywords (comma-separated)
        exclude_kindred: If True, exclude kindred-specific grants

    Returns:
        True if card grants broad protection, False otherwise
    """
    if not text:
        return False

    text_lower = text.lower()

    # EXCLUDE: Opponent grants
    if is_opponent_grant(text):
        return False

    # EXCLUDE: Conditional self-grants only
    if is_conditional_self_grant(text):
        return False

    # EXCLUDE: Cards that remove protection
    for pattern in EXCLUSION_PATTERNS:
        if re.search(pattern, text_lower, re.IGNORECASE):
            return False

    # EXCLUDE: Token creation with protection (not granting to existing permanents)
    if re.search(r'create.*token.*with.*(hexproof|shroud|indestructible|ward|protection)', text_lower, re.IGNORECASE):
        # Check if there's ALSO granting to other permanents
        has_grant_to_others = any(re.search(pattern, text_lower, re.IGNORECASE) for pattern in MASS_GRANT_PATTERNS)
        if not has_grant_to_others:
            return False

    # EXCLUDE: Kindred-specific grants if requested
    if exclude_kindred:
        kindred_tags = get_kindred_protection_tags(text)
        if kindred_tags:
            # If we detected kindred tags, check if there's ALSO a non-kindred grant
            # Look for grant patterns that explicitly grant to ALL creatures/permanents broadly
            has_broad_grant = False

            # Patterns that indicate truly broad grants (not type-specific)
            broad_only_patterns = [
                r'\bcreatures you control (have|gain)\b(?!.*(knight|merfolk|zombie|elf|dragon|goblin|sliver))',  # Only if not followed by type
                r'\bpermanents you control (have|gain)\b',
                r'\beach (creature|permanent) you control',
                r'\ball (creatures?|permanents?)',
            ]

            for pattern in broad_only_patterns:
                if re.search(pattern, text_lower, re.IGNORECASE):
                    has_broad_grant = True
                    break

            if not has_broad_grant:
                return False  # Only kindred grants, exclude

    # Check if card has inherent protection keywords
    has_inherent = False
    if keywords:
        keywords_lower = keywords.lower()
        has_inherent = any(k in keywords_lower for k in PROTECTION_KEYWORDS)

    # Check for explicit grants with protection keywords
    found_grant = False

    # Mass grant patterns (creatures you control have/gain)
    for pattern in MASS_GRANT_PATTERNS:
        match = re.search(pattern, text_lower, re.IGNORECASE)
        if match:
            # Check if protection keyword appears in the same sentence or nearby (within 70 chars AFTER the match)
            # This ensures we're looking at "creatures you control HAVE hexproof" not just having both phrases
            context_start = match.start()
            context_end = min(len(text_lower), match.end() + 70)
            context = text_lower[context_start:context_end]

            if any(prot in context for prot in PROTECTION_KEYWORDS):
                found_grant = True
                break

    # Targeted grant patterns (target creature gains)
    if not found_grant:
        for pattern in TARGETED_GRANT_PATTERNS:
            match = re.search(pattern, text_lower, re.IGNORECASE)
            if match:
                # Check if protection keyword appears after the grant verb (within 70 chars)
                context_start = match.start()
                context_end = min(len(text_lower), match.end() + 70)
                context = text_lower[context_start:context_end]

                if any(prot in context for prot in PROTECTION_KEYWORDS):
                    found_grant = True
                    break

    # Grant verb patterns (creature gains/gets hexproof)
    if not found_grant:
        for pattern in GRANT_VERB_PATTERNS:
            if re.search(pattern, text_lower, re.IGNORECASE):
                found_grant = True
                break

    # If we have inherent protection and the ONLY text is about stats (no grant words), exclude
    if has_inherent and not found_grant:
        # Check if text only talks about other stats (power/toughness, +X/+X)
        has_stat_only = bool(re.search(r'(get[s]?|gain[s]?)\s+[+\-][0-9X]+/[+\-][0-9X]+', text_lower))
        # Check if text mentions "other" without protection keywords
        mentions_other_without_prot = 'other' in text_lower and not any(prot in text_lower for prot in PROTECTION_KEYWORDS if prot in text_lower[text_lower.find('other'):])

        if has_stat_only or mentions_other_without_prot:
            return False

    return found_grant


def categorize_protection_card(name: str, text: str, keywords: str, card_type: str, exclude_kindred: bool = False) -> str:
    """
    Categorize a Protection-tagged card for audit purposes.

    Args:
        name: Card name
        text: Card text
        keywords: Card keywords
        card_type: Card type line
        exclude_kindred: If True, kindred-specific grants are categorized as metadata, not Grant

    Returns:
        'Grant' - gives broad protection to others
        'Kindred' - gives kindred-specific protection (metadata tag)
        'Inherent' - has protection itself
        'ConditionalSelf' - only conditionally grants to itself
        'Opponent' - grants to opponent's permanents
        'Neither' - false positive
    """
    keywords_lower = keywords.lower() if keywords else ''

    # Check for opponent grants first
    if is_opponent_grant(text):
        return 'Opponent'

    # Check for conditional self-grants (ONLY self, no other grants)
    if is_conditional_self_grant(text):
        return 'ConditionalSelf'

    # Check if it has conditional self-grant (may also have other grants)
    has_cond_self = has_conditional_self_grant(text)

    # Check if it has inherent protection
    has_inherent = any(k in keywords_lower for k in PROTECTION_KEYWORDS)

    # Check for kindred-specific grants
    kindred_tags = get_kindred_protection_tags(text)
    if kindred_tags and exclude_kindred:
        # Check if there's ALSO a broad grant (excluding kindred)
        grants_broad = is_granting_protection(text, keywords, exclude_kindred=True)

        if grants_broad and has_inherent:
            # Has inherent + kindred + broad grants
            return 'Mixed'
        elif grants_broad:
            # Has kindred + broad grants (but no inherent)
            # This is just Grant with kindred metadata tags
            return 'Grant'
        elif has_inherent:
            # Has inherent + kindred only (not broad)
            # This is still just Kindred category (inherent is separate from granting)
            return 'Kindred'
        else:
            # Only kindred grants, no inherent or broad
            return 'Kindred'

    # Check if it grants protection broadly (not kindred-specific)
    grants_protection = is_granting_protection(text, keywords, exclude_kindred=exclude_kindred)

    # Categorize based on what it does
    if grants_protection and has_cond_self:
        # Has conditional self-grant + grants to others = Mixed
        return 'Mixed'
    elif grants_protection and has_inherent:
        return 'Mixed'  # Has inherent + grants broadly
    elif grants_protection:
        return 'Grant'  # Only grants broadly
    elif has_inherent:
        return 'Inherent'  # Only has inherent
    else:
        return 'Neither'  # False positive
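A short sketch of how these helpers combine (the card texts are illustrative strings rather than rows from the generated CSVs, and it assumes the repository root is importable):

from code.tagging.protection_grant_detection import (
    categorize_protection_card,
    get_kindred_protection_tags,
    is_granting_protection,
)

# Broad grant to your own board: qualifies for the main Protection tag.
print(is_granting_protection("Creatures you control have hexproof.", ""))  # True

# Kindred-only grant: excluded from the broad tag, surfaced as metadata instead.
text = "Other Zombies you control have indestructible."
print(is_granting_protection(text, "", exclude_kindred=True))  # False
print(get_kindred_protection_tags(text))                       # {'Zombies Gain Protection'}

# Inherent keyword only: the audit files this under 'Inherent'.
print(categorize_protection_card("Example Golem", "Indestructible", "Indestructible", "Artifact Creature"))  # 'Inherent'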
@@ -849,4 +849,89 @@ TOPDECK_EXCLUSION_PATTERNS: List[str] = [
    'from the top of their library',
    'look at the top card of target player\'s library',
    'reveal the top card of target player\'s library'
]

# ==============================================================================
# Keyword Normalization (M1 - Tagging Refinement)
# ==============================================================================

# Keyword normalization map: variant -> canonical
# Maps Commander-specific and variant keywords to their canonical forms
KEYWORD_NORMALIZATION_MAP: Dict[str, str] = {
    # Commander variants
    'Commander ninjutsu': 'Ninjutsu',
    'Commander Ninjutsu': 'Ninjutsu',

    # Partner variants (already excluded but mapped for reference)
    'Partner with': 'Partner',
    'Choose a Background': 'Choose a Background',  # Keep distinct
    "Doctor's Companion": "Doctor's Companion",  # Keep distinct

    # Case normalization for common keywords (most are already correct)
    'flying': 'Flying',
    'trample': 'Trample',
    'vigilance': 'Vigilance',
    'haste': 'Haste',
    'deathtouch': 'Deathtouch',
    'lifelink': 'Lifelink',
    'menace': 'Menace',
    'reach': 'Reach',
}

# Keywords that should never appear in theme tags
# Already excluded during keyword tagging, but documented here
KEYWORD_EXCLUSION_SET: set[str] = {
    'partner',  # Already excluded in tag_for_keywords
}

# Keyword allowlist - keywords that should survive singleton pruning
# Seeded from top keywords and theme whitelist
KEYWORD_ALLOWLIST: set[str] = {
    # Evergreen keywords (top 50 from baseline)
    'Flying', 'Enchant', 'Trample', 'Vigilance', 'Haste', 'Equip', 'Flash',
    'Mill', 'Scry', 'Transform', 'Cycling', 'First strike', 'Reach', 'Menace',
    'Lifelink', 'Treasure', 'Defender', 'Deathtouch', 'Kicker', 'Flashback',
    'Protection', 'Surveil', 'Landfall', 'Crew', 'Ward', 'Morph', 'Devoid',
    'Investigate', 'Fight', 'Food', 'Partner', 'Double strike', 'Indestructible',
    'Threshold', 'Proliferate', 'Convoke', 'Hexproof', 'Cumulative upkeep',
    'Goad', 'Delirium', 'Prowess', 'Suspend', 'Affinity', 'Madness', 'Manifest',
    'Amass', 'Domain', 'Unearth', 'Explore', 'Changeling',

    # Additional important mechanics
    'Myriad', 'Cascade', 'Storm', 'Dredge', 'Delve', 'Escape', 'Mutate',
    'Ninjutsu', 'Overload', 'Rebound', 'Retrace', 'Bloodrush', 'Cipher',
    'Extort', 'Evolve', 'Undying', 'Persist', 'Wither', 'Infect', 'Annihilator',
    'Exalted', 'Phasing', 'Shadow', 'Horsemanship', 'Banding', 'Rampage',
    'Shroud', 'Split second', 'Totem armor', 'Living weapon', 'Undaunted',
    'Improvise', 'Surge', 'Emerge', 'Escalate', 'Meld', 'Partner', 'Afflict',
    'Aftermath', 'Embalm', 'Eternalize', 'Exert', 'Fabricate', 'Improvise',
    'Assist', 'Jump-start', 'Mentor', 'Riot', 'Spectacle', 'Addendum',
    'Afterlife', 'Adapt', 'Enrage', 'Ascend', 'Learn', 'Boast', 'Foretell',
    'Squad', 'Encore', 'Daybound', 'Nightbound', 'Disturb', 'Cleave', 'Training',
    'Reconfigure', 'Blitz', 'Casualty', 'Connive', 'Hideaway', 'Prototype',
    'Read ahead', 'Living metal', 'More than meets the eye', 'Ravenous',
    'Squad', 'Toxic', 'For Mirrodin!', 'Backup', 'Bargain', 'Craft', 'Freerunning',
    'Plot', 'Spree', 'Offspring', 'Bestow', 'Monstrosity', 'Tribute',

    # Partner mechanics (distinct types)
    'Choose a Background', "Doctor's Companion",

    # Token types (frequently used)
    'Blood', 'Clue', 'Food', 'Gold', 'Treasure', 'Powerstone',

    # Common ability words
    'Landfall', 'Raid', 'Revolt', 'Threshold', 'Metalcraft', 'Morbid',
    'Bloodthirst', 'Battalion', 'Channel', 'Grandeur', 'Kinship', 'Sweep',
    'Radiance', 'Join forces', 'Fateful hour', 'Inspired', 'Heroic',
    'Constellation', 'Strive', 'Prowess', 'Ferocious', 'Formidable', 'Renown',
    'Tempting offer', 'Will of the council', 'Parley', 'Adamant', 'Devotion',
}

# Metadata tag prefixes (for M3 - metadata partition)
# Tags matching these patterns should be classified as metadata, not themes
METADATA_TAG_PREFIXES: List[str] = [
    'Applied:',
    'Bracket:',
    'Diagnostic:',
    'Internal:',
]
@@ -509,4 +509,77 @@ def create_mass_damage_mask(df: pd.DataFrame) -> pd.Series[bool]:
    damage_mask = create_text_mask(df, number_patterns)
    target_mask = create_text_mask(df, target_patterns)

    return damage_mask & target_mask


# ==============================================================================
# Keyword Normalization (M1 - Tagging Refinement)
# ==============================================================================

def normalize_keywords(
    raw: Union[List[str], Set[str], Tuple[str, ...]],
    allowlist: Set[str],
    frequency_map: dict[str, int]
) -> list[str]:
    """Normalize keyword strings for theme tagging.

    Applies normalization rules:
    1. Case normalization (via normalization map)
    2. Canonical mapping (e.g., "Commander Ninjutsu" -> "Ninjutsu")
    3. Singleton pruning (unless allowlisted)
    4. Deduplication
    5. Exclusion of blacklisted keywords

    Args:
        raw: Iterable of raw keyword strings
        allowlist: Set of keywords that should survive singleton pruning
        frequency_map: Dict mapping keywords to their occurrence count

    Returns:
        Deduplicated list of normalized keywords

    Raises:
        ValueError: If raw is not iterable

    Examples:
        >>> normalize_keywords(
        ...     ['Commander Ninjutsu', 'Flying', 'Allons-y!'],
        ...     {'Flying', 'Ninjutsu'},
        ...     {'Commander Ninjutsu': 2, 'Flying': 100, 'Allons-y!': 1}
        ... )
        ['Flying', 'Ninjutsu']  # 'Allons-y!' pruned as singleton
    """
    if not hasattr(raw, '__iter__') or isinstance(raw, (str, bytes)):
        raise ValueError(f"raw must be iterable, got {type(raw)}")

    normalized_keywords: set[str] = set()

    for keyword in raw:
        # Skip non-string entries
        if not isinstance(keyword, str):
            continue

        # Skip empty strings
        keyword = keyword.strip()
        if not keyword:
            continue

        # Skip excluded keywords
        if keyword.lower() in tag_constants.KEYWORD_EXCLUSION_SET:
            continue

        # Apply normalization map
        normalized = tag_constants.KEYWORD_NORMALIZATION_MAP.get(keyword, keyword)

        # Check if singleton (unless allowlisted)
        frequency = frequency_map.get(keyword, 0)
        is_singleton = frequency == 1
        is_allowlisted = normalized in allowlist or keyword in allowlist

        # Prune singletons that aren't allowlisted
        if is_singleton and not is_allowlisted:
            continue

        normalized_keywords.add(normalized)

    return sorted(list(normalized_keywords))
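Called directly with the constants added above, the helper behaves like this (a hedged example; the frequency counts are invented for illustration):

from code.tagging import tag_constants, tag_utils

raw = ['Commander Ninjutsu', 'flying', 'Allons-y!']
freq = {'Commander Ninjutsu': 2, 'flying': 120, 'Allons-y!': 1}

print(tag_utils.normalize_keywords(raw, tag_constants.KEYWORD_ALLOWLIST, freq))
# ['Flying', 'Ninjutsu'] - 'Allons-y!' is dropped as a non-allowlisted singleton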
@@ -580,6 +580,11 @@ def add_creatures_to_tags(df: pd.DataFrame, color: str) -> None:
## Add keywords to theme tags
def tag_for_keywords(df: pd.DataFrame, color: str) -> None:
    """Tag cards based on their keywords using vectorized operations.

    When TAG_NORMALIZE_KEYWORDS is enabled, applies normalization:
    - Canonical mapping (e.g., "Commander Ninjutsu" -> "Ninjutsu")
    - Singleton pruning (unless allowlisted)
    - Case normalization

    Args:
        df: DataFrame containing card data
@@ -589,6 +594,20 @@ def tag_for_keywords(df: pd.DataFrame, color: str) -> None:
    start_time = pd.Timestamp.now()

    try:
        from settings import TAG_NORMALIZE_KEYWORDS

        # Load frequency map if normalization is enabled
        frequency_map: dict[str, int] = {}
        if TAG_NORMALIZE_KEYWORDS:
            freq_map_path = Path(__file__).parent / 'keyword_frequency_map.json'
            if freq_map_path.exists():
                with open(freq_map_path, 'r', encoding='utf-8') as f:
                    frequency_map = json.load(f)
                logger.info('Loaded keyword frequency map with %d entries', len(frequency_map))
            else:
                logger.warning('Keyword frequency map not found, normalization disabled for this run')
                TAG_NORMALIZE_KEYWORDS = False

        # Create mask for valid keywords
        has_keywords = pd.notna(df['keywords'])
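The frequency map is read as a plain keyword-to-count mapping. A hypothetical sketch of the shape keyword_frequency_map.json is expected to have (the entries and counts below are invented, only the structure is taken from the loading code above):

import json

example_map = {"Flying": 14210, "Commander Ninjutsu": 2, "Allons-y!": 1}
print(json.dumps(example_map, indent=2))  # keyword -> occurrence count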
@@ -608,17 +627,29 @@
            else:
                keywords_iterable = []

            filtered_keywords = [
                kw for kw in keywords_iterable
                if kw and kw.lower() not in exclusion_keywords
            ]

            return sorted(list(set(base_tags + filtered_keywords)))
            # Apply normalization if enabled
            if TAG_NORMALIZE_KEYWORDS and frequency_map:
                normalized_keywords = tag_utils.normalize_keywords(
                    keywords_iterable,
                    tag_constants.KEYWORD_ALLOWLIST,
                    frequency_map
                )
                return sorted(list(set(base_tags + normalized_keywords)))
            else:
                # Legacy behavior: simple exclusion filter
                filtered_keywords = [
                    kw for kw in keywords_iterable
                    if kw and kw.lower() not in exclusion_keywords
                ]
                return sorted(list(set(base_tags + filtered_keywords)))

        df.loc[has_keywords, 'themeTags'] = keywords_df.apply(_merge_keywords, axis=1)

        duration = (pd.Timestamp.now() - start_time).total_seconds()
        logger.info('Tagged %d cards with keywords in %.2f seconds', has_keywords.sum(), duration)

        if TAG_NORMALIZE_KEYWORDS:
            logger.info('Keyword normalization enabled for %s', color)

    except Exception as e:
        logger.error('Error tagging keywords: %s', str(e))
@@ -7000,6 +7031,9 @@ def tag_for_protection(df: pd.DataFrame, color: str) -> None:
    - Ward
    - Phase out

    With TAG_PROTECTION_GRANTS=1, only tags cards that grant protection to other
    permanents, filtering out cards with inherent protection.

    The function uses helper functions to identify different types of protection
    and applies tags consistently using vectorized operations.
@@ -7025,13 +7059,47 @@
    required_cols = {'text', 'themeTags', 'keywords'}
    tag_utils.validate_dataframe_columns(df, required_cols)

    # Create masks for different protection patterns
    text_mask = create_protection_text_mask(df)
    keyword_mask = create_protection_keyword_mask(df)
    exclusion_mask = create_protection_exclusion_mask(df)
    # Check if grant detection is enabled (M2 feature flag)
    use_grant_detection = os.getenv('TAG_PROTECTION_GRANTS', '1').lower() in ('1', 'true', 'yes')

    # Combine masks
    final_mask = (text_mask | keyword_mask) & ~exclusion_mask
    if use_grant_detection:
        # M2: Use grant detection to filter out inherent-only protection
        from code.tagging.protection_grant_detection import is_granting_protection, get_kindred_protection_tags

        # Create a grant detection mask
        grant_mask = df.apply(
            lambda row: is_granting_protection(
                str(row.get('text', '')),
                str(row.get('keywords', ''))
            ),
            axis=1
        )

        final_mask = grant_mask
        logger.info(f'Using M2 grant detection (TAG_PROTECTION_GRANTS=1)')

        # Apply kindred metadata tags for creature-type-specific grants
        kindred_count = 0
        for idx, row in df[final_mask].iterrows():
            text = str(row.get('text', ''))
            kindred_tags = get_kindred_protection_tags(text)

            if kindred_tags:
                # Add kindred-specific metadata tags
                current_tags = str(row.get('metadataTags', ''))
                existing = set(t.strip() for t in current_tags.split(',') if t.strip())
                existing.update(kindred_tags)
                df.at[idx, 'metadataTags'] = ', '.join(sorted(existing))
                kindred_count += 1

        if kindred_count > 0:
            logger.info(f'Applied kindred metadata tags to {kindred_count} cards')
    else:
        # Legacy: Use original text/keyword patterns
        text_mask = create_protection_text_mask(df)
        keyword_mask = create_protection_keyword_mask(df)
        exclusion_mask = create_protection_exclusion_mask(df)
        final_mask = (text_mask | keyword_mask) & ~exclusion_mask

    # Apply tags via rules engine
    tag_utils.apply_rules(df, rules=[
182
code/tests/test_keyword_normalization.py
Normal file
@@ -0,0 +1,182 @@
"""Tests for keyword normalization (M1 - Tagging Refinement)."""
from __future__ import annotations

import pytest

from code.tagging import tag_utils, tag_constants


class TestKeywordNormalization:
    """Test suite for normalize_keywords function."""

    def test_canonical_mappings(self):
        """Test that variant keywords map to canonical forms."""
        raw = ['Commander Ninjutsu', 'Flying', 'Trample']
        allowlist = tag_constants.KEYWORD_ALLOWLIST
        frequency_map = {
            'Commander Ninjutsu': 2,
            'Flying': 100,
            'Trample': 50
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        assert 'Ninjutsu' in result
        assert 'Flying' in result
        assert 'Trample' in result
        assert 'Commander Ninjutsu' not in result

    def test_singleton_pruning(self):
        """Test that singleton keywords are pruned unless allowlisted."""
        raw = ['Allons-y!', 'Flying', 'Take 59 Flights of Stairs']
        allowlist = {'Flying'}  # Only Flying is allowlisted
        frequency_map = {
            'Allons-y!': 1,
            'Flying': 100,
            'Take 59 Flights of Stairs': 1
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        assert 'Flying' in result
        assert 'Allons-y!' not in result
        assert 'Take 59 Flights of Stairs' not in result

    def test_case_normalization(self):
        """Test that keywords are normalized to proper case."""
        raw = ['flying', 'TRAMPLE', 'vigilance']
        allowlist = {'Flying', 'Trample', 'Vigilance'}
        frequency_map = {
            'flying': 100,
            'TRAMPLE': 50,
            'vigilance': 75
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        # Case normalization happens via the map
        # If not in map, original case is preserved
        assert len(result) == 3

    def test_partner_exclusion(self):
        """Test that partner keywords remain excluded."""
        raw = ['Partner', 'Flying', 'Trample']
        allowlist = {'Flying', 'Trample'}
        frequency_map = {
            'Partner': 50,
            'Flying': 100,
            'Trample': 50
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        assert 'Flying' in result
        assert 'Trample' in result
        assert 'Partner' not in result  # Excluded
        assert 'partner' not in result

    def test_empty_input(self):
        """Test that empty input returns empty list."""
        result = tag_utils.normalize_keywords([], set(), {})
        assert result == []

    def test_whitespace_handling(self):
        """Test that whitespace is properly stripped."""
        raw = [' Flying ', 'Trample ', ' Vigilance']
        allowlist = {'Flying', 'Trample', 'Vigilance'}
        frequency_map = {
            'Flying': 100,
            'Trample': 50,
            'Vigilance': 75
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        assert 'Flying' in result
        assert 'Trample' in result
        assert 'Vigilance' in result

    def test_deduplication(self):
        """Test that duplicate keywords are deduplicated."""
        raw = ['Flying', 'Flying', 'Trample', 'Flying']
        allowlist = {'Flying', 'Trample'}
        frequency_map = {
            'Flying': 100,
            'Trample': 50
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        assert result.count('Flying') == 1
        assert result.count('Trample') == 1

    def test_non_string_entries_skipped(self):
        """Test that non-string entries are safely skipped."""
        raw = ['Flying', None, 123, 'Trample', '']
        allowlist = {'Flying', 'Trample'}
        frequency_map = {
            'Flying': 100,
            'Trample': 50
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        assert 'Flying' in result
        assert 'Trample' in result
        assert len(result) == 2

    def test_invalid_input_raises_error(self):
        """Test that non-iterable input raises ValueError."""
        with pytest.raises(ValueError, match="raw must be iterable"):
            tag_utils.normalize_keywords("not-a-list", set(), {})

    def test_allowlist_preserves_singletons(self):
        """Test that allowlisted keywords survive even if they're singletons."""
        raw = ['Myriad', 'Flying', 'Cascade']
        allowlist = {'Flying', 'Myriad', 'Cascade'}  # All allowlisted
        frequency_map = {
            'Myriad': 1,  # Singleton
            'Flying': 100,
            'Cascade': 1  # Singleton
        }

        result = tag_utils.normalize_keywords(raw, allowlist, frequency_map)

        assert 'Myriad' in result  # Preserved despite being singleton
        assert 'Flying' in result
        assert 'Cascade' in result  # Preserved despite being singleton


class TestKeywordIntegration:
    """Integration tests for keyword normalization in tagging flow."""

    def test_normalization_preserves_evergreen_keywords(self):
        """Test that common evergreen keywords are always preserved."""
        evergreen = ['Flying', 'Trample', 'Vigilance', 'Haste', 'Deathtouch', 'Lifelink']
        allowlist = tag_constants.KEYWORD_ALLOWLIST
        frequency_map = {kw: 100 for kw in evergreen}  # All common

        result = tag_utils.normalize_keywords(evergreen, allowlist, frequency_map)

        for kw in evergreen:
            assert kw in result

    def test_crossover_keywords_pruned(self):
        """Test that crossover-specific singletons are pruned."""
        crossover_singletons = [
            'Gae Bolg',  # Final Fantasy
            'Psychic Defense',  # Warhammer 40K
            'Allons-y!',  # Doctor Who
            'Flying'  # Evergreen (control)
        ]
        allowlist = {'Flying'}  # Only Flying allowed
        frequency_map = {
            'Gae Bolg': 1,
            'Psychic Defense': 1,
            'Allons-y!': 1,
            'Flying': 100
        }

        result = tag_utils.normalize_keywords(crossover_singletons, allowlist, frequency_map)

        assert result == ['Flying']  # Only evergreen survived
169
code/tests/test_protection_grant_detection.py
Normal file
@@ -0,0 +1,169 @@
"""
Tests for protection grant detection (M2).

Tests the ability to distinguish between cards that grant protection
and cards that have inherent protection.
"""

import pytest
from code.tagging.protection_grant_detection import (
    is_granting_protection,
    categorize_protection_card
)


class TestGrantDetection:
    """Test grant verb detection."""

    def test_gains_hexproof(self):
        """Cards with 'gains hexproof' should be detected as granting."""
        text = "Target creature gains hexproof until end of turn."
        assert is_granting_protection(text, "")

    def test_gives_indestructible(self):
        """Cards with 'gives indestructible' should be detected as granting."""
        text = "This creature gives target creature indestructible."
        assert is_granting_protection(text, "")

    def test_creatures_you_control_have(self):
        """Mass grant pattern should be detected."""
        text = "Creatures you control have hexproof."
        assert is_granting_protection(text, "")

    def test_equipped_creature_gets(self):
        """Equipment grant pattern should be detected."""
        text = "Equipped creature gets +2/+2 and has indestructible."
        assert is_granting_protection(text, "")


class TestInherentDetection:
    """Test inherent protection detection."""

    def test_creature_with_hexproof_keyword(self):
        """Creature with hexproof keyword should not be detected as granting."""
        text = "Hexproof (This creature can't be the target of spells or abilities.)"
        keywords = "Hexproof"
        assert not is_granting_protection(text, keywords)

    def test_indestructible_artifact(self):
        """Artifact with indestructible keyword should not be detected as granting."""
        text = "Indestructible"
        keywords = "Indestructible"
        assert not is_granting_protection(text, keywords)

    def test_ward_creature(self):
        """Creature with Ward should not be detected as granting (unless it grants to others)."""
        text = "Ward {2}"
        keywords = "Ward"
        assert not is_granting_protection(text, keywords)


class TestMixedCases:
    """Test cards that both grant and have protection."""

    def test_creature_with_self_grant(self):
        """Creature that grants itself protection should be detected."""
        text = "This creature gains indestructible until end of turn."
        keywords = ""
        assert is_granting_protection(text, keywords)

    def test_equipment_with_inherent_and_grant(self):
        """Equipment with indestructible that grants protection."""
        text = "Indestructible. Equipped creature has hexproof."
        keywords = "Indestructible"
        # Should be detected as granting because of "has hexproof"
        assert is_granting_protection(text, keywords)


class TestExclusions:
    """Test exclusion patterns."""

    def test_cant_have_hexproof(self):
        """Cards that prevent protection should not be tagged."""
        text = "Creatures your opponents control can't have hexproof."
        assert not is_granting_protection(text, "")

    def test_loses_indestructible(self):
        """Cards that remove protection should not be tagged."""
        text = "Target creature loses indestructible until end of turn."
        assert not is_granting_protection(text, "")


class TestEdgeCases:
    """Test edge cases and special patterns."""

    def test_protection_from_color(self):
        """Protection from [quality] in keywords without grant text."""
        text = "Protection from red"
        keywords = "Protection from red"
        assert not is_granting_protection(text, keywords)

    def test_empty_text(self):
        """Empty text should return False."""
        assert not is_granting_protection("", "")

    def test_none_text(self):
        """None text should return False."""
        assert not is_granting_protection(None, "")


class TestCategorization:
    """Test full card categorization."""

    def test_shell_shield_is_grant(self):
        """Shell Shield grants hexproof - should be Grant."""
        text = "Target creature gets +0/+3 and gains hexproof until end of turn."
        cat = categorize_protection_card("Shell Shield", text, "", "Instant")
        assert cat == "Grant"

    def test_geist_of_saint_traft_is_mixed(self):
        """Geist has hexproof and creates tokens - Mixed."""
        text = "Hexproof. Whenever this attacks, create a token."
        keywords = "Hexproof"
        cat = categorize_protection_card("Geist", text, keywords, "Creature")
        # Has hexproof keyword, so inherent
        assert cat in ("Inherent", "Mixed")

    def test_darksteel_brute_is_inherent(self):
        """Darksteel Brute has indestructible - should be Inherent."""
        text = "Indestructible"
        keywords = "Indestructible"
        cat = categorize_protection_card("Darksteel Brute", text, keywords, "Artifact")
        assert cat == "Inherent"

    def test_scion_of_oona_is_grant(self):
        """Scion of Oona grants shroud to other faeries - should be Grant."""
        text = "Other Faeries you control have shroud."
        keywords = "Flying, Flash"
        cat = categorize_protection_card("Scion of Oona", text, keywords, "Creature")
        assert cat == "Grant"


class TestRealWorldCards:
    """Test against actual card samples from baseline audit."""

    def test_bulwark_ox(self):
        """Bulwark Ox - grants hexproof and indestructible."""
        text = "Sacrifice: Creatures you control with counters gain hexproof and indestructible"
        assert is_granting_protection(text, "")

    def test_bloodsworn_squire(self):
        """Bloodsworn Squire - grants itself indestructible."""
        text = "This creature gains indestructible until end of turn"
        assert is_granting_protection(text, "")

    def test_kaldra_compleat(self):
        """Kaldra Compleat - equipment with indestructible that grants."""
        text = "Indestructible. Equipped creature gets +5/+5 and has indestructible"
        keywords = "Indestructible"
        assert is_granting_protection(text, keywords)

    def test_ward_sliver(self):
        """Ward Sliver - grants protection to all slivers."""
        text = "All Slivers have protection from the chosen color"
        assert is_granting_protection(text, "")

    def test_rebbec(self):
        """Rebbec - grants protection to artifacts."""
        text = "Artifacts you control have protection from each mana value"
        assert is_granting_protection(text, "")
@@ -170,7 +170,7 @@ def _step5_summary_placeholder_html(token: int, *, message: str | None = None) -
    return (
        f'<div id="deck-summary" data-summary '
        f'hx-get="/build/step5/summary?token={token}" '
        'hx-trigger="load, step5:refresh from:body" hx-swap="outerHTML">'
        'hx-trigger="step5:refresh from:body" hx-swap="outerHTML">'
        f'<div class="muted" style="margin-top:1rem;">{_esc(text)}</div>'
        '</div>'
    )
@@ -1181,6 +1181,9 @@ def _ensure_setup_ready(out, force: bool = False) -> None:
            # Only flip phase if previous run finished
            if st.get('phase') in {'themes','themes-fast'}:
                st['phase'] = 'done'
            # Also ensure percent is 100 when done
            if st.get('finished_at'):
                st['percent'] = 100
            with open(status_path, 'w', encoding='utf-8') as _wf:
                json.dump(st, _wf)
        except Exception:
@@ -1463,16 +1466,17 @@
        except Exception:
            pass

    # Unconditional fallback: if (for any reason) no theme export ran above, perform a fast-path export now.
    # This guarantees that clicking Run Setup/Tagging always leaves themes current even when tagging wasn't needed.
    # Conditional fallback: only run theme export if refresh_needed was True but somehow no export performed.
    # This avoids repeated exports when setup is already complete and _ensure_setup_ready is called again.
    try:
        if not theme_export_performed:
        if not theme_export_performed and refresh_needed:
            _refresh_theme_catalog(out, force=False, fast_path=True)
    except Exception:
        pass
    else:  # If export just ran (either earlier or via fallback), ensure enrichment ran (safety double-call guard inside helper)
        try:
            _run_theme_metadata_enrichment(out)
            if theme_export_performed or refresh_needed:
                _run_theme_metadata_enrichment(out)
        except Exception:
            pass
@@ -309,7 +309,8 @@
        .catch(function(){ /* noop */ });
      } catch(e) {}
    }
    setInterval(pollStatus, 3000);
    // Poll every 10 seconds instead of 3 to reduce server load (only for header indicator)
    setInterval(pollStatus, 10000);
    pollStatus();

    // Health indicator poller
@@ -462,11 +462,12 @@
      <!-- controls now above -->

      {% if allow_must_haves %}
        {% include "partials/include_exclude_summary.html" with oob=False %}
        {% set oob = False %}
        {% include "partials/include_exclude_summary.html" %}
      {% endif %}
      <div id="deck-summary" data-summary
           hx-get="/build/step5/summary?token={{ summary_token }}"
           hx-trigger="load, step5:refresh from:body"
           hx-trigger="load once, step5:refresh from:body"
           hx-swap="outerHTML">
        <div class="muted" style="margin-top:1rem;">
          {% if summary_ready %}Loading deck summary…{% else %}Deck summary will appear after the build completes.{% endif %}
@@ -127,7 +127,8 @@
        .then(update)
        .catch(function(){});
    }
    setInterval(poll, 3000);
    // Poll every 5 seconds instead of 3 to reduce server load
    setInterval(poll, 5000);
    poll();
  })();
</script>