"""Tests for M3 metadata/theme tag partition functionality.

Tests cover:
- Tag classification (metadata vs theme)
- Column creation and data migration  
- Feature flag behavior
- Compatibility with missing columns
- CSV read/write with new schema
"""
import pandas as pd
import pytest
from code.tagging import tag_utils
from code.tagging.tagger import _apply_metadata_partition


class TestTagClassification:
    """Tests for classify_tag function."""
    
    def test_prefix_based_metadata(self):
        """Metadata tags identified by prefix."""
        assert tag_utils.classify_tag("Applied: Cost Reduction") == "metadata"
        assert tag_utils.classify_tag("Bracket: Game Changer") == "metadata"
        assert tag_utils.classify_tag("Diagnostic: Test") == "metadata"
        assert tag_utils.classify_tag("Internal: Debug") == "metadata"
    
    def test_exact_match_metadata(self):
        """Metadata tags identified by exact match."""
        assert tag_utils.classify_tag("Bracket: Game Changer") == "metadata"
        assert tag_utils.classify_tag("Bracket: Staple") == "metadata"
    
    def test_kindred_protection_metadata(self):
        """Kindred protection tags are metadata."""
        assert tag_utils.classify_tag("Knights Gain Protection") == "metadata"
        assert tag_utils.classify_tag("Frogs Gain Protection") == "metadata"
        assert tag_utils.classify_tag("Zombies Gain Protection") == "metadata"
    
    def test_theme_classification(self):
        """Regular gameplay tags are themes."""
        assert tag_utils.classify_tag("Card Draw") == "theme"
        assert tag_utils.classify_tag("Spellslinger") == "theme"
        assert tag_utils.classify_tag("Tokens Matter") == "theme"
        assert tag_utils.classify_tag("Ramp") == "theme"
        assert tag_utils.classify_tag("Protection") == "theme"
    
    def test_edge_cases(self):
        """Edge cases in tag classification."""
        # Empty string
        assert tag_utils.classify_tag("") == "theme"
        
        # Similar but not exact matches
        assert tag_utils.classify_tag("Apply: Something") == "theme"  # Wrong prefix
        assert tag_utils.classify_tag("Knights Have Protection") == "theme"  # Not "Gain"
        
        # Case sensitivity
        assert tag_utils.classify_tag("applied: Cost Reduction") == "theme"  # Lowercase


class TestMetadataPartition:
    """Tests for _apply_metadata_partition function."""
    
    def test_basic_partition(self, monkeypatch):
        """Basic partition splits tags correctly."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
        
        df = pd.DataFrame({
            'name': ['Card A', 'Card B'],
            'themeTags': [
                ['Card Draw', 'Applied: Cost Reduction'],
                ['Spellslinger', 'Bracket: Game Changer', 'Tokens Matter']
            ]
        })
        
        df_out, diag = _apply_metadata_partition(df)
        
        # Check theme tags
        assert df_out.loc[0, 'themeTags'] == ['Card Draw']
        assert df_out.loc[1, 'themeTags'] == ['Spellslinger', 'Tokens Matter']
        
        # Check metadata tags
        assert df_out.loc[0, 'metadataTags'] == ['Applied: Cost Reduction']
        assert df_out.loc[1, 'metadataTags'] == ['Bracket: Game Changer']
        
        # Check diagnostics
        assert diag['enabled'] is True
        assert diag['rows_with_tags'] == 2
        assert diag['metadata_tags_moved'] == 2
        assert diag['theme_tags_kept'] == 3
    
    def test_empty_tags(self, monkeypatch):
        """Handles empty tag lists."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
        
        df = pd.DataFrame({
            'name': ['Card A', 'Card B'],
            'themeTags': [[], ['Card Draw']]
        })
        
        df_out, diag = _apply_metadata_partition(df)
        
        assert df_out.loc[0, 'themeTags'] == []
        assert df_out.loc[0, 'metadataTags'] == []
        assert df_out.loc[1, 'themeTags'] == ['Card Draw']
        assert df_out.loc[1, 'metadataTags'] == []
        
        assert diag['rows_with_tags'] == 1
    
    def test_all_metadata_tags(self, monkeypatch):
        """Handles rows with only metadata tags."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
        
        df = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [['Applied: Cost Reduction', 'Bracket: Game Changer']]
        })
        
        df_out, diag = _apply_metadata_partition(df)
        
        assert df_out.loc[0, 'themeTags'] == []
        assert df_out.loc[0, 'metadataTags'] == ['Applied: Cost Reduction', 'Bracket: Game Changer']
        
        assert diag['metadata_tags_moved'] == 2
        assert diag['theme_tags_kept'] == 0
    
    def test_all_theme_tags(self, monkeypatch):
        """Handles rows with only theme tags."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
        
        df = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [['Card Draw', 'Ramp', 'Spellslinger']]
        })
        
        df_out, diag = _apply_metadata_partition(df)
        
        assert df_out.loc[0, 'themeTags'] == ['Card Draw', 'Ramp', 'Spellslinger']
        assert df_out.loc[0, 'metadataTags'] == []
        
        assert diag['metadata_tags_moved'] == 0
        assert diag['theme_tags_kept'] == 3
    
    def test_feature_flag_disabled(self, monkeypatch):
        """Feature flag disables partition."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '0')
        
        df = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [['Card Draw', 'Applied: Cost Reduction']]
        })
        
        df_out, diag = _apply_metadata_partition(df)
        
        # Should not create metadataTags column
        assert 'metadataTags' not in df_out.columns
        
        # Should not modify themeTags
        assert df_out.loc[0, 'themeTags'] == ['Card Draw', 'Applied: Cost Reduction']
        
        # Should indicate disabled
        assert diag['enabled'] is False
    
    def test_missing_theme_tags_column(self, monkeypatch):
        """Handles missing themeTags column gracefully."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
        
        df = pd.DataFrame({
            'name': ['Card A'],
            'other_column': ['value']
        })
        
        df_out, diag = _apply_metadata_partition(df)
        
        # Should return unchanged
        assert 'themeTags' not in df_out.columns
        assert 'metadataTags' not in df_out.columns
        
        # Should indicate error
        assert diag['enabled'] is True
        assert 'error' in diag
    
    def test_non_list_tags(self, monkeypatch):
        """Handles non-list values in themeTags."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
        
        df = pd.DataFrame({
            'name': ['Card A', 'Card B', 'Card C'],
            'themeTags': [['Card Draw'], None, 'not a list']
        })
        
        df_out, diag = _apply_metadata_partition(df)
        
        # Only first row should be processed
        assert df_out.loc[0, 'themeTags'] == ['Card Draw']
        assert df_out.loc[0, 'metadataTags'] == []
        
        assert diag['rows_with_tags'] == 1
    
    def test_kindred_protection_partition(self, monkeypatch):
        """Kindred protection tags are moved to metadata."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
        
        df = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [['Protection', 'Knights Gain Protection', 'Card Draw']]
        })
        
        df_out, diag = _apply_metadata_partition(df)
        
        assert 'Protection' in df_out.loc[0, 'themeTags']
        assert 'Card Draw' in df_out.loc[0, 'themeTags']
        assert 'Knights Gain Protection' in df_out.loc[0, 'metadataTags']
    
    def test_diagnostics_structure(self, monkeypatch):
        """Diagnostics contain expected fields."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
        
        df = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [['Card Draw', 'Applied: Cost Reduction']]
        })
        
        df_out, diag = _apply_metadata_partition(df)
        
        # Check required diagnostic fields
        assert 'enabled' in diag
        assert 'total_rows' in diag
        assert 'rows_with_tags' in diag
        assert 'metadata_tags_moved' in diag
        assert 'theme_tags_kept' in diag
        assert 'unique_metadata_tags' in diag
        assert 'unique_theme_tags' in diag
        assert 'most_common_metadata' in diag
        assert 'most_common_themes' in diag
        
        # Check types
        assert isinstance(diag['most_common_metadata'], list)
        assert isinstance(diag['most_common_themes'], list)


class TestCSVCompatibility:
    """Tests for CSV read/write with new schema."""
    
    def test_csv_roundtrip_with_metadata(self, tmp_path, monkeypatch):
        """CSV roundtrip preserves both columns."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
        
        csv_path = tmp_path / "test_cards.csv"
        
        # Create initial dataframe
        df = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [['Card Draw', 'Ramp']],
            'metadataTags': [['Applied: Cost Reduction']]
        })
        
        # Write to CSV
        df.to_csv(csv_path, index=False)
        
        # Read back
        df_read = pd.read_csv(
            csv_path,
            converters={'themeTags': pd.eval, 'metadataTags': pd.eval}
        )
        
        # Verify data preserved
        assert df_read.loc[0, 'themeTags'] == ['Card Draw', 'Ramp']
        assert df_read.loc[0, 'metadataTags'] == ['Applied: Cost Reduction']
    
    def test_csv_backward_compatible(self, tmp_path, monkeypatch):
        """Can read old CSVs without metadataTags."""
        monkeypatch.setenv('TAG_METADATA_SPLIT', '1')
        
        csv_path = tmp_path / "old_cards.csv"
        
        # Create old-style CSV without metadataTags
        df = pd.DataFrame({
            'name': ['Card A'],
            'themeTags': [['Card Draw', 'Applied: Cost Reduction']]
        })
        df.to_csv(csv_path, index=False)
        
        # Read back
        df_read = pd.read_csv(csv_path, converters={'themeTags': pd.eval})
        
        # Should read successfully
        assert 'themeTags' in df_read.columns
        assert 'metadataTags' not in df_read.columns
        assert df_read.loc[0, 'themeTags'] == ['Card Draw', 'Applied: Cost Reduction']
        
        # Apply partition
        df_partitioned, _ = _apply_metadata_partition(df_read)
        
        # Should now have both columns
        assert 'themeTags' in df_partitioned.columns
        assert 'metadataTags' in df_partitioned.columns
        assert df_partitioned.loc[0, 'themeTags'] == ['Card Draw']
        assert df_partitioned.loc[0, 'metadataTags'] == ['Applied: Cost Reduction']


if __name__ == "__main__":
    pytest.main([__file__, "-v"])