Tagger has been entirely restructured to use vectorized masks instead of iterrows; total tagging time has been drastically reduced.

This commit is contained in:
mwisnowski 2025-01-08 17:26:02 -08:00
parent 6de2239a47
commit c1d6b5ce18
3 changed files with 947 additions and 792 deletions

View file

@ -148,6 +148,85 @@ TOPDECK_EXCLUSION_PATTERNS = [
]
# Constants for stax functionality
# Constants for aristocrats functionality
# Text fragments associated with the aristocrats theme (death and sacrifice
# triggers). Presumably matched against card rules text by the tagger --
# verify against the caller.
ARISTOCRAT_TEXT_PATTERNS = [
    # Death triggers
    'another creature dies',
    'creature dies',
    'creature dying',
    'creature you control dies',
    'creature you own dies',
    'dies this turn',
    'dies, create',
    'dies, draw',
    'dies, each opponent',
    'dies, exile',
    'dies, put',
    'dies, return',
    'dies, sacrifice',
    'dies, you',
    # Blitz
    'has blitz',
    'have blitz',
    # Sacrifice wording ("sacrifice ..." forms)
    'permanents were sacrificed',
    'sacrifice a creature',
    'sacrifice another',
    'sacrifice another creature',
    'sacrifice a nontoken',
    'sacrifice a permanent',
    'sacrifice another nontoken',
    'sacrifice another permanent',
    'sacrifice another token',
    # Sacrifice wording ("sacrifices ..." forms)
    'sacrifices a creature',
    'sacrifices another',
    'sacrifices another creature',
    'sacrifices another nontoken',
    'sacrifices another permanent',
    'sacrifices another token',
    'sacrifices a nontoken',
    'sacrifices a permanent',
    'sacrifices a token',
    # Miscellaneous triggers
    'when this creature dies',
    'whenever a food',
    'whenever you sacrifice',
]
# Card names listed explicitly for the aristocrats theme, kept in
# alphabetical order.
ARISTOCRAT_SPECIFIC_CARDS = [
    'Ashnod, Flesh Mechanist',
    'Blood Artist',
    'Butcher of Malakir',
    'Chatterfang, Squirrel General',
    'Cruel Celebrant',
    'Dictate of Erebos',
    'Endrek Sahr, Master Breeder',
    'Gisa, Glorious Resurrector',
    'Grave Pact',
    'Grim Haruspex',
    'Judith, the Scourge Diva',
    'Korvold, Fae-Cursed King',
    'Mayhem Devil',
    'Midnight Reaper',
    'Mikaeus, the Unhallowed',
    'Pitiless Plunderer',
    'Poison-Tip Archer',
    'Savra, Queen of the Golgari',
    'Sheoldred, the Apocalypse',
    'Syr Konrad, the Grim',
    'Teysa Karlov',
    'Viscera Seer',
    'Yawgmoth, Thran Physician',
    'Zulaport Cutthroat',
]
# Text fragments listed as exclusions for the aristocrats theme.
# Presumably a card matching any of these is not tagged -- verify against
# the caller.
ARISTOCRAT_EXCLUSION_PATTERNS = [
    # Combat wording
    'blocking enchanted',
    'blocking it',
    'blocked by',
    'end the turn',
    # Zone-movement wording
    'from your graveyard',
    'from your hand',
    'from your library',
    'into your hand',
]
STAX_TEXT_PATTERNS = [
'an opponent controls'
'can\'t attack',
@ -206,6 +285,60 @@ STAX_EXCLUSION_PATTERNS = [
'from your library',
'into your hand'
]
# Constants for removal functionality
# Text fragments for single-target removal. The last two entries contain
# '.*' and are therefore only meaningful when matched as regular
# expressions.
REMOVAL_TEXT_PATTERNS = [
    'destroy target',
    'destroys target',
    'exile target',
    'exiles target',
    'sacrifices target',
    'return target.*to.*hand',
    'returns target.*to.*hand',
]

# Currently empty placeholders for the removal theme.
REMOVAL_SPECIFIC_CARDS = []  # type: list
REMOVAL_EXCLUSION_PATTERNS = []  # type: list
REMOVAL_KEYWORDS = []  # type: list
# Constants for counterspell functionality
# Text fragments associated with countering spells.
COUNTERSPELL_TEXT_PATTERNS = [
    # Direct "counter ..." wording
    'control counters a',
    'counter target',
    'counter that spell',
    'counter all',
    'counter each',
    'counter the next',
    'counters a spell',
    'counters target',
    # Spell-targeting effects other than "counter"
    'return target spell',
    'exile target spell',
    # Conditional ("unless") wording
    'counter unless',
    'unless its controller pays',
]
# Card names listed explicitly for the counterspell theme, kept in
# alphabetical order.
COUNTERSPELL_SPECIFIC_CARDS = [
    'Arcane Denial',
    'Counterspell',
    "Dovin's Veto",
    'Force of Will',
    'Mana Drain',
    'Mental Misstep',
    'Mindbreak Trap',
    'Mystic Confluence',
    'Pact of Negation',
    'Swan Song',
]
# Text fragments listed as exclusions for the counterspell theme. All of
# them use "counter" in the sense of counters placed on permanents
# (or, for 'proliferate', an ability that adds such counters).
COUNTERSPELL_EXCLUSION_PATTERNS = [
    'counter on',
    'counter from',
    'remove a counter',
    'move a counter',
    'distribute counter',
    'proliferate',
]
# Constants for theft functionality
THEFT_TEXT_PATTERNS = [
@ -277,9 +410,96 @@ BIG_MANA_KEYWORDS = [
'Improvise',
'Surge'
]
# Flat list of board-wipe text fragments.
# NOTE(review): overlaps heavily with BOARD_WIPE_TEXT_PATTERNS -- confirm
# whether this list is still referenced anywhere.
board_wipe_tags = [
    'destroy all',
    'destroy each',
    'return all',
    'return each',
    'deals damage to each',
    'exile all',
    'exile each',
    'creatures get -X/-X',
    'sacrifices all',
    'sacrifices each',
    'sacrifices the rest',
]
# Constants for board wipe effects
# Board-wipe text fragments grouped by the kind of mass effect.
# Every key carries a 'mass_' prefix; lookups must use the full key.
BOARD_WIPE_TEXT_PATTERNS = {
    'mass_destruction': [
        'destroy all',
        'destroy each',
        'destroy the rest',
        'destroys all',
        'destroys each',
        'destroys the rest',
    ],
    'mass_exile': [
        'exile all',
        'exile each',
        'exile the rest',
        'exiles all',
        'exiles each',
        'exiles the rest',
    ],
    'mass_bounce': [
        'return all',
        'return each',
        'put all creatures',
        'returns all',
        'returns each',
        'puts all creatures',
    ],
    'mass_sacrifice': [
        'sacrifice all',
        'sacrifice each',
        'sacrifice the rest',
        'sacrifices all',
        'sacrifices each',
        'sacrifices the rest',
    ],
    'mass_damage': [
        'deals damage to each',
        'deals damage to all',
        'deals X damage to each',
        'deals X damage to all',
        'deals that much damage to each',
        'deals that much damage to all',
    ],
}
# Card names listed explicitly as board wipes, kept in alphabetical order.
# Names containing an apostrophe use double-quoted literals rather than
# backslash escapes; the resulting strings are the same.
BOARD_WIPE_SPECIFIC_CARDS = [
    "Akroma's Vengeance",
    'All Is Dust',
    'Austere Command',
    'Blasphemous Act',
    'Cleansing Nova',
    'Cyclonic Rift',
    'Damnation',
    'Day of Judgment',
    'Decree of Pain',
    'Devastation Tide',
    'Evacuation',
    'Extinction Event',
    'Farewell',
    'Hour of Devastation',
    "In Garruk's Wake",
    'Living Death',
    'Living End',
    'Merciless Eviction',
    "Nevinyrral's Disk",
    'Oblivion Stone',
    'Planar Cleansing',
    'Ravnica at War',
    'Shatter the Sky',
    'Supreme Verdict',
    'Terminus',
    'Time Wipe',
    'Toxic Deluge',
    'Vanquish the Horde',
    'Wrath of God',
]
# Text fragments listed as exclusions for board-wipe detection.
# Presumably a card matching any of these is not tagged -- verify against
# the caller.
BOARD_WIPE_EXCLUSION_PATTERNS = [
    # Combat wording
    'blocking enchanted',
    'blocking it',
    'blocked by',
    'end the turn',
    # Zone-movement wording
    'from your graveyard',
    'from your hand',
    'from your library',
    'into your hand',
    # Library references
    "target player's library",
    "that player's library",
]
# Card type names, one per line.
card_types = [
    'Artifact',
    'Creature',
    'Enchantment',
    'Instant',
    'Land',
    'Planeswalker',
    'Sorcery',
    'Kindred',
    'Dungeon',
    'Battle',
]

1424
tagger.py

File diff suppressed because it is too large Load diff

View file

@ -68,13 +68,14 @@ def create_type_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex:
masks = [df['type'].str.contains(p, case=False, na=False, regex=False) for p in type_text]
return pd.concat(masks, axis=1).any(axis=1)
def create_text_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series:
def create_text_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True, combine_with_or: bool = True) -> pd.Series:
"""Create a boolean mask for rows where text matches one or more patterns.
Args:
df: DataFrame to search
type_text: Type text pattern(s) to match. Can be a single string or list of strings.
regex: Whether to treat patterns as regex expressions (default: True)
combine_with_or: Whether to combine multiple patterns with OR (True) or AND (False)
Returns:
Boolean Series indicating matching rows
@ -96,7 +97,10 @@ def create_text_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex:
return df['text'].str.contains(pattern, case=False, na=False, regex=True)
else:
masks = [df['text'].str.contains(p, case=False, na=False, regex=False) for p in type_text]
return pd.concat(masks, axis=1).any(axis=1)
if combine_with_or:
return pd.concat(masks, axis=1).any(axis=1)
else:
return pd.concat(masks, axis=1).all(axis=1)
def create_keyword_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series:
"""Create a boolean mask for rows where keyword text matches one or more patterns.
@ -107,12 +111,21 @@ def create_keyword_mask(df: pd.DataFrame, type_text: Union[str, List[str]], rege
regex: Whether to treat patterns as regex expressions (default: True)
Returns:
Boolean Series indicating matching rows
Boolean Series indicating matching rows. For rows with empty/null keywords,
returns False.
Raises:
ValueError: If type_text is empty or None
TypeError: If type_text is not a string or list of strings
ValueError: If required 'keywords' column is missing from DataFrame
"""
# Validate required columns
validate_dataframe_columns(df, {'keywords'})
# Handle empty DataFrame case
if len(df) == 0:
return pd.Series([], dtype=bool)
if not type_text:
raise ValueError("type_text cannot be empty or None")
@ -121,13 +134,18 @@ def create_keyword_mask(df: pd.DataFrame, type_text: Union[str, List[str]], rege
elif not isinstance(type_text, list):
raise TypeError("type_text must be a string or list of strings")
# Create default mask for null values
# Handle null values and convert to string
keywords = df['keywords'].fillna('')
# Convert non-string values to strings
keywords = keywords.astype(str)
if regex:
pattern = '|'.join(f'{p}' for p in type_text)
return df['keywords'].str.contains(pattern, case=False, na=False, regex=True)
return keywords.str.contains(pattern, case=False, na=False, regex=True)
else:
masks = [df['keywords'].str.contains(p, case=False, na=False, regex=False) for p in type_text]
masks = [keywords.str.contains(p, case=False, na=False, regex=False) for p in type_text]
return pd.concat(masks, axis=1).any(axis=1)
def create_name_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series:
"""Create a boolean mask for rows where name matches one or more patterns.
@ -269,4 +287,61 @@ def apply_tag_vectorized(df: pd.DataFrame, mask: pd.Series, tags: List[str]) ->
current_tags = df.loc[mask, 'themeTags']
# Add new tags
df.loc[mask, 'themeTags'] = current_tags.apply(lambda x: sorted(list(set(x + tags))))
df.loc[mask, 'themeTags'] = current_tags.apply(lambda x: sorted(list(set(x + tags))))
def create_mass_effect_mask(df: pd.DataFrame, effect_type: str) -> pd.Series:
    """Create a boolean mask for cards with mass removal effects of a specific type.

    The pattern table in ``settings.BOARD_WIPE_TEXT_PATTERNS`` is keyed with a
    ``mass_`` prefix (``'mass_destruction'``, ``'mass_exile'``, ...). Both the
    short form and the prefixed form are accepted here, so a documented value
    such as ``'destruction'`` no longer raises.

    Args:
        df: DataFrame to search
        effect_type: Type of mass effect to match. Either the short name
            ('destruction', 'exile', 'bounce', 'sacrifice', 'damage') or the
            full key as stored in settings ('mass_destruction', ...).

    Returns:
        Boolean Series indicating which cards have mass effects of the specified type

    Raises:
        ValueError: If effect_type is not recognized in either form
    """
    pattern_table = settings.BOARD_WIPE_TEXT_PATTERNS

    # Try the name exactly as given first, so callers that already pass a
    # full key behave exactly as before.
    key = effect_type
    if key not in pattern_table:
        # Fall back to the prefixed spelling used by the settings table.
        key = f'mass_{effect_type}'
        if key not in pattern_table:
            raise ValueError(f"Unknown effect type: {effect_type}")

    return create_text_mask(df, pattern_table[key])
def create_damage_pattern(number: Union[int, str]) -> str:
    """Build the text fragment 'deals <number> damage'.

    Args:
        number: Damage amount to embed, either a number or a variable such as 'X'

    Returns:
        The fragment with ``number`` formatted between 'deals' and 'damage'
    """
    template = 'deals {} damage'
    return template.format(number)
def create_mass_damage_mask(df: pd.DataFrame) -> pd.Series:
    """Flag cards whose text has both a damage amount and a wide damage target.

    Two masks are built with ``create_text_mask`` and AND-ed together. The
    two conditions are checked independently, so the amount phrase and the
    target phrase do not have to be adjacent in the card text.

    Args:
        df: DataFrame to search

    Returns:
        Boolean Series that is True where both an amount phrase and a target
        phrase matched
    """
    # Amount phrases: 'deals 1 damage' through 'deals 20 damage', then the
    # variable form 'deals X damage'.
    amounts = [*range(1, 21), 'X']
    amount_phrases = [create_damage_pattern(amount) for amount in amounts]

    # Phrases naming who or what is hit.
    scope_phrases = [
        'to each creature',
        'to all creatures',
        'to each player',
        'to each opponent',
        'to everything',
    ]

    has_amount = create_text_mask(df, amount_phrases)
    has_scope = create_text_mask(df, scope_phrases)
    return has_amount & has_scope