mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-09-22 04:50:46 +02:00
Refactored multiple tagger functions, through to lifegain related functions so that they use vector masks in place of iterrows
This commit is contained in:
parent
083ef937af
commit
aac2b26be8
3 changed files with 1818 additions and 1227 deletions
48
settings.py
48
settings.py
|
@ -206,4 +206,52 @@ COLUMN_ORDER = [
|
|||
|
||||
# Constants for type detection and processing
OUTLAW_TYPES = ['Assassin', 'Mercenary', 'Pirate', 'Rogue', 'Warlock']
# Defined exactly once; the value is unchanged from the earlier duplicate
# assignments in this section.
TYPE_DETECTION_BATCH_SIZE = 1000

# Aura-related constants
AURA_SPECIFIC_CARDS = [
    'Ardenn, Intrepid Archaeologist',  # Aura movement
    'Calix, Guided By Fate',  # Create duplicate Auras
    'Gilwain, Casting Director',  # Creates role tokens
    'Ivy, Gleeful Spellthief',  # Copies spells that have single target
    'Killian, Ink Duelist',  # Targeted spell cost reduction
]

# Equipment-related constants
EQUIPMENT_EXCLUSIONS = [
    'Bruenor Battlehammer',  # Equipment cost reduction
    'Nazahn, Revered Bladesmith',  # Equipment tutor
    'Stonehewer Giant',  # Equipment tutor
]

EQUIPMENT_SPECIFIC_CARDS = [
    'Ardenn, Intrepid Archaeologist',  # Equipment movement
    'Armory Automaton',  # Mass equip ability
    'Brass Squire',  # Free equip ability
    'Danitha Capashen, Paragon',  # Equipment cost reduction
    'Halvar, God of Battle',  # Equipment movement
    'Kemba, Kha Regent',  # Equipment payoff
    'Kosei, Penitent Warlord',  # Wants to be equipped
    'Puresteel Paladin',  # Equipment draw engine
    'Reyav, Master Smith',  # Equipment combat boost
    'Sram, Senior Edificer',  # Equipment card draw
    'Valduk, Keeper of the Flame',  # Equipment token creation
]

EQUIPMENT_RELATED_TAGS = [
    'Equipment',  # Base equipment tag
    'Equipment Matters',  # Cards that care about equipment
    'Voltron',  # Commander-focused equipment strategy
    'Artifacts Matter',  # Equipment are artifacts
    'Warriors Matter',  # Common equipment tribal synergy
    'Knights Matter',  # Common equipment tribal synergy
]

EQUIPMENT_TEXT_PATTERNS = [
    'attach',  # Equipment attachment
    'equip',  # Equipment keyword
    'equipped',  # Equipment state
    'equipment',  # Equipment type
    'unattach',  # Equipment removal
    'unequip',  # Equipment removal
]
|
97
utility.py
97
utility.py
|
@ -1,9 +1,12 @@
|
|||
from typing import Union, List
|
||||
import pandas as pd
|
||||
import re
|
||||
import logging
|
||||
from typing import Dict, Optional, Set
|
||||
|
||||
from typing import Dict, List, Optional, Set, Union
|
||||
from time import perf_counter
|
||||
|
||||
import settings
|
||||
|
||||
def pluralize(word: str) -> str:
|
||||
"""Convert a word to its plural form using basic English pluralization rules.
|
||||
|
||||
|
@ -65,6 +68,96 @@ def create_type_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex:
|
|||
masks = [df['type'].str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
||||
return pd.concat(masks, axis=1).any(axis=1)
|
||||
|
||||
def create_text_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series:
    """Create a boolean mask for rows whose ``text`` column matches any pattern.

    Matching is case-insensitive, and rows whose ``text`` value is missing
    never match.

    Args:
        df: DataFrame to search; it must contain a ``text`` column.
        type_text: Pattern(s) to look for in the ``text`` column. Can be a
            single string or a list of strings. (The parameter name is kept
            for compatibility with existing callers.)
        regex: Whether to treat patterns as regex expressions (default: True).
            When False, every pattern is matched as a literal substring.

    Returns:
        Boolean Series indicating matching rows

    Raises:
        ValueError: If type_text is empty or None
        TypeError: If type_text is not a string or list of strings
    """
    if not type_text:
        raise ValueError("type_text cannot be empty or None")

    if isinstance(type_text, str):
        patterns = [type_text]
    elif isinstance(type_text, list):
        patterns = type_text
    else:
        raise TypeError("type_text must be a string or list of strings")

    # Enforce the documented contract for list elements as well: without this
    # check a stray None/int would be silently stringified into the regex
    # (e.g. the pattern "None") instead of being reported to the caller.
    if not all(isinstance(pattern, str) for pattern in patterns):
        raise TypeError("type_text must be a string or list of strings")

    column = df['text']

    if regex:
        # A single alternation lets pandas scan the column once.
        combined = '|'.join(patterns)
        return column.str.contains(combined, case=False, na=False, regex=True)

    # Literal mode: one mask per pattern, then OR them together row-wise.
    masks = [column.str.contains(pattern, case=False, na=False, regex=False) for pattern in patterns]
    return pd.concat(masks, axis=1).any(axis=1)
|
||||
|
||||
def create_keyword_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series:
    """Create a boolean mask for rows whose ``keywords`` column matches any pattern.

    Matching is case-insensitive, and rows whose ``keywords`` value is missing
    never match.

    Args:
        df: DataFrame to search; it must contain a ``keywords`` column.
        type_text: Pattern(s) to look for in the ``keywords`` column. Can be a
            single string or a list of strings. (The parameter name is kept
            for compatibility with existing callers.)
        regex: Whether to treat patterns as regex expressions (default: True).
            When False, every pattern is matched as a literal substring.

    Returns:
        Boolean Series indicating matching rows

    Raises:
        ValueError: If type_text is empty or None
        TypeError: If type_text is not a string or list of strings
    """
    if not type_text:
        raise ValueError("type_text cannot be empty or None")

    if isinstance(type_text, str):
        patterns = [type_text]
    elif isinstance(type_text, list):
        patterns = type_text
    else:
        raise TypeError("type_text must be a string or list of strings")

    # Enforce the documented contract for list elements as well: without this
    # check a stray None/int would be silently stringified into the regex
    # (e.g. the pattern "None") instead of being reported to the caller.
    if not all(isinstance(pattern, str) for pattern in patterns):
        raise TypeError("type_text must be a string or list of strings")

    column = df['keywords']

    if regex:
        # A single alternation lets pandas scan the column once.
        combined = '|'.join(patterns)
        return column.str.contains(combined, case=False, na=False, regex=True)

    # Literal mode: one mask per pattern, then OR them together row-wise.
    masks = [column.str.contains(pattern, case=False, na=False, regex=False) for pattern in patterns]
    return pd.concat(masks, axis=1).any(axis=1)
|
||||
|
||||
def create_name_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series:
    """Create a boolean mask for rows whose ``name`` column matches any pattern.

    Matching is case-insensitive, and rows whose ``name`` value is missing
    never match.

    Args:
        df: DataFrame to search; it must contain a ``name`` column.
        type_text: Pattern(s) to look for in the ``name`` column. Can be a
            single string or a list of strings. (The parameter name is kept
            for compatibility with existing callers.)
        regex: Whether to treat patterns as regex expressions (default: True).
            When False, every pattern is matched as a literal substring.

    Returns:
        Boolean Series indicating matching rows

    Raises:
        ValueError: If type_text is empty or None
        TypeError: If type_text is not a string or list of strings
    """
    if not type_text:
        raise ValueError("type_text cannot be empty or None")

    if isinstance(type_text, str):
        patterns = [type_text]
    elif isinstance(type_text, list):
        patterns = type_text
    else:
        raise TypeError("type_text must be a string or list of strings")

    # Enforce the documented contract for list elements as well: without this
    # check a stray None/int would be silently stringified into the regex
    # (e.g. the pattern "None") instead of being reported to the caller.
    if not all(isinstance(pattern, str) for pattern in patterns):
        raise TypeError("type_text must be a string or list of strings")

    column = df['name']

    if regex:
        # A single alternation lets pandas scan the column once.
        combined = '|'.join(patterns)
        return column.str.contains(combined, case=False, na=False, regex=True)

    # Literal mode: one mask per pattern, then OR them together row-wise.
    masks = [column.str.contains(pattern, case=False, na=False, regex=False) for pattern in patterns]
    return pd.concat(masks, axis=1).any(axis=1)
|
||||
|
||||
def extract_creature_types(type_text: str, creature_types: List[str], non_creature_types: List[str]) -> List[str]:
|
||||
"""Extract creature types from a type text string.
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue