mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-09-22 04:50:46 +02:00
Massively overhauled the tagging process, refactored code to improve general efficiency, and implemented parallel processing to reduce total runtime
This commit is contained in:
parent
02e2c09874
commit
27ee13fb54
7 changed files with 742 additions and 519 deletions
|
@ -1,33 +1,29 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
import os
|
||||
import random
|
||||
import pprint
|
||||
import time
|
||||
from functools import lru_cache
|
||||
# from functools import lru_cache
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
import inquirer.prompt
|
||||
import keyboard
|
||||
# import keyboard
|
||||
import pandas as pd
|
||||
import pprint
|
||||
from fuzzywuzzy import process
|
||||
# from fuzzywuzzy import process
|
||||
from tqdm import tqdm
|
||||
|
||||
from settings import CSV_DIRECTORY, MULTIPLE_COPY_CARDS
|
||||
from .builder_constants import (
|
||||
BASIC_LANDS, CARD_TYPES, DEFAULT_NON_BASIC_LAND_SLOTS,
|
||||
COMMANDER_CSV_PATH, FUZZY_MATCH_THRESHOLD, MAX_FUZZY_CHOICES, FETCH_LAND_DEFAULT_COUNT,
|
||||
FETCH_LAND_DEFAULT_COUNT,
|
||||
COMMANDER_POWER_DEFAULT, COMMANDER_TOUGHNESS_DEFAULT, COMMANDER_MANA_COST_DEFAULT,
|
||||
COMMANDER_MANA_VALUE_DEFAULT, COMMANDER_TYPE_DEFAULT, COMMANDER_TEXT_DEFAULT,
|
||||
THEME_PRIORITY_BONUS, THEME_POOL_SIZE_MULTIPLIER, DECK_DIRECTORY,
|
||||
COMMANDER_COLOR_IDENTITY_DEFAULT, COMMANDER_COLORS_DEFAULT, COMMANDER_TAGS_DEFAULT,
|
||||
COMMANDER_THEMES_DEFAULT, COMMANDER_CREATURE_TYPES_DEFAULT, DUAL_LAND_TYPE_MAP,
|
||||
CSV_READ_TIMEOUT, CSV_PROCESSING_BATCH_SIZE, CSV_VALIDATION_RULES, CSV_REQUIRED_COLUMNS,
|
||||
CSV_READ_TIMEOUT, CSV_VALIDATION_RULES, CSV_REQUIRED_COLUMNS,
|
||||
STAPLE_LAND_CONDITIONS, TRIPLE_LAND_TYPE_MAP, MISC_LAND_MAX_COUNT, MISC_LAND_MIN_COUNT,
|
||||
MISC_LAND_POOL_SIZE, LAND_REMOVAL_MAX_ATTEMPTS, PROTECTED_LANDS,
|
||||
MANA_COLORS, MANA_PIP_PATTERNS, THEME_WEIGHT_MULTIPLIER
|
||||
MANA_COLORS, THEME_WEIGHT_MULTIPLIER
|
||||
)
|
||||
from . import builder_utils
|
||||
from file_setup import setup_utils
|
||||
|
@ -75,7 +71,7 @@ from type_definitions import (
|
|||
ArtifactDF,
|
||||
CreatureDF,
|
||||
NonCreatureDF,
|
||||
PlaneswalkerDF,
|
||||
|
||||
NonPlaneswalkerDF)
|
||||
|
||||
import logging_util
|
||||
|
|
|
@ -2,16 +2,14 @@
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, List, Optional, Tuple, Union
|
||||
from typing import Any, List, Tuple, Union
|
||||
|
||||
import inquirer.prompt
|
||||
import inquirer
|
||||
from settings import (
|
||||
COLORS, COLOR_ABRV
|
||||
COLOR_ABRV
|
||||
)
|
||||
from deck_builder.builder_constants import (DEFAULT_MAX_CARD_PRICE,
|
||||
DEFAULT_MAX_DECK_PRICE, DEFAULT_THEME_TAGS, MONO_COLOR_MAP,
|
||||
DEFAULT_THEME_TAGS, MONO_COLOR_MAP,
|
||||
DUAL_COLOR_MAP, TRI_COLOR_MAP, OTHER_COLOR_MAP
|
||||
)
|
||||
|
||||
|
@ -26,7 +24,7 @@ from exceptions import (
|
|||
InvalidNumberError,
|
||||
InvalidQuestionTypeError,
|
||||
MaxAttemptsError,
|
||||
PriceError,
|
||||
|
||||
PriceLimitError,
|
||||
PriceValidationError
|
||||
)
|
||||
|
|
|
@ -13,7 +13,7 @@ from pathlib import Path
|
|||
from typing import NoReturn, Optional
|
||||
|
||||
# Third-party imports
|
||||
import inquirer.prompt
|
||||
import inquirer
|
||||
|
||||
# Local imports
|
||||
from deck_builder import DeckBuilder
|
||||
|
@ -104,7 +104,7 @@ def run_menu() -> NoReturn:
|
|||
case 'Setup':
|
||||
setup()
|
||||
case 'Tag CSV Files':
|
||||
tagger.run_tagging()
|
||||
tagger.run_tagging(parallel=True)
|
||||
case 'Build a Deck':
|
||||
builder.determine_commander()
|
||||
case 'Quit':
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Dict, List, Optional, Final, Tuple, Pattern, Union, Callable
|
||||
from typing import Dict, List, Final
|
||||
|
||||
TRIGGERS: List[str] = ['when', 'whenever', 'at']
|
||||
|
||||
|
@ -20,7 +20,7 @@ TAG_GROUPS: Dict[str, List[str]] = {
|
|||
}
|
||||
|
||||
# Common regex patterns
|
||||
PATTERN_GROUPS: Dict[str, Optional[str]] = {
|
||||
PATTERN_GROUPS: Dict[str, str] = {
|
||||
"draw": r"draw[s]? a card|draw[s]? one card",
|
||||
"combat": r"attack[s]?|block[s]?|combat damage",
|
||||
"tokens": r"create[s]? .* token|put[s]? .* token",
|
||||
|
@ -30,6 +30,31 @@ PATTERN_GROUPS: Dict[str, Optional[str]] = {
|
|||
"cost_reduction": r"cost[s]? \{[\d\w]\} less|affinity for|cost[s]? less to cast|chosen type cost|copy cost|from exile cost|from exile this turn cost|from your graveyard cost|has undaunted|have affinity for artifacts|other than your hand cost|spells cost|spells you cast cost|that target .* cost|those spells cost|you cast cost|you pay cost"
|
||||
}
|
||||
|
||||
# Common phrase groups (lists) used across taggers
|
||||
PHRASE_GROUPS: Dict[str, List[str]] = {
|
||||
# Variants for monarch wording
|
||||
"monarch": [
|
||||
r"becomes? the monarch",
|
||||
r"can\'t become the monarch",
|
||||
r"is the monarch",
|
||||
r"was the monarch",
|
||||
r"you are the monarch",
|
||||
r"you become the monarch",
|
||||
r"you can\'t become the monarch",
|
||||
r"you\'re the monarch"
|
||||
],
|
||||
# Variants for blink-style return to battlefield wording
|
||||
"blink_return": [
|
||||
r"return it to the battlefield",
|
||||
r"return that card to the battlefield",
|
||||
r"return them to the battlefield",
|
||||
r"return those cards to the battlefield",
|
||||
r"return .* to the battlefield"
|
||||
]
|
||||
}
|
||||
# Common action patterns
|
||||
CREATE_ACTION_PATTERN: Final[str] = r"create|put"
|
||||
|
||||
# Creature/Counter types
|
||||
COUNTER_TYPES: List[str] = [r'\+0/\+1', r'\+0/\+2', r'\+1/\+0', r'\+1/\+2', r'\+2/\+0', r'\+2/\+2',
|
||||
'-0/-1', '-0/-2', '-1/-0', '-1/-2', '-2/-0', '-2/-2',
|
||||
|
@ -128,7 +153,7 @@ REQUIRED_COLUMNS: List[str] = [
|
|||
]
|
||||
|
||||
# Mapping of card types to their corresponding theme tags
|
||||
TYPE_TAG_MAPPING: List[str] = {
|
||||
TYPE_TAG_MAPPING: Dict[str, List[str]] = {
|
||||
'Artifact': ['Artifacts Matter'],
|
||||
'Battle': ['Battles Matter'],
|
||||
#'Creature': [],
|
||||
|
@ -268,12 +293,12 @@ LANDS_MATTER_PATTERNS: Dict[str, List[str]] = {
|
|||
]
|
||||
}
|
||||
|
||||
DOMAIN_PATTERNS: List[str] = {
|
||||
DOMAIN_PATTERNS: Dict[str, List[str]] = {
|
||||
'keyword': ['domain'],
|
||||
'text': ['basic land types among lands you control']
|
||||
}
|
||||
|
||||
LANDFALL_PATTERNS: List[str] = {
|
||||
LANDFALL_PATTERNS: Dict[str, List[str]] = {
|
||||
'keyword': ['landfall'],
|
||||
'triggers': [
|
||||
'whenever a land enters the battlefield under your control',
|
||||
|
@ -281,7 +306,7 @@ LANDFALL_PATTERNS: List[str] = {
|
|||
]
|
||||
}
|
||||
|
||||
LANDWALK_PATTERNS: List[str] = {
|
||||
LANDWALK_PATTERNS: Dict[str, List[str]] = {
|
||||
'basic': [
|
||||
'plainswalker',
|
||||
'islandwalk',
|
||||
|
@ -404,7 +429,7 @@ ARISTOCRAT_EXCLUSION_PATTERNS: List[str] = [
|
|||
|
||||
# Constants for stax functionality
|
||||
STAX_TEXT_PATTERNS: List[str] = [
|
||||
'an opponent controls'
|
||||
'an opponent controls',
|
||||
'can\'t attack',
|
||||
'can\'t be cast',
|
||||
'can\'t be activated',
|
||||
|
@ -422,11 +447,7 @@ STAX_TEXT_PATTERNS: List[str] = [
|
|||
'opponents control',
|
||||
'opponents control can\'t',
|
||||
'opponents control enter tapped',
|
||||
'spells cost {1} more',
|
||||
'spells cost {2} more',
|
||||
'spells cost {3} more',
|
||||
'spells cost {4} more',
|
||||
'spells cost {5} more',
|
||||
r'spells cost \{\d\} more',
|
||||
'that player doesn\'t',
|
||||
'unless that player pays',
|
||||
'you control your opponent',
|
||||
|
|
|
@ -16,7 +16,10 @@ from __future__ import annotations
|
|||
|
||||
# Standard library imports
|
||||
import re
|
||||
from typing import List, Set, Union, Any
|
||||
from typing import List, Set, Union, Any, Tuple
|
||||
from functools import lru_cache
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Third-party imports
|
||||
import pandas as pd
|
||||
|
@ -24,6 +27,43 @@ import pandas as pd
|
|||
# Local application imports
|
||||
from . import tag_constants
|
||||
|
||||
|
||||
# --- Internal helpers for performance -----------------------------------------------------------
|
||||
@lru_cache(maxsize=2048)
def _build_joined_pattern(parts: Tuple[str, ...]) -> str:
    """Combine regex fragments into one alternation pattern.

    The fragments are concatenated verbatim with ``|`` between them; no
    grouping or escaping is added. Results are memoized, so ``parts`` must be
    hashable (pass a tuple, not a list).

    Args:
        parts: Regex fragments to combine.

    Returns:
        The fragments separated by ``|`` (an empty string for an empty tuple).
    """
    separator = '|'
    alternation = separator.join(parts)
    return alternation
|
||||
|
||||
|
||||
@lru_cache(maxsize=2048)
def _compile_pattern(pattern: str, ignore_case: bool = True):
    """Return a compiled regex for ``pattern``, memoized per argument pair.

    Args:
        pattern: Regular expression source text.
        ignore_case: When true (the default) the pattern is compiled with
            ``re.IGNORECASE``; otherwise no flags are passed.

    Returns:
        The compiled pattern object produced by ``re.compile``.
    """
    if ignore_case:
        return re.compile(pattern, re.IGNORECASE)
    return re.compile(pattern, 0)
|
||||
|
||||
def _ensure_norm_series(df: pd.DataFrame, source_col: str, norm_col: str) -> pd.Series:
    """Return a string-normalized copy of ``source_col``, cached on ``df``.

    On the first call the source column has its nulls replaced with ``''`` and
    is cast to ``str``; the result is stored on ``df`` under ``norm_col``.
    Later calls return that stored column directly, so the fill/cast work is
    not repeated for every mask built from the same frame.

    Note: ``df`` is mutated (a helper column is added). The cached column is
    returned as-is whenever it already exists, so it is not refreshed if the
    source column changes afterwards. The original author notes the helper
    columns are dropped by a final reindex on output; that code is not visible
    here.

    Args:
        df: DataFrame holding (or missing) the source column.
        source_col: Name of the column to normalize (e.g. ``'text'``).
        norm_col: Name of the cache column to create or reuse
            (e.g. ``'__text_s'``).

    Returns:
        The normalized Series stored at ``df[norm_col]``. If ``source_col``
        is absent, a Series of empty strings aligned to ``df.index``.
    """
    if norm_col not in df.columns:
        if source_col in df.columns:
            normalized = df[source_col].fillna('')
        else:
            # No source data: fall back to an all-empty column of matching length.
            normalized = pd.Series([''] * len(df), index=df.index)
        df[norm_col] = normalized.astype(str)
    return df[norm_col]
|
||||
|
||||
def pluralize(word: str) -> str:
|
||||
"""Convert a word to its plural form using basic English pluralization rules.
|
||||
|
||||
|
@ -78,12 +118,21 @@ def create_type_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex:
|
|||
elif not isinstance(type_text, list):
|
||||
raise TypeError("type_text must be a string or list of strings")
|
||||
|
||||
if len(df) == 0:
|
||||
return pd.Series([], dtype=bool)
|
||||
|
||||
# Use normalized cached series
|
||||
type_series = _ensure_norm_series(df, 'type', '__type_s')
|
||||
|
||||
if regex:
|
||||
pattern = '|'.join(f'{p}' for p in type_text)
|
||||
return df['type'].str.contains(pattern, case=False, na=False, regex=True)
|
||||
pattern = _build_joined_pattern(tuple(type_text)) if len(type_text) > 1 else type_text[0]
|
||||
compiled = _compile_pattern(pattern, ignore_case=True)
|
||||
return type_series.str.contains(compiled, na=False, regex=True)
|
||||
else:
|
||||
masks = [df['type'].str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
||||
return pd.concat(masks, axis=1).any(axis=1)
|
||||
masks = [type_series.str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
||||
if not masks:
|
||||
return pd.Series(False, index=df.index)
|
||||
return pd.Series(np.logical_or.reduce(masks), index=df.index)
|
||||
|
||||
def create_text_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True, combine_with_or: bool = True) -> pd.Series[bool]:
|
||||
"""Create a boolean mask for rows where text matches one or more patterns.
|
||||
|
@ -109,15 +158,22 @@ def create_text_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex:
|
|||
elif not isinstance(type_text, list):
|
||||
raise TypeError("type_text must be a string or list of strings")
|
||||
|
||||
if len(df) == 0:
|
||||
return pd.Series([], dtype=bool)
|
||||
|
||||
# Use normalized cached series
|
||||
text_series = _ensure_norm_series(df, 'text', '__text_s')
|
||||
|
||||
if regex:
|
||||
pattern = '|'.join(f'{p}' for p in type_text)
|
||||
return df['text'].str.contains(pattern, case=False, na=False, regex=True)
|
||||
pattern = _build_joined_pattern(tuple(type_text)) if len(type_text) > 1 else type_text[0]
|
||||
compiled = _compile_pattern(pattern, ignore_case=True)
|
||||
return text_series.str.contains(compiled, na=False, regex=True)
|
||||
else:
|
||||
masks = [df['text'].str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
||||
if combine_with_or:
|
||||
return pd.concat(masks, axis=1).any(axis=1)
|
||||
else:
|
||||
return pd.concat(masks, axis=1).all(axis=1)
|
||||
masks = [text_series.str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
||||
if not masks:
|
||||
return pd.Series(False, index=df.index)
|
||||
reduced = np.logical_or.reduce(masks) if combine_with_or else np.logical_and.reduce(masks)
|
||||
return pd.Series(reduced, index=df.index)
|
||||
|
||||
def create_keyword_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series[bool]:
|
||||
"""Create a boolean mask for rows where keyword text matches one or more patterns.
|
||||
|
@ -151,18 +207,18 @@ def create_keyword_mask(df: pd.DataFrame, type_text: Union[str, List[str]], rege
|
|||
elif not isinstance(type_text, list):
|
||||
raise TypeError("type_text must be a string or list of strings")
|
||||
|
||||
# Create default mask for null values
|
||||
# Handle null values and convert to string
|
||||
keywords = df['keywords'].fillna('')
|
||||
# Convert non-string values to strings
|
||||
keywords = keywords.astype(str)
|
||||
# Use normalized cached series for keywords
|
||||
keywords = _ensure_norm_series(df, 'keywords', '__keywords_s')
|
||||
|
||||
if regex:
|
||||
pattern = '|'.join(f'{p}' for p in type_text)
|
||||
return keywords.str.contains(pattern, case=False, na=False, regex=True)
|
||||
pattern = _build_joined_pattern(tuple(type_text)) if len(type_text) > 1 else type_text[0]
|
||||
compiled = _compile_pattern(pattern, ignore_case=True)
|
||||
return keywords.str.contains(compiled, na=False, regex=True)
|
||||
else:
|
||||
masks = [keywords.str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
||||
return pd.concat(masks, axis=1).any(axis=1)
|
||||
if not masks:
|
||||
return pd.Series(False, index=df.index)
|
||||
return pd.Series(np.logical_or.reduce(masks), index=df.index)
|
||||
|
||||
def create_name_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series[bool]:
|
||||
"""Create a boolean mask for rows where name matches one or more patterns.
|
||||
|
@ -187,12 +243,21 @@ def create_name_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex:
|
|||
elif not isinstance(type_text, list):
|
||||
raise TypeError("type_text must be a string or list of strings")
|
||||
|
||||
if len(df) == 0:
|
||||
return pd.Series([], dtype=bool)
|
||||
|
||||
# Use normalized cached series
|
||||
name_series = _ensure_norm_series(df, 'name', '__name_s')
|
||||
|
||||
if regex:
|
||||
pattern = '|'.join(f'{p}' for p in type_text)
|
||||
return df['name'].str.contains(pattern, case=False, na=False, regex=True)
|
||||
pattern = _build_joined_pattern(tuple(type_text)) if len(type_text) > 1 else type_text[0]
|
||||
compiled = _compile_pattern(pattern, ignore_case=True)
|
||||
return name_series.str.contains(compiled, na=False, regex=True)
|
||||
else:
|
||||
masks = [df['name'].str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
||||
return pd.concat(masks, axis=1).any(axis=1)
|
||||
masks = [name_series.str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
||||
if not masks:
|
||||
return pd.Series(False, index=df.index)
|
||||
return pd.Series(np.logical_or.reduce(masks), index=df.index)
|
||||
|
||||
def extract_creature_types(type_text: str, creature_types: List[str], non_creature_types: List[str]) -> List[str]:
|
||||
"""Extract creature types from a type text string.
|
||||
|
@ -307,6 +372,31 @@ def apply_tag_vectorized(df: pd.DataFrame, mask: pd.Series[bool], tags: Union[st
|
|||
# Add new tags
|
||||
df.loc[mask, 'themeTags'] = current_tags.apply(lambda x: sorted(list(set(x + tags))))
|
||||
|
||||
def apply_rules(df: pd.DataFrame, rules: List[dict]) -> None:
    """Run each tagging rule against ``df`` in order.

    A rule is a dict with two recognized keys:
        - ``mask``: a boolean ``pd.Series``, or a callable that takes the
          DataFrame and returns one.
        - ``tags``: a tag string or list of tag strings (defaults to ``[]``).

    Rules whose mask is missing, or whose callable returns ``None``, are
    skipped. Every other rule is forwarded to ``apply_tag_vectorized``.

    Example:
        rules = [
            {'mask': lambda d: create_text_mask(d, 'lifelink'), 'tags': ['Lifelink']},
        ]

    Args:
        df: DataFrame to update.
        rules: Rule dicts as described above.
    """
    for rule in rules:
        selector = rule.get('mask')
        # Callables are resolved lazily against the frame being tagged.
        resolved = selector(df) if callable(selector) else selector
        if resolved is not None:
            apply_tag_vectorized(df, resolved, rule.get('tags', []))
|
||||
|
||||
def create_mass_effect_mask(df: pd.DataFrame, effect_type: str) -> pd.Series[bool]:
|
||||
"""Create a boolean mask for cards with mass removal effects of a specific type.
|
||||
|
||||
|
@ -326,6 +416,60 @@ def create_mass_effect_mask(df: pd.DataFrame, effect_type: str) -> pd.Series[boo
|
|||
patterns = tag_constants.BOARD_WIPE_TEXT_PATTERNS[effect_type]
|
||||
return create_text_mask(df, patterns)
|
||||
|
||||
def create_trigger_mask(
    df: pd.DataFrame,
    subjects: Union[str, List[str]],
    include_attacks: bool = False,
) -> pd.Series:
    """Create a mask for text that contains trigger phrases followed by subjects.

    For every trigger word in ``tag_constants.TRIGGERS`` and every subject, the
    pattern ``"{trigger} {subject}"`` is built; the combined list is handed to
    ``create_text_mask`` for matching.

    Example: with subjects=['a creature', 'you'] the patterns include
        'when a creature', 'whenever you', 'at you', etc.

    NOTE(review): the patterns carry no word boundary, so a short trigger such
    as 'at' can presumably also match inside longer words ('that you') --
    confirm this is intended.

    Args:
        df: DataFrame to search.
        subjects: A subject string or list of subject strings (a single string
            is wrapped in a list). Subjects are inserted into the patterns
            unescaped.
        include_attacks: If True, also include '{trigger} .* attacks' for each
            trigger word.

    Returns:
        Boolean Series mask. When there is nothing to search for (no subjects
        and ``include_attacks`` is False) an all-False mask aligned to
        ``df.index`` is returned.
    """
    subs = [subjects] if isinstance(subjects, str) else subjects
    # An empty pattern list would make create_text_mask index into an empty
    # list and raise IndexError; no patterns simply means no row matches.
    if not subs and not include_attacks:
        return pd.Series(False, index=df.index, dtype=bool)
    patterns: List[str] = []
    for trig in tag_constants.TRIGGERS:
        patterns.extend(f"{trig} {s}" for s in subs)
        if include_attacks:
            patterns.append(f"{trig} .* attacks")
    return create_text_mask(df, patterns)
|
||||
|
||||
def create_numbered_phrase_mask(
    df: pd.DataFrame,
    verb: Union[str, List[str]],
    noun: str = '',
    numbers: List[str] | None = None,
) -> pd.Series:
    """Create a boolean mask for phrases like 'draw {num} card'.

    One regex is built per (verb, number) pair, with ``\\s+`` between the
    words, and the full list is handed to ``create_text_mask`` for matching.
    Verbs, numbers and the noun are inserted into the regex unescaped.

    Args:
        df: DataFrame to search.
        verb: Action verb or list of verbs (e.g., 'draw' or ['gain', 'gains']).
        noun: Optional object noun in singular form (e.g., 'card'); if empty,
            the pattern stops after the number.
        numbers: Optional list of number words/digits (defaults to
            tag_constants.NUM_TO_SEARCH).

    Returns:
        Boolean Series mask. When no pattern can be built (empty ``verb`` list
        or empty ``numbers`` list) an all-False mask aligned to ``df.index``
        is returned.
    """
    if numbers is None:
        numbers = tag_constants.NUM_TO_SEARCH
    # Normalize verbs to a list.
    verbs = [verb] if isinstance(verb, str) else verb
    # Build one pattern per verb/number combination.
    if noun:
        patterns = [fr"{v}\s+{num}\s+{noun}" for v in verbs for num in numbers]
    else:
        patterns = [fr"{v}\s+{num}" for v in verbs for num in numbers]
    # An empty pattern list would make create_text_mask index into an empty
    # list and raise IndexError; no patterns simply means no row matches.
    if not patterns:
        return pd.Series(False, index=df.index, dtype=bool)
    return create_text_mask(df, patterns)
|
||||
|
||||
def create_damage_pattern(number: Union[int, str]) -> str:
|
||||
"""Create a pattern for matching X damage effects.
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue