mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-09-22 04:50:46 +02:00
Revert "Massively overhauled the tagging process, refactored code to improve general effeciciency, implemented parallel processing to reduce total runtime"
This reverts commit 27ee13fb54
.
This commit is contained in:
parent
039b8fe89e
commit
dd04dd0505
7 changed files with 96 additions and 277 deletions
|
@ -1,29 +1,33 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import math
|
import math
|
||||||
import pprint
|
import numpy as np
|
||||||
|
import os
|
||||||
|
import random
|
||||||
import time
|
import time
|
||||||
# from functools import lru_cache
|
from functools import lru_cache
|
||||||
from typing import Dict, List, Optional, Union
|
from typing import Dict, List, Optional, Union
|
||||||
|
|
||||||
# import keyboard
|
import inquirer.prompt
|
||||||
|
import keyboard
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
# from fuzzywuzzy import process
|
import pprint
|
||||||
|
from fuzzywuzzy import process
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from settings import CSV_DIRECTORY, MULTIPLE_COPY_CARDS
|
from settings import CSV_DIRECTORY, MULTIPLE_COPY_CARDS
|
||||||
from .builder_constants import (
|
from .builder_constants import (
|
||||||
BASIC_LANDS, CARD_TYPES, DEFAULT_NON_BASIC_LAND_SLOTS,
|
BASIC_LANDS, CARD_TYPES, DEFAULT_NON_BASIC_LAND_SLOTS,
|
||||||
FETCH_LAND_DEFAULT_COUNT,
|
COMMANDER_CSV_PATH, FUZZY_MATCH_THRESHOLD, MAX_FUZZY_CHOICES, FETCH_LAND_DEFAULT_COUNT,
|
||||||
COMMANDER_POWER_DEFAULT, COMMANDER_TOUGHNESS_DEFAULT, COMMANDER_MANA_COST_DEFAULT,
|
COMMANDER_POWER_DEFAULT, COMMANDER_TOUGHNESS_DEFAULT, COMMANDER_MANA_COST_DEFAULT,
|
||||||
COMMANDER_MANA_VALUE_DEFAULT, COMMANDER_TYPE_DEFAULT, COMMANDER_TEXT_DEFAULT,
|
COMMANDER_MANA_VALUE_DEFAULT, COMMANDER_TYPE_DEFAULT, COMMANDER_TEXT_DEFAULT,
|
||||||
THEME_PRIORITY_BONUS, THEME_POOL_SIZE_MULTIPLIER, DECK_DIRECTORY,
|
THEME_PRIORITY_BONUS, THEME_POOL_SIZE_MULTIPLIER, DECK_DIRECTORY,
|
||||||
COMMANDER_COLOR_IDENTITY_DEFAULT, COMMANDER_COLORS_DEFAULT, COMMANDER_TAGS_DEFAULT,
|
COMMANDER_COLOR_IDENTITY_DEFAULT, COMMANDER_COLORS_DEFAULT, COMMANDER_TAGS_DEFAULT,
|
||||||
COMMANDER_THEMES_DEFAULT, COMMANDER_CREATURE_TYPES_DEFAULT, DUAL_LAND_TYPE_MAP,
|
COMMANDER_THEMES_DEFAULT, COMMANDER_CREATURE_TYPES_DEFAULT, DUAL_LAND_TYPE_MAP,
|
||||||
CSV_READ_TIMEOUT, CSV_VALIDATION_RULES, CSV_REQUIRED_COLUMNS,
|
CSV_READ_TIMEOUT, CSV_PROCESSING_BATCH_SIZE, CSV_VALIDATION_RULES, CSV_REQUIRED_COLUMNS,
|
||||||
STAPLE_LAND_CONDITIONS, TRIPLE_LAND_TYPE_MAP, MISC_LAND_MAX_COUNT, MISC_LAND_MIN_COUNT,
|
STAPLE_LAND_CONDITIONS, TRIPLE_LAND_TYPE_MAP, MISC_LAND_MAX_COUNT, MISC_LAND_MIN_COUNT,
|
||||||
MISC_LAND_POOL_SIZE, LAND_REMOVAL_MAX_ATTEMPTS, PROTECTED_LANDS,
|
MISC_LAND_POOL_SIZE, LAND_REMOVAL_MAX_ATTEMPTS, PROTECTED_LANDS,
|
||||||
MANA_COLORS, THEME_WEIGHT_MULTIPLIER
|
MANA_COLORS, MANA_PIP_PATTERNS, THEME_WEIGHT_MULTIPLIER
|
||||||
)
|
)
|
||||||
from . import builder_utils
|
from . import builder_utils
|
||||||
from file_setup import setup_utils
|
from file_setup import setup_utils
|
||||||
|
@ -71,7 +75,7 @@ from type_definitions import (
|
||||||
ArtifactDF,
|
ArtifactDF,
|
||||||
CreatureDF,
|
CreatureDF,
|
||||||
NonCreatureDF,
|
NonCreatureDF,
|
||||||
|
PlaneswalkerDF,
|
||||||
NonPlaneswalkerDF)
|
NonPlaneswalkerDF)
|
||||||
|
|
||||||
import logging_util
|
import logging_util
|
||||||
|
|
|
@ -2,14 +2,16 @@
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Any, List, Tuple, Union
|
import logging
|
||||||
|
import os
|
||||||
|
from typing import Any, List, Optional, Tuple, Union
|
||||||
|
|
||||||
import inquirer
|
import inquirer.prompt
|
||||||
from settings import (
|
from settings import (
|
||||||
COLOR_ABRV
|
COLORS, COLOR_ABRV
|
||||||
)
|
)
|
||||||
from deck_builder.builder_constants import (DEFAULT_MAX_CARD_PRICE,
|
from deck_builder.builder_constants import (DEFAULT_MAX_CARD_PRICE,
|
||||||
DEFAULT_THEME_TAGS, MONO_COLOR_MAP,
|
DEFAULT_MAX_DECK_PRICE, DEFAULT_THEME_TAGS, MONO_COLOR_MAP,
|
||||||
DUAL_COLOR_MAP, TRI_COLOR_MAP, OTHER_COLOR_MAP
|
DUAL_COLOR_MAP, TRI_COLOR_MAP, OTHER_COLOR_MAP
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -24,7 +26,7 @@ from exceptions import (
|
||||||
InvalidNumberError,
|
InvalidNumberError,
|
||||||
InvalidQuestionTypeError,
|
InvalidQuestionTypeError,
|
||||||
MaxAttemptsError,
|
MaxAttemptsError,
|
||||||
|
PriceError,
|
||||||
PriceLimitError,
|
PriceLimitError,
|
||||||
PriceValidationError
|
PriceValidationError
|
||||||
)
|
)
|
||||||
|
|
|
@ -13,7 +13,7 @@ from pathlib import Path
|
||||||
from typing import NoReturn, Optional
|
from typing import NoReturn, Optional
|
||||||
|
|
||||||
# Third-party imports
|
# Third-party imports
|
||||||
import inquirer
|
import inquirer.prompt
|
||||||
|
|
||||||
# Local imports
|
# Local imports
|
||||||
from deck_builder import DeckBuilder
|
from deck_builder import DeckBuilder
|
||||||
|
@ -104,7 +104,7 @@ def run_menu() -> NoReturn:
|
||||||
case 'Setup':
|
case 'Setup':
|
||||||
setup()
|
setup()
|
||||||
case 'Tag CSV Files':
|
case 'Tag CSV Files':
|
||||||
tagger.run_tagging(parallel=True)
|
tagger.run_tagging()
|
||||||
case 'Build a Deck':
|
case 'Build a Deck':
|
||||||
builder.determine_commander()
|
builder.determine_commander()
|
||||||
case 'Quit':
|
case 'Quit':
|
||||||
|
|
|
@ -1,16 +1,4 @@
|
||||||
from typing import Dict, List, Final, Iterable
|
from typing import Dict, List, Final
|
||||||
from dataclasses import dataclass
|
|
||||||
from settings import REQUIRED_CARD_COLUMNS as REQUIRED_COLUMNS # unified column list
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
'TRIGGERS', 'NUM_TO_SEARCH', 'TAG_GROUPS', 'PATTERN_GROUPS', 'PHRASE_GROUPS',
|
|
||||||
'CREATE_ACTION_PATTERN', 'COUNTER_TYPES', 'CREATURE_TYPES', 'NON_CREATURE_TYPES',
|
|
||||||
'OUTLAW_TYPES', 'ENCHANTMENT_TOKENS', 'ARTIFACT_TOKENS', 'REQUIRED_COLUMNS',
|
|
||||||
'TYPE_TAG_MAPPING', 'DRAW_RELATED_TAGS', 'DRAW_EXCLUSION_PATTERNS',
|
|
||||||
'EQUIPMENT_EXCLUSIONS', 'EQUIPMENT_SPECIFIC_CARDS', 'EQUIPMENT_RELATED_TAGS',
|
|
||||||
'EQUIPMENT_TEXT_PATTERNS', 'AURA_SPECIFIC_CARDS', 'VOLTRON_COMMANDER_CARDS',
|
|
||||||
'VOLTRON_PATTERNS'
|
|
||||||
]
|
|
||||||
|
|
||||||
TRIGGERS: List[str] = ['when', 'whenever', 'at']
|
TRIGGERS: List[str] = ['when', 'whenever', 'at']
|
||||||
|
|
||||||
|
@ -68,75 +56,41 @@ PHRASE_GROUPS: Dict[str, List[str]] = {
|
||||||
CREATE_ACTION_PATTERN: Final[str] = r"create|put"
|
CREATE_ACTION_PATTERN: Final[str] = r"create|put"
|
||||||
|
|
||||||
# Creature/Counter types
|
# Creature/Counter types
|
||||||
"""Counter type vocabularies."""
|
COUNTER_TYPES: List[str] = [r'\+0/\+1', r'\+0/\+2', r'\+1/\+0', r'\+1/\+2', r'\+2/\+0', r'\+2/\+2',
|
||||||
|
'-0/-1', '-0/-2', '-1/-0', '-1/-2', '-2/-0', '-2/-2',
|
||||||
# Power/Toughness modifier counters (regex fragments already escaped where needed)
|
'Acorn', 'Aegis', 'Age', 'Aim', 'Arrow', 'Arrowhead','Awakening',
|
||||||
PT_COUNTER_TYPES: List[str] = [
|
'Bait', 'Blaze', 'Blessing', 'Blight',' Blood', 'Bloddline',
|
||||||
r'\+0/\+1', r'\+0/\+2', r'\+1/\+0', r'\+1/\+2', r'\+2/\+0', r'\+2/\+2',
|
'Bloodstain', 'Book', 'Bounty', 'Brain', 'Bribery', 'Brick',
|
||||||
'-0/-1', '-0/-2', '-1/-0', '-1/-2', '-2/-0', '-2/-2'
|
'Burden', 'Cage', 'Carrion', 'Charge', 'Coin', 'Collection',
|
||||||
]
|
'Component', 'Contested', 'Corruption', 'CRANK!', 'Credit',
|
||||||
|
'Croak', 'Corpse', 'Crystal', 'Cube', 'Currency', 'Death',
|
||||||
# Named counters (alphabetical within rough thematic blocks)
|
'Defense', 'Delay', 'Depletion', 'Descent', 'Despair', 'Devotion',
|
||||||
NAMED_COUNTER_TYPES: List[str] = [
|
'Divinity', 'Doom', 'Dream', 'Duty', 'Echo', 'Egg', 'Elixir',
|
||||||
'Acorn', 'Aegis', 'Age', 'Aim', 'Arrow', 'Arrowhead', 'Awakening',
|
'Ember', 'Energy', 'Enlightened', 'Eon', 'Eruption', 'Everything',
|
||||||
'Bait', 'Blaze', 'Blessing', 'Blight', 'Blood', 'Bloodline', 'Bloodstain', 'Book',
|
'Experience', 'Eyeball', 'Eyestalk', 'Fade', 'Fate', 'Feather',
|
||||||
'Bounty', 'Brain', 'Bribery', 'Brick', 'Burden', 'Cage', 'Carrion', 'Charge', 'Coin',
|
'Feeding', 'Fellowship', 'Fetch', 'Filibuster', 'Finality', 'Flame',
|
||||||
'Collection', 'Component', 'Contested', 'Corruption', 'CRANK!', 'Credit', 'Croak',
|
'Flood', 'Foreshadow', 'Fungus', 'Fury', 'Fuse', 'Gem', 'Ghostform',
|
||||||
'Corpse', 'Crystal', 'Cube', 'Currency', 'Death', 'Defense', 'Delay', 'Depletion',
|
'Glpyh', 'Gold', 'Growth', 'Hack', 'Harmony', 'Hatching', 'Hatchling',
|
||||||
'Descent', 'Despair', 'Devotion', 'Divinity', 'Doom', 'Dream', 'Duty', 'Echo', 'Egg',
|
'Healing', 'Hit', 'Hope',' Hone', 'Hoofprint', 'Hour', 'Hourglass',
|
||||||
'Elixir', 'Ember', 'Energy', 'Enlightened', 'Eon', 'Eruption', 'Everything',
|
'Hunger', 'Ice', 'Imposter', 'Incarnation', 'Incubation', 'Infection',
|
||||||
'Experience', 'Eyeball', 'Eyestalk', 'Fade', 'Fate', 'Feather', 'Feeding',
|
'Influence', 'Ingenuity', 'Intel', 'Intervention', 'Invitation',
|
||||||
'Fellowship', 'Fetch', 'Filibuster', 'Finality', 'Flame', 'Flood', 'Foreshadow',
|
'Isolation', 'Javelin', 'Judgment', 'Keyword', 'Ki', 'Kick',
|
||||||
'Fungus', 'Fury', 'Fuse', 'Gem', 'Ghostform', 'Glyph', 'Gold', 'Growth', 'Hack',
|
'Knickknack', 'Knowledge', 'Landmark', 'Level', 'Loot', 'Lore',
|
||||||
'Harmony', 'Hatching', 'Hatchling', 'Healing', 'Hit', 'Hope', 'Hone', 'Hoofprint',
|
'Loyalty', 'Luck', 'Magnet', 'Manabond', 'Manifestation', 'Mannequin',
|
||||||
'Hour', 'Hourglass', 'Hunger', 'Ice', 'Imposter', 'Incarnation', 'Incubation',
|
'Mask', 'Matrix', 'Memory', 'Midway', 'Mine', 'Mining', 'Mire',
|
||||||
'Infection', 'Influence', 'Ingenuity', 'Intel', 'Intervention', 'Invitation',
|
'Music', 'Muster', 'Necrodermis', 'Nest', 'Net', 'Night', 'Oil',
|
||||||
'Isolation', 'Javelin', 'Judgment', 'Keyword', 'Ki', 'Kick', 'Knickknack',
|
'Omen', 'Ore', 'Page', 'Pain', 'Palliation', 'Paralyzing', 'Pause',
|
||||||
'Knowledge', 'Landmark', 'Level', 'Loot', 'Lore', 'Loyalty', 'Luck', 'Magnet',
|
'Petal', 'Petrification', 'Phyresis', 'Phylatery', 'Pin', 'Plague',
|
||||||
'Manabond', 'Manifestation', 'Mannequin', 'Mask', 'Matrix', 'Memory', 'Midway',
|
'Plot', 'Point', 'Poison', 'Polyp', 'Possession', 'Pressure', 'Prey',
|
||||||
'Mine', 'Mining', 'Mire', 'Music', 'Muster', 'Necrodermis', 'Nest', 'Net', 'Night',
|
'Pupa', 'Quest', 'Rad', 'Rejection', 'Reprieve', 'Rev', 'Revival',
|
||||||
'Oil', 'Omen', 'Ore', 'Page', 'Pain', 'Palliation', 'Paralyzing', 'Pause', 'Petal',
|
'Ribbon', 'Ritual', 'Rope', 'Rust', 'Scream', 'Scroll', 'Shell',
|
||||||
'Petrification', 'Phyresis', 'Phylactery', 'Pin', 'Plague', 'Plot', 'Point', 'Poison',
|
'Shield', 'Silver', 'Shred', 'Sleep', 'Sleight', 'Slime', 'Slumber',
|
||||||
'Polyp', 'Possession', 'Pressure', 'Prey', 'Pupa', 'Quest', 'Rad', 'Rejection',
|
'Soot', 'Soul', 'Spark', 'Spite', 'Spore', 'Stash', 'Storage',
|
||||||
'Reprieve', 'Rev', 'Revival', 'Ribbon', 'Ritual', 'Rope', 'Rust', 'Scream', 'Scroll',
|
'Story', 'Strife', 'Study', 'Stun', 'Supply', 'Suspect', 'Takeover',
|
||||||
'Shell', 'Shield', 'Silver', 'Shred', 'Sleep', 'Sleight', 'Slime', 'Slumber', 'Soot',
|
'Task', 'Ticket', 'Tide', 'Time', 'Tower', 'Training', 'Trap',
|
||||||
'Soul', 'Spark', 'Spite', 'Spore', 'Stash', 'Storage', 'Story', 'Strife', 'Study',
|
'Treasure', 'Unity', 'Unlock', 'Valor', 'Velocity', 'Verse',
|
||||||
'Stun', 'Supply', 'Suspect', 'Takeover', 'Task', 'Ticket', 'Tide', 'Time', 'Tower',
|
'Vitality', 'Void', 'Volatile', 'Vortex', 'Vow', 'Voyage', 'Wage',
|
||||||
'Training', 'Trap', 'Treasure', 'Unity', 'Unlock', 'Valor', 'Velocity', 'Verse',
|
'Winch', 'Wind', 'Wish']
|
||||||
'Vitality', 'Void', 'Volatile', 'Vortex', 'Vow', 'Voyage', 'Wage', 'Winch', 'Wind',
|
|
||||||
'Wish'
|
|
||||||
]
|
|
||||||
|
|
||||||
# Dataclass describing a counter pattern and display label
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class CounterSpec:
|
|
||||||
pattern: str # Regex fragment (without trailing " counter")
|
|
||||||
label: str # Human-readable label (used in tag text)
|
|
||||||
group: str # 'pt' or 'named' (for future filtering)
|
|
||||||
|
|
||||||
def search_pattern(self) -> str:
|
|
||||||
"""Full regex used for searching (matches singular/plural)."""
|
|
||||||
return rf"{self.pattern} counter[s]?"
|
|
||||||
|
|
||||||
# Helper to derive label from pattern (unescape common sequences)
|
|
||||||
def _derive_label(p: str) -> str:
|
|
||||||
return p.replace('\\+','+')
|
|
||||||
|
|
||||||
def _build_counter_specs(pt_list: Iterable[str], named_list: Iterable[str]) -> List[CounterSpec]:
|
|
||||||
specs: List[CounterSpec] = []
|
|
||||||
specs.extend(CounterSpec(pattern=p, label=_derive_label(p), group='pt') for p in pt_list)
|
|
||||||
specs.extend(CounterSpec(pattern=p, label=p, group='named') for p in named_list)
|
|
||||||
return specs
|
|
||||||
|
|
||||||
ALL_COUNTER_SPECS: List[CounterSpec] = _build_counter_specs(PT_COUNTER_TYPES, NAMED_COUNTER_TYPES)
|
|
||||||
|
|
||||||
# Backward-compatible flat list (legacy usage)
|
|
||||||
COUNTER_TYPES: List[str] = [s.pattern for s in ALL_COUNTER_SPECS]
|
|
||||||
|
|
||||||
# Basic duplication guard (fails fast during import if misconfigured)
|
|
||||||
if len(COUNTER_TYPES) != len(set(COUNTER_TYPES)):
|
|
||||||
duplicate = sorted({p for p in COUNTER_TYPES if COUNTER_TYPES.count(p) > 1})
|
|
||||||
raise ValueError(f"Duplicate counter patterns detected: {duplicate}")
|
|
||||||
|
|
||||||
CREATURE_TYPES: List[str] = ['Advisor', 'Aetherborn', 'Alien', 'Ally', 'Angel', 'Antelope', 'Ape', 'Archer', 'Archon', 'Armadillo',
|
CREATURE_TYPES: List[str] = ['Advisor', 'Aetherborn', 'Alien', 'Ally', 'Angel', 'Antelope', 'Ape', 'Archer', 'Archon', 'Armadillo',
|
||||||
'Army', 'Artificer', 'Assassin', 'Assembly-Worker', 'Astartes', 'Atog', 'Aurochs', 'Automaton',
|
'Army', 'Artificer', 'Assassin', 'Assembly-Worker', 'Astartes', 'Atog', 'Aurochs', 'Automaton',
|
||||||
|
@ -191,7 +145,12 @@ ENCHANTMENT_TOKENS: List[str] = ['Cursed Role', 'Monster Role', 'Royal Role', 'S
|
||||||
ARTIFACT_TOKENS: List[str] = ['Blood', 'Clue', 'Food', 'Gold', 'Incubator',
|
ARTIFACT_TOKENS: List[str] = ['Blood', 'Clue', 'Food', 'Gold', 'Incubator',
|
||||||
'Junk','Map','Powerstone', 'Treasure']
|
'Junk','Map','Powerstone', 'Treasure']
|
||||||
|
|
||||||
# (REQUIRED_COLUMNS imported from settings to avoid duplication)
|
# Constants for DataFrame validation and processing
|
||||||
|
REQUIRED_COLUMNS: List[str] = [
|
||||||
|
'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors',
|
||||||
|
'manaCost', 'manaValue', 'type', 'creatureTypes', 'text',
|
||||||
|
'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side'
|
||||||
|
]
|
||||||
|
|
||||||
# Mapping of card types to their corresponding theme tags
|
# Mapping of card types to their corresponding theme tags
|
||||||
TYPE_TAG_MAPPING: Dict[str, List[str]] = {
|
TYPE_TAG_MAPPING: Dict[str, List[str]] = {
|
||||||
|
|
|
@ -16,10 +16,7 @@ from __future__ import annotations
|
||||||
|
|
||||||
# Standard library imports
|
# Standard library imports
|
||||||
import re
|
import re
|
||||||
from typing import List, Set, Union, Any, Tuple
|
from typing import List, Set, Union, Any
|
||||||
from functools import lru_cache
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
# Third-party imports
|
# Third-party imports
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
@ -27,43 +24,6 @@ import pandas as pd
|
||||||
# Local application imports
|
# Local application imports
|
||||||
from . import tag_constants
|
from . import tag_constants
|
||||||
|
|
||||||
|
|
||||||
# --- Internal helpers for performance -----------------------------------------------------------
|
|
||||||
@lru_cache(maxsize=2048)
|
|
||||||
def _build_joined_pattern(parts: Tuple[str, ...]) -> str:
|
|
||||||
"""Join multiple regex parts with '|'. Cached for reuse across calls."""
|
|
||||||
return '|'.join(parts)
|
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=2048)
|
|
||||||
def _compile_pattern(pattern: str, ignore_case: bool = True):
|
|
||||||
"""Compile a regex pattern with optional IGNORECASE. Cached for reuse."""
|
|
||||||
flags = re.IGNORECASE if ignore_case else 0
|
|
||||||
return re.compile(pattern, flags)
|
|
||||||
|
|
||||||
def _ensure_norm_series(df: pd.DataFrame, source_col: str, norm_col: str) -> pd.Series:
|
|
||||||
"""Ensure a cached normalized string series exists on df for source_col.
|
|
||||||
|
|
||||||
Normalization here means: fillna('') and cast to str once. This avoids
|
|
||||||
repeating fill/astype work on every mask creation. Extra columns are
|
|
||||||
later dropped by final reindex in output.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
df: DataFrame containing the column
|
|
||||||
source_col: Name of the source column (e.g., 'text')
|
|
||||||
norm_col: Name of the cache column to create/use (e.g., '__text_s')
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The normalized pandas Series.
|
|
||||||
"""
|
|
||||||
if norm_col in df.columns:
|
|
||||||
return df[norm_col]
|
|
||||||
# Create normalized string series
|
|
||||||
series = df[source_col].fillna('') if source_col in df.columns else pd.Series([''] * len(df), index=df.index)
|
|
||||||
series = series.astype(str)
|
|
||||||
df[norm_col] = series
|
|
||||||
return df[norm_col]
|
|
||||||
|
|
||||||
def pluralize(word: str) -> str:
|
def pluralize(word: str) -> str:
|
||||||
"""Convert a word to its plural form using basic English pluralization rules.
|
"""Convert a word to its plural form using basic English pluralization rules.
|
||||||
|
|
||||||
|
@ -118,21 +78,12 @@ def create_type_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex:
|
||||||
elif not isinstance(type_text, list):
|
elif not isinstance(type_text, list):
|
||||||
raise TypeError("type_text must be a string or list of strings")
|
raise TypeError("type_text must be a string or list of strings")
|
||||||
|
|
||||||
if len(df) == 0:
|
|
||||||
return pd.Series([], dtype=bool)
|
|
||||||
|
|
||||||
# Use normalized cached series
|
|
||||||
type_series = _ensure_norm_series(df, 'type', '__type_s')
|
|
||||||
|
|
||||||
if regex:
|
if regex:
|
||||||
pattern = _build_joined_pattern(tuple(type_text)) if len(type_text) > 1 else type_text[0]
|
pattern = '|'.join(f'{p}' for p in type_text)
|
||||||
compiled = _compile_pattern(pattern, ignore_case=True)
|
return df['type'].str.contains(pattern, case=False, na=False, regex=True)
|
||||||
return type_series.str.contains(compiled, na=False, regex=True)
|
|
||||||
else:
|
else:
|
||||||
masks = [type_series.str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
masks = [df['type'].str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
||||||
if not masks:
|
return pd.concat(masks, axis=1).any(axis=1)
|
||||||
return pd.Series(False, index=df.index)
|
|
||||||
return pd.Series(np.logical_or.reduce(masks), index=df.index)
|
|
||||||
|
|
||||||
def create_text_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True, combine_with_or: bool = True) -> pd.Series[bool]:
|
def create_text_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True, combine_with_or: bool = True) -> pd.Series[bool]:
|
||||||
"""Create a boolean mask for rows where text matches one or more patterns.
|
"""Create a boolean mask for rows where text matches one or more patterns.
|
||||||
|
@ -158,22 +109,15 @@ def create_text_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex:
|
||||||
elif not isinstance(type_text, list):
|
elif not isinstance(type_text, list):
|
||||||
raise TypeError("type_text must be a string or list of strings")
|
raise TypeError("type_text must be a string or list of strings")
|
||||||
|
|
||||||
if len(df) == 0:
|
|
||||||
return pd.Series([], dtype=bool)
|
|
||||||
|
|
||||||
# Use normalized cached series
|
|
||||||
text_series = _ensure_norm_series(df, 'text', '__text_s')
|
|
||||||
|
|
||||||
if regex:
|
if regex:
|
||||||
pattern = _build_joined_pattern(tuple(type_text)) if len(type_text) > 1 else type_text[0]
|
pattern = '|'.join(f'{p}' for p in type_text)
|
||||||
compiled = _compile_pattern(pattern, ignore_case=True)
|
return df['text'].str.contains(pattern, case=False, na=False, regex=True)
|
||||||
return text_series.str.contains(compiled, na=False, regex=True)
|
|
||||||
else:
|
else:
|
||||||
masks = [text_series.str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
masks = [df['text'].str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
||||||
if not masks:
|
if combine_with_or:
|
||||||
return pd.Series(False, index=df.index)
|
return pd.concat(masks, axis=1).any(axis=1)
|
||||||
reduced = np.logical_or.reduce(masks) if combine_with_or else np.logical_and.reduce(masks)
|
else:
|
||||||
return pd.Series(reduced, index=df.index)
|
return pd.concat(masks, axis=1).all(axis=1)
|
||||||
|
|
||||||
def create_keyword_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series[bool]:
|
def create_keyword_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series[bool]:
|
||||||
"""Create a boolean mask for rows where keyword text matches one or more patterns.
|
"""Create a boolean mask for rows where keyword text matches one or more patterns.
|
||||||
|
@ -207,18 +151,18 @@ def create_keyword_mask(df: pd.DataFrame, type_text: Union[str, List[str]], rege
|
||||||
elif not isinstance(type_text, list):
|
elif not isinstance(type_text, list):
|
||||||
raise TypeError("type_text must be a string or list of strings")
|
raise TypeError("type_text must be a string or list of strings")
|
||||||
|
|
||||||
# Use normalized cached series for keywords
|
# Create default mask for null values
|
||||||
keywords = _ensure_norm_series(df, 'keywords', '__keywords_s')
|
# Handle null values and convert to string
|
||||||
|
keywords = df['keywords'].fillna('')
|
||||||
|
# Convert non-string values to strings
|
||||||
|
keywords = keywords.astype(str)
|
||||||
|
|
||||||
if regex:
|
if regex:
|
||||||
pattern = _build_joined_pattern(tuple(type_text)) if len(type_text) > 1 else type_text[0]
|
pattern = '|'.join(f'{p}' for p in type_text)
|
||||||
compiled = _compile_pattern(pattern, ignore_case=True)
|
return keywords.str.contains(pattern, case=False, na=False, regex=True)
|
||||||
return keywords.str.contains(compiled, na=False, regex=True)
|
|
||||||
else:
|
else:
|
||||||
masks = [keywords.str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
masks = [keywords.str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
||||||
if not masks:
|
return pd.concat(masks, axis=1).any(axis=1)
|
||||||
return pd.Series(False, index=df.index)
|
|
||||||
return pd.Series(np.logical_or.reduce(masks), index=df.index)
|
|
||||||
|
|
||||||
def create_name_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series[bool]:
|
def create_name_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series[bool]:
|
||||||
"""Create a boolean mask for rows where name matches one or more patterns.
|
"""Create a boolean mask for rows where name matches one or more patterns.
|
||||||
|
@ -243,21 +187,12 @@ def create_name_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex:
|
||||||
elif not isinstance(type_text, list):
|
elif not isinstance(type_text, list):
|
||||||
raise TypeError("type_text must be a string or list of strings")
|
raise TypeError("type_text must be a string or list of strings")
|
||||||
|
|
||||||
if len(df) == 0:
|
|
||||||
return pd.Series([], dtype=bool)
|
|
||||||
|
|
||||||
# Use normalized cached series
|
|
||||||
name_series = _ensure_norm_series(df, 'name', '__name_s')
|
|
||||||
|
|
||||||
if regex:
|
if regex:
|
||||||
pattern = _build_joined_pattern(tuple(type_text)) if len(type_text) > 1 else type_text[0]
|
pattern = '|'.join(f'{p}' for p in type_text)
|
||||||
compiled = _compile_pattern(pattern, ignore_case=True)
|
return df['name'].str.contains(pattern, case=False, na=False, regex=True)
|
||||||
return name_series.str.contains(compiled, na=False, regex=True)
|
|
||||||
else:
|
else:
|
||||||
masks = [name_series.str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
masks = [df['name'].str.contains(p, case=False, na=False, regex=False) for p in type_text]
|
||||||
if not masks:
|
return pd.concat(masks, axis=1).any(axis=1)
|
||||||
return pd.Series(False, index=df.index)
|
|
||||||
return pd.Series(np.logical_or.reduce(masks), index=df.index)
|
|
||||||
|
|
||||||
def extract_creature_types(type_text: str, creature_types: List[str], non_creature_types: List[str]) -> List[str]:
|
def extract_creature_types(type_text: str, creature_types: List[str], non_creature_types: List[str]) -> List[str]:
|
||||||
"""Extract creature types from a type text string.
|
"""Extract creature types from a type text string.
|
||||||
|
@ -372,31 +307,6 @@ def apply_tag_vectorized(df: pd.DataFrame, mask: pd.Series[bool], tags: Union[st
|
||||||
# Add new tags
|
# Add new tags
|
||||||
df.loc[mask, 'themeTags'] = current_tags.apply(lambda x: sorted(list(set(x + tags))))
|
df.loc[mask, 'themeTags'] = current_tags.apply(lambda x: sorted(list(set(x + tags))))
|
||||||
|
|
||||||
def apply_rules(df: pd.DataFrame, rules: List[dict]) -> None:
|
|
||||||
"""Apply a list of rules to a DataFrame.
|
|
||||||
|
|
||||||
Each rule dict supports:
|
|
||||||
- mask: pd.Series of booleans or a callable df->mask
|
|
||||||
- tags: str|List[str]
|
|
||||||
|
|
||||||
Example:
|
|
||||||
rules = [
|
|
||||||
{ 'mask': lambda d: create_text_mask(d, 'lifelink'), 'tags': ['Lifelink'] },
|
|
||||||
]
|
|
||||||
|
|
||||||
Args:
|
|
||||||
df: DataFrame to update
|
|
||||||
rules: list of rule dicts
|
|
||||||
"""
|
|
||||||
for rule in rules:
|
|
||||||
mask = rule.get('mask')
|
|
||||||
if callable(mask):
|
|
||||||
mask = mask(df)
|
|
||||||
if mask is None:
|
|
||||||
continue
|
|
||||||
tags = rule.get('tags', [])
|
|
||||||
apply_tag_vectorized(df, mask, tags)
|
|
||||||
|
|
||||||
def create_mass_effect_mask(df: pd.DataFrame, effect_type: str) -> pd.Series[bool]:
|
def create_mass_effect_mask(df: pd.DataFrame, effect_type: str) -> pd.Series[bool]:
|
||||||
"""Create a boolean mask for cards with mass removal effects of a specific type.
|
"""Create a boolean mask for cards with mass removal effects of a specific type.
|
||||||
|
|
||||||
|
@ -416,60 +326,6 @@ def create_mass_effect_mask(df: pd.DataFrame, effect_type: str) -> pd.Series[boo
|
||||||
patterns = tag_constants.BOARD_WIPE_TEXT_PATTERNS[effect_type]
|
patterns = tag_constants.BOARD_WIPE_TEXT_PATTERNS[effect_type]
|
||||||
return create_text_mask(df, patterns)
|
return create_text_mask(df, patterns)
|
||||||
|
|
||||||
def create_trigger_mask(
|
|
||||||
df: pd.DataFrame,
|
|
||||||
subjects: Union[str, List[str]],
|
|
||||||
include_attacks: bool = False,
|
|
||||||
) -> pd.Series:
|
|
||||||
"""Create a mask for text that contains trigger phrases followed by subjects.
|
|
||||||
|
|
||||||
Example: with subjects=['a creature','you'] builds patterns:
|
|
||||||
'when a creature', 'whenever you', 'at you', etc.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
df: DataFrame
|
|
||||||
subjects: A subject string or list (will be normalized to list)
|
|
||||||
include_attacks: If True, also include '{trigger} .* attacks'
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Boolean Series mask
|
|
||||||
"""
|
|
||||||
subs = [subjects] if isinstance(subjects, str) else subjects
|
|
||||||
patterns: List[str] = []
|
|
||||||
for trig in tag_constants.TRIGGERS:
|
|
||||||
patterns.extend([f"{trig} {s}" for s in subs])
|
|
||||||
if include_attacks:
|
|
||||||
patterns.append(f"{trig} .* attacks")
|
|
||||||
return create_text_mask(df, patterns)
|
|
||||||
|
|
||||||
def create_numbered_phrase_mask(
|
|
||||||
df: pd.DataFrame,
|
|
||||||
verb: Union[str, List[str]],
|
|
||||||
noun: str = '',
|
|
||||||
numbers: List[str] | None = None,
|
|
||||||
) -> pd.Series:
|
|
||||||
"""Create a boolean mask for phrases like 'draw {num} card'.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
df: DataFrame to search
|
|
||||||
verb: Action verb or list of verbs (e.g., 'draw' or ['gain', 'gains'])
|
|
||||||
noun: Optional object noun in singular form (e.g., 'card'); if empty, omitted
|
|
||||||
numbers: Optional list of number words/digits (defaults to tag_constants.NUM_TO_SEARCH)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Boolean Series mask
|
|
||||||
"""
|
|
||||||
if numbers is None:
|
|
||||||
numbers = tag_constants.NUM_TO_SEARCH
|
|
||||||
# Normalize verbs to list
|
|
||||||
verbs = [verb] if isinstance(verb, str) else verb
|
|
||||||
# Build patterns
|
|
||||||
if noun:
|
|
||||||
patterns = [fr"{v}\s+{num}\s+{noun}" for v in verbs for num in numbers]
|
|
||||||
else:
|
|
||||||
patterns = [fr"{v}\s+{num}" for v in verbs for num in numbers]
|
|
||||||
return create_text_mask(df, patterns)
|
|
||||||
|
|
||||||
def create_damage_pattern(number: Union[int, str]) -> str:
|
def create_damage_pattern(number: Union[int, str]) -> str:
|
||||||
"""Create a pattern for matching X damage effects.
|
"""Create a pattern for matching X damage effects.
|
||||||
|
|
||||||
|
|
|
@ -3044,15 +3044,18 @@ def tag_for_special_counters(df: pd.DataFrame, color: str) -> None:
|
||||||
start_time = pd.Timestamp.now()
|
start_time = pd.Timestamp.now()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Process each counter type (supports singular/plural)
|
# Process each counter type
|
||||||
counter_counts = {}
|
counter_counts = {}
|
||||||
for spec in tag_constants.ALL_COUNTER_SPECS:
|
for counter_type in tag_constants.COUNTER_TYPES:
|
||||||
pattern = spec.search_pattern()
|
# Create pattern for this counter type
|
||||||
|
pattern = f'{counter_type} counter'
|
||||||
mask = tag_utils.create_text_mask(df, pattern)
|
mask = tag_utils.create_text_mask(df, pattern)
|
||||||
|
|
||||||
if mask.any():
|
if mask.any():
|
||||||
tags = [f'{spec.label} Counters', 'Counters Matter']
|
# Apply tags via rules engine
|
||||||
tag_utils.apply_rules(df, [ {'mask': mask, 'tags': tags} ])
|
tags = [f'{counter_type} Counters', 'Counters Matter']
|
||||||
counter_counts[spec.label] = int(mask.sum())
|
tag_utils.apply_rules(df, [ { 'mask': mask, 'tags': tags } ])
|
||||||
|
counter_counts[counter_type] = mask.sum()
|
||||||
|
|
||||||
# Log results
|
# Log results
|
||||||
duration = (pd.Timestamp.now() - start_time).total_seconds()
|
duration = (pd.Timestamp.now() - start_time).total_seconds()
|
||||||
|
@ -6491,4 +6494,3 @@ def run_tagging(parallel: bool = False, max_workers: int | None = None):
|
||||||
|
|
||||||
duration = (pd.Timestamp.now() - start_time).total_seconds()
|
duration = (pd.Timestamp.now() - start_time).total_seconds()
|
||||||
logger.info(f'Tagged cards in {duration:.2f}s')
|
logger.info(f'Tagged cards in {duration:.2f}s')
|
||||||
|
|
||||||
|
|
|
@ -3,10 +3,6 @@ inquirer>=3.1.3
|
||||||
typing_extensions>=4.5.0
|
typing_extensions>=4.5.0
|
||||||
fuzzywuzzy>=0.18.0
|
fuzzywuzzy>=0.18.0
|
||||||
python-Levenshtein>=0.12.0
|
python-Levenshtein>=0.12.0
|
||||||
tqdm>=4.66.0
|
|
||||||
scrython>=1.10.0
|
|
||||||
numpy>=1.24.0
|
|
||||||
requests>=2.31.0
|
|
||||||
|
|
||||||
# Development dependencies
|
# Development dependencies
|
||||||
mypy>=1.3.0
|
mypy>=1.3.0
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue