From dd04dd050538c6a86acf4363f60d1a4035901ba4 Mon Sep 17 00:00:00 2001 From: mwisnowski Date: Fri, 15 Aug 2025 10:04:19 -0700 Subject: [PATCH] Revert "Massively overhauled the tagging process, refactored code to improve general effeciciency, implemented parallel processing to reduce total runtime" This reverts commit 27ee13fb54740c6ce78dac5f30705faf9e6e5611. --- code/deck_builder/builder.py | 20 ++-- code/input_handler.py | 12 ++- code/main.py | 4 +- code/tagging/tag_constants.py | 125 ++++++++-------------- code/tagging/tag_utils.py | 192 +++++----------------------------- code/tagging/tagger.py | 16 +-- requirements.txt | 4 - 7 files changed, 96 insertions(+), 277 deletions(-) diff --git a/code/deck_builder/builder.py b/code/deck_builder/builder.py index d9a5fb5..e0eb0d8 100644 --- a/code/deck_builder/builder.py +++ b/code/deck_builder/builder.py @@ -1,29 +1,33 @@ from __future__ import annotations import math -import pprint +import numpy as np +import os +import random import time -# from functools import lru_cache +from functools import lru_cache from typing import Dict, List, Optional, Union -# import keyboard +import inquirer.prompt +import keyboard import pandas as pd -# from fuzzywuzzy import process +import pprint +from fuzzywuzzy import process from tqdm import tqdm from settings import CSV_DIRECTORY, MULTIPLE_COPY_CARDS from .builder_constants import ( BASIC_LANDS, CARD_TYPES, DEFAULT_NON_BASIC_LAND_SLOTS, - FETCH_LAND_DEFAULT_COUNT, + COMMANDER_CSV_PATH, FUZZY_MATCH_THRESHOLD, MAX_FUZZY_CHOICES, FETCH_LAND_DEFAULT_COUNT, COMMANDER_POWER_DEFAULT, COMMANDER_TOUGHNESS_DEFAULT, COMMANDER_MANA_COST_DEFAULT, COMMANDER_MANA_VALUE_DEFAULT, COMMANDER_TYPE_DEFAULT, COMMANDER_TEXT_DEFAULT, THEME_PRIORITY_BONUS, THEME_POOL_SIZE_MULTIPLIER, DECK_DIRECTORY, COMMANDER_COLOR_IDENTITY_DEFAULT, COMMANDER_COLORS_DEFAULT, COMMANDER_TAGS_DEFAULT, COMMANDER_THEMES_DEFAULT, COMMANDER_CREATURE_TYPES_DEFAULT, DUAL_LAND_TYPE_MAP, - CSV_READ_TIMEOUT, CSV_VALIDATION_RULES, CSV_REQUIRED_COLUMNS, + CSV_READ_TIMEOUT, CSV_PROCESSING_BATCH_SIZE, CSV_VALIDATION_RULES, CSV_REQUIRED_COLUMNS, STAPLE_LAND_CONDITIONS, TRIPLE_LAND_TYPE_MAP, MISC_LAND_MAX_COUNT, MISC_LAND_MIN_COUNT, MISC_LAND_POOL_SIZE, LAND_REMOVAL_MAX_ATTEMPTS, PROTECTED_LANDS, - MANA_COLORS, THEME_WEIGHT_MULTIPLIER + MANA_COLORS, MANA_PIP_PATTERNS, THEME_WEIGHT_MULTIPLIER ) from . import builder_utils from file_setup import setup_utils @@ -71,7 +75,7 @@ from type_definitions import ( ArtifactDF, CreatureDF, NonCreatureDF, - + PlaneswalkerDF, NonPlaneswalkerDF) import logging_util diff --git a/code/input_handler.py b/code/input_handler.py index 97860de..811dfd8 100644 --- a/code/input_handler.py +++ b/code/input_handler.py @@ -2,14 +2,16 @@ from __future__ import annotations -from typing import Any, List, Tuple, Union +import logging +import os +from typing import Any, List, Optional, Tuple, Union -import inquirer +import inquirer.prompt from settings import ( - COLOR_ABRV + COLORS, COLOR_ABRV ) from deck_builder.builder_constants import (DEFAULT_MAX_CARD_PRICE, - DEFAULT_THEME_TAGS, MONO_COLOR_MAP, + DEFAULT_MAX_DECK_PRICE, DEFAULT_THEME_TAGS, MONO_COLOR_MAP, DUAL_COLOR_MAP, TRI_COLOR_MAP, OTHER_COLOR_MAP ) @@ -24,7 +26,7 @@ from exceptions import ( InvalidNumberError, InvalidQuestionTypeError, MaxAttemptsError, - + PriceError, PriceLimitError, PriceValidationError ) diff --git a/code/main.py b/code/main.py index b334a2f..e9f8ef5 100644 --- a/code/main.py +++ b/code/main.py @@ -13,7 +13,7 @@ from pathlib import Path from typing import NoReturn, Optional # Third-party imports -import inquirer +import inquirer.prompt # Local imports from deck_builder import DeckBuilder @@ -104,7 +104,7 @@ def run_menu() -> NoReturn: case 'Setup': setup() case 'Tag CSV Files': - tagger.run_tagging(parallel=True) + tagger.run_tagging() case 'Build a Deck': builder.determine_commander() case 'Quit': diff --git a/code/tagging/tag_constants.py b/code/tagging/tag_constants.py index 5cec06b..232e040 100644 --- a/code/tagging/tag_constants.py +++ b/code/tagging/tag_constants.py @@ -1,16 +1,4 @@ -from typing import Dict, List, Final, Iterable -from dataclasses import dataclass -from settings import REQUIRED_CARD_COLUMNS as REQUIRED_COLUMNS # unified column list - -__all__ = [ - 'TRIGGERS', 'NUM_TO_SEARCH', 'TAG_GROUPS', 'PATTERN_GROUPS', 'PHRASE_GROUPS', - 'CREATE_ACTION_PATTERN', 'COUNTER_TYPES', 'CREATURE_TYPES', 'NON_CREATURE_TYPES', - 'OUTLAW_TYPES', 'ENCHANTMENT_TOKENS', 'ARTIFACT_TOKENS', 'REQUIRED_COLUMNS', - 'TYPE_TAG_MAPPING', 'DRAW_RELATED_TAGS', 'DRAW_EXCLUSION_PATTERNS', - 'EQUIPMENT_EXCLUSIONS', 'EQUIPMENT_SPECIFIC_CARDS', 'EQUIPMENT_RELATED_TAGS', - 'EQUIPMENT_TEXT_PATTERNS', 'AURA_SPECIFIC_CARDS', 'VOLTRON_COMMANDER_CARDS', - 'VOLTRON_PATTERNS' -] +from typing import Dict, List, Final TRIGGERS: List[str] = ['when', 'whenever', 'at'] @@ -68,75 +56,41 @@ PHRASE_GROUPS: Dict[str, List[str]] = { CREATE_ACTION_PATTERN: Final[str] = r"create|put" # Creature/Counter types -"""Counter type vocabularies.""" - -# Power/Toughness modifier counters (regex fragments already escaped where needed) -PT_COUNTER_TYPES: List[str] = [ - r'\+0/\+1', r'\+0/\+2', r'\+1/\+0', r'\+1/\+2', r'\+2/\+0', r'\+2/\+2', - '-0/-1', '-0/-2', '-1/-0', '-1/-2', '-2/-0', '-2/-2' -] - -# Named counters (alphabetical within rough thematic blocks) -NAMED_COUNTER_TYPES: List[str] = [ - 'Acorn', 'Aegis', 'Age', 'Aim', 'Arrow', 'Arrowhead', 'Awakening', - 'Bait', 'Blaze', 'Blessing', 'Blight', 'Blood', 'Bloodline', 'Bloodstain', 'Book', - 'Bounty', 'Brain', 'Bribery', 'Brick', 'Burden', 'Cage', 'Carrion', 'Charge', 'Coin', - 'Collection', 'Component', 'Contested', 'Corruption', 'CRANK!', 'Credit', 'Croak', - 'Corpse', 'Crystal', 'Cube', 'Currency', 'Death', 'Defense', 'Delay', 'Depletion', - 'Descent', 'Despair', 'Devotion', 'Divinity', 'Doom', 'Dream', 'Duty', 'Echo', 'Egg', - 'Elixir', 'Ember', 'Energy', 'Enlightened', 'Eon', 'Eruption', 'Everything', - 'Experience', 'Eyeball', 'Eyestalk', 'Fade', 'Fate', 'Feather', 'Feeding', - 'Fellowship', 'Fetch', 'Filibuster', 'Finality', 'Flame', 'Flood', 'Foreshadow', - 'Fungus', 'Fury', 'Fuse', 'Gem', 'Ghostform', 'Glyph', 'Gold', 'Growth', 'Hack', - 'Harmony', 'Hatching', 'Hatchling', 'Healing', 'Hit', 'Hope', 'Hone', 'Hoofprint', - 'Hour', 'Hourglass', 'Hunger', 'Ice', 'Imposter', 'Incarnation', 'Incubation', - 'Infection', 'Influence', 'Ingenuity', 'Intel', 'Intervention', 'Invitation', - 'Isolation', 'Javelin', 'Judgment', 'Keyword', 'Ki', 'Kick', 'Knickknack', - 'Knowledge', 'Landmark', 'Level', 'Loot', 'Lore', 'Loyalty', 'Luck', 'Magnet', - 'Manabond', 'Manifestation', 'Mannequin', 'Mask', 'Matrix', 'Memory', 'Midway', - 'Mine', 'Mining', 'Mire', 'Music', 'Muster', 'Necrodermis', 'Nest', 'Net', 'Night', - 'Oil', 'Omen', 'Ore', 'Page', 'Pain', 'Palliation', 'Paralyzing', 'Pause', 'Petal', - 'Petrification', 'Phyresis', 'Phylactery', 'Pin', 'Plague', 'Plot', 'Point', 'Poison', - 'Polyp', 'Possession', 'Pressure', 'Prey', 'Pupa', 'Quest', 'Rad', 'Rejection', - 'Reprieve', 'Rev', 'Revival', 'Ribbon', 'Ritual', 'Rope', 'Rust', 'Scream', 'Scroll', - 'Shell', 'Shield', 'Silver', 'Shred', 'Sleep', 'Sleight', 'Slime', 'Slumber', 'Soot', - 'Soul', 'Spark', 'Spite', 'Spore', 'Stash', 'Storage', 'Story', 'Strife', 'Study', - 'Stun', 'Supply', 'Suspect', 'Takeover', 'Task', 'Ticket', 'Tide', 'Time', 'Tower', - 'Training', 'Trap', 'Treasure', 'Unity', 'Unlock', 'Valor', 'Velocity', 'Verse', - 'Vitality', 'Void', 'Volatile', 'Vortex', 'Vow', 'Voyage', 'Wage', 'Winch', 'Wind', - 'Wish' -] - -# Dataclass describing a counter pattern and display label -@dataclass(frozen=True) -class CounterSpec: - pattern: str # Regex fragment (without trailing " counter") - label: str # Human-readable label (used in tag text) - group: str # 'pt' or 'named' (for future filtering) - - def search_pattern(self) -> str: - """Full regex used for searching (matches singular/plural).""" - return rf"{self.pattern} counter[s]?" - -# Helper to derive label from pattern (unescape common sequences) -def _derive_label(p: str) -> str: - return p.replace('\\+','+') - -def _build_counter_specs(pt_list: Iterable[str], named_list: Iterable[str]) -> List[CounterSpec]: - specs: List[CounterSpec] = [] - specs.extend(CounterSpec(pattern=p, label=_derive_label(p), group='pt') for p in pt_list) - specs.extend(CounterSpec(pattern=p, label=p, group='named') for p in named_list) - return specs - -ALL_COUNTER_SPECS: List[CounterSpec] = _build_counter_specs(PT_COUNTER_TYPES, NAMED_COUNTER_TYPES) - -# Backward-compatible flat list (legacy usage) -COUNTER_TYPES: List[str] = [s.pattern for s in ALL_COUNTER_SPECS] - -# Basic duplication guard (fails fast during import if misconfigured) -if len(COUNTER_TYPES) != len(set(COUNTER_TYPES)): - duplicate = sorted({p for p in COUNTER_TYPES if COUNTER_TYPES.count(p) > 1}) - raise ValueError(f"Duplicate counter patterns detected: {duplicate}") +COUNTER_TYPES: List[str] = [r'\+0/\+1', r'\+0/\+2', r'\+1/\+0', r'\+1/\+2', r'\+2/\+0', r'\+2/\+2', + '-0/-1', '-0/-2', '-1/-0', '-1/-2', '-2/-0', '-2/-2', + 'Acorn', 'Aegis', 'Age', 'Aim', 'Arrow', 'Arrowhead','Awakening', + 'Bait', 'Blaze', 'Blessing', 'Blight',' Blood', 'Bloddline', + 'Bloodstain', 'Book', 'Bounty', 'Brain', 'Bribery', 'Brick', + 'Burden', 'Cage', 'Carrion', 'Charge', 'Coin', 'Collection', + 'Component', 'Contested', 'Corruption', 'CRANK!', 'Credit', + 'Croak', 'Corpse', 'Crystal', 'Cube', 'Currency', 'Death', + 'Defense', 'Delay', 'Depletion', 'Descent', 'Despair', 'Devotion', + 'Divinity', 'Doom', 'Dream', 'Duty', 'Echo', 'Egg', 'Elixir', + 'Ember', 'Energy', 'Enlightened', 'Eon', 'Eruption', 'Everything', + 'Experience', 'Eyeball', 'Eyestalk', 'Fade', 'Fate', 'Feather', + 'Feeding', 'Fellowship', 'Fetch', 'Filibuster', 'Finality', 'Flame', + 'Flood', 'Foreshadow', 'Fungus', 'Fury', 'Fuse', 'Gem', 'Ghostform', + 'Glpyh', 'Gold', 'Growth', 'Hack', 'Harmony', 'Hatching', 'Hatchling', + 'Healing', 'Hit', 'Hope',' Hone', 'Hoofprint', 'Hour', 'Hourglass', + 'Hunger', 'Ice', 'Imposter', 'Incarnation', 'Incubation', 'Infection', + 'Influence', 'Ingenuity', 'Intel', 'Intervention', 'Invitation', + 'Isolation', 'Javelin', 'Judgment', 'Keyword', 'Ki', 'Kick', + 'Knickknack', 'Knowledge', 'Landmark', 'Level', 'Loot', 'Lore', + 'Loyalty', 'Luck', 'Magnet', 'Manabond', 'Manifestation', 'Mannequin', + 'Mask', 'Matrix', 'Memory', 'Midway', 'Mine', 'Mining', 'Mire', + 'Music', 'Muster', 'Necrodermis', 'Nest', 'Net', 'Night', 'Oil', + 'Omen', 'Ore', 'Page', 'Pain', 'Palliation', 'Paralyzing', 'Pause', + 'Petal', 'Petrification', 'Phyresis', 'Phylatery', 'Pin', 'Plague', + 'Plot', 'Point', 'Poison', 'Polyp', 'Possession', 'Pressure', 'Prey', + 'Pupa', 'Quest', 'Rad', 'Rejection', 'Reprieve', 'Rev', 'Revival', + 'Ribbon', 'Ritual', 'Rope', 'Rust', 'Scream', 'Scroll', 'Shell', + 'Shield', 'Silver', 'Shred', 'Sleep', 'Sleight', 'Slime', 'Slumber', + 'Soot', 'Soul', 'Spark', 'Spite', 'Spore', 'Stash', 'Storage', + 'Story', 'Strife', 'Study', 'Stun', 'Supply', 'Suspect', 'Takeover', + 'Task', 'Ticket', 'Tide', 'Time', 'Tower', 'Training', 'Trap', + 'Treasure', 'Unity', 'Unlock', 'Valor', 'Velocity', 'Verse', + 'Vitality', 'Void', 'Volatile', 'Vortex', 'Vow', 'Voyage', 'Wage', + 'Winch', 'Wind', 'Wish'] CREATURE_TYPES: List[str] = ['Advisor', 'Aetherborn', 'Alien', 'Ally', 'Angel', 'Antelope', 'Ape', 'Archer', 'Archon', 'Armadillo', 'Army', 'Artificer', 'Assassin', 'Assembly-Worker', 'Astartes', 'Atog', 'Aurochs', 'Automaton', @@ -191,7 +145,12 @@ ENCHANTMENT_TOKENS: List[str] = ['Cursed Role', 'Monster Role', 'Royal Role', 'S ARTIFACT_TOKENS: List[str] = ['Blood', 'Clue', 'Food', 'Gold', 'Incubator', 'Junk','Map','Powerstone', 'Treasure'] -# (REQUIRED_COLUMNS imported from settings to avoid duplication) +# Constants for DataFrame validation and processing +REQUIRED_COLUMNS: List[str] = [ + 'name', 'faceName', 'edhrecRank', 'colorIdentity', 'colors', + 'manaCost', 'manaValue', 'type', 'creatureTypes', 'text', + 'power', 'toughness', 'keywords', 'themeTags', 'layout', 'side' +] # Mapping of card types to their corresponding theme tags TYPE_TAG_MAPPING: Dict[str, List[str]] = { diff --git a/code/tagging/tag_utils.py b/code/tagging/tag_utils.py index 156f0f5..8374b96 100644 --- a/code/tagging/tag_utils.py +++ b/code/tagging/tag_utils.py @@ -16,10 +16,7 @@ from __future__ import annotations # Standard library imports import re -from typing import List, Set, Union, Any, Tuple -from functools import lru_cache - -import numpy as np +from typing import List, Set, Union, Any # Third-party imports import pandas as pd @@ -27,43 +24,6 @@ import pandas as pd # Local application imports from . import tag_constants - -# --- Internal helpers for performance ----------------------------------------------------------- -@lru_cache(maxsize=2048) -def _build_joined_pattern(parts: Tuple[str, ...]) -> str: - """Join multiple regex parts with '|'. Cached for reuse across calls.""" - return '|'.join(parts) - - -@lru_cache(maxsize=2048) -def _compile_pattern(pattern: str, ignore_case: bool = True): - """Compile a regex pattern with optional IGNORECASE. Cached for reuse.""" - flags = re.IGNORECASE if ignore_case else 0 - return re.compile(pattern, flags) - -def _ensure_norm_series(df: pd.DataFrame, source_col: str, norm_col: str) -> pd.Series: - """Ensure a cached normalized string series exists on df for source_col. - - Normalization here means: fillna('') and cast to str once. This avoids - repeating fill/astype work on every mask creation. Extra columns are - later dropped by final reindex in output. - - Args: - df: DataFrame containing the column - source_col: Name of the source column (e.g., 'text') - norm_col: Name of the cache column to create/use (e.g., '__text_s') - - Returns: - The normalized pandas Series. - """ - if norm_col in df.columns: - return df[norm_col] - # Create normalized string series - series = df[source_col].fillna('') if source_col in df.columns else pd.Series([''] * len(df), index=df.index) - series = series.astype(str) - df[norm_col] = series - return df[norm_col] - def pluralize(word: str) -> str: """Convert a word to its plural form using basic English pluralization rules. @@ -118,21 +78,12 @@ def create_type_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: elif not isinstance(type_text, list): raise TypeError("type_text must be a string or list of strings") - if len(df) == 0: - return pd.Series([], dtype=bool) - - # Use normalized cached series - type_series = _ensure_norm_series(df, 'type', '__type_s') - if regex: - pattern = _build_joined_pattern(tuple(type_text)) if len(type_text) > 1 else type_text[0] - compiled = _compile_pattern(pattern, ignore_case=True) - return type_series.str.contains(compiled, na=False, regex=True) + pattern = '|'.join(f'{p}' for p in type_text) + return df['type'].str.contains(pattern, case=False, na=False, regex=True) else: - masks = [type_series.str.contains(p, case=False, na=False, regex=False) for p in type_text] - if not masks: - return pd.Series(False, index=df.index) - return pd.Series(np.logical_or.reduce(masks), index=df.index) + masks = [df['type'].str.contains(p, case=False, na=False, regex=False) for p in type_text] + return pd.concat(masks, axis=1).any(axis=1) def create_text_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True, combine_with_or: bool = True) -> pd.Series[bool]: """Create a boolean mask for rows where text matches one or more patterns. @@ -158,22 +109,15 @@ def create_text_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: elif not isinstance(type_text, list): raise TypeError("type_text must be a string or list of strings") - if len(df) == 0: - return pd.Series([], dtype=bool) - - # Use normalized cached series - text_series = _ensure_norm_series(df, 'text', '__text_s') - if regex: - pattern = _build_joined_pattern(tuple(type_text)) if len(type_text) > 1 else type_text[0] - compiled = _compile_pattern(pattern, ignore_case=True) - return text_series.str.contains(compiled, na=False, regex=True) + pattern = '|'.join(f'{p}' for p in type_text) + return df['text'].str.contains(pattern, case=False, na=False, regex=True) else: - masks = [text_series.str.contains(p, case=False, na=False, regex=False) for p in type_text] - if not masks: - return pd.Series(False, index=df.index) - reduced = np.logical_or.reduce(masks) if combine_with_or else np.logical_and.reduce(masks) - return pd.Series(reduced, index=df.index) + masks = [df['text'].str.contains(p, case=False, na=False, regex=False) for p in type_text] + if combine_with_or: + return pd.concat(masks, axis=1).any(axis=1) + else: + return pd.concat(masks, axis=1).all(axis=1) def create_keyword_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series[bool]: """Create a boolean mask for rows where keyword text matches one or more patterns. @@ -207,18 +151,18 @@ def create_keyword_mask(df: pd.DataFrame, type_text: Union[str, List[str]], rege elif not isinstance(type_text, list): raise TypeError("type_text must be a string or list of strings") - # Use normalized cached series for keywords - keywords = _ensure_norm_series(df, 'keywords', '__keywords_s') + # Create default mask for null values + # Handle null values and convert to string + keywords = df['keywords'].fillna('') + # Convert non-string values to strings + keywords = keywords.astype(str) if regex: - pattern = _build_joined_pattern(tuple(type_text)) if len(type_text) > 1 else type_text[0] - compiled = _compile_pattern(pattern, ignore_case=True) - return keywords.str.contains(compiled, na=False, regex=True) + pattern = '|'.join(f'{p}' for p in type_text) + return keywords.str.contains(pattern, case=False, na=False, regex=True) else: masks = [keywords.str.contains(p, case=False, na=False, regex=False) for p in type_text] - if not masks: - return pd.Series(False, index=df.index) - return pd.Series(np.logical_or.reduce(masks), index=df.index) + return pd.concat(masks, axis=1).any(axis=1) def create_name_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: bool = True) -> pd.Series[bool]: """Create a boolean mask for rows where name matches one or more patterns. @@ -243,21 +187,12 @@ def create_name_mask(df: pd.DataFrame, type_text: Union[str, List[str]], regex: elif not isinstance(type_text, list): raise TypeError("type_text must be a string or list of strings") - if len(df) == 0: - return pd.Series([], dtype=bool) - - # Use normalized cached series - name_series = _ensure_norm_series(df, 'name', '__name_s') - if regex: - pattern = _build_joined_pattern(tuple(type_text)) if len(type_text) > 1 else type_text[0] - compiled = _compile_pattern(pattern, ignore_case=True) - return name_series.str.contains(compiled, na=False, regex=True) + pattern = '|'.join(f'{p}' for p in type_text) + return df['name'].str.contains(pattern, case=False, na=False, regex=True) else: - masks = [name_series.str.contains(p, case=False, na=False, regex=False) for p in type_text] - if not masks: - return pd.Series(False, index=df.index) - return pd.Series(np.logical_or.reduce(masks), index=df.index) + masks = [df['name'].str.contains(p, case=False, na=False, regex=False) for p in type_text] + return pd.concat(masks, axis=1).any(axis=1) def extract_creature_types(type_text: str, creature_types: List[str], non_creature_types: List[str]) -> List[str]: """Extract creature types from a type text string. @@ -372,31 +307,6 @@ def apply_tag_vectorized(df: pd.DataFrame, mask: pd.Series[bool], tags: Union[st # Add new tags df.loc[mask, 'themeTags'] = current_tags.apply(lambda x: sorted(list(set(x + tags)))) -def apply_rules(df: pd.DataFrame, rules: List[dict]) -> None: - """Apply a list of rules to a DataFrame. - - Each rule dict supports: - - mask: pd.Series of booleans or a callable df->mask - - tags: str|List[str] - - Example: - rules = [ - { 'mask': lambda d: create_text_mask(d, 'lifelink'), 'tags': ['Lifelink'] }, - ] - - Args: - df: DataFrame to update - rules: list of rule dicts - """ - for rule in rules: - mask = rule.get('mask') - if callable(mask): - mask = mask(df) - if mask is None: - continue - tags = rule.get('tags', []) - apply_tag_vectorized(df, mask, tags) - def create_mass_effect_mask(df: pd.DataFrame, effect_type: str) -> pd.Series[bool]: """Create a boolean mask for cards with mass removal effects of a specific type. @@ -416,60 +326,6 @@ def create_mass_effect_mask(df: pd.DataFrame, effect_type: str) -> pd.Series[boo patterns = tag_constants.BOARD_WIPE_TEXT_PATTERNS[effect_type] return create_text_mask(df, patterns) -def create_trigger_mask( - df: pd.DataFrame, - subjects: Union[str, List[str]], - include_attacks: bool = False, -) -> pd.Series: - """Create a mask for text that contains trigger phrases followed by subjects. - - Example: with subjects=['a creature','you'] builds patterns: - 'when a creature', 'whenever you', 'at you', etc. - - Args: - df: DataFrame - subjects: A subject string or list (will be normalized to list) - include_attacks: If True, also include '{trigger} .* attacks' - - Returns: - Boolean Series mask - """ - subs = [subjects] if isinstance(subjects, str) else subjects - patterns: List[str] = [] - for trig in tag_constants.TRIGGERS: - patterns.extend([f"{trig} {s}" for s in subs]) - if include_attacks: - patterns.append(f"{trig} .* attacks") - return create_text_mask(df, patterns) - -def create_numbered_phrase_mask( - df: pd.DataFrame, - verb: Union[str, List[str]], - noun: str = '', - numbers: List[str] | None = None, -) -> pd.Series: - """Create a boolean mask for phrases like 'draw {num} card'. - - Args: - df: DataFrame to search - verb: Action verb or list of verbs (e.g., 'draw' or ['gain', 'gains']) - noun: Optional object noun in singular form (e.g., 'card'); if empty, omitted - numbers: Optional list of number words/digits (defaults to tag_constants.NUM_TO_SEARCH) - - Returns: - Boolean Series mask - """ - if numbers is None: - numbers = tag_constants.NUM_TO_SEARCH - # Normalize verbs to list - verbs = [verb] if isinstance(verb, str) else verb - # Build patterns - if noun: - patterns = [fr"{v}\s+{num}\s+{noun}" for v in verbs for num in numbers] - else: - patterns = [fr"{v}\s+{num}" for v in verbs for num in numbers] - return create_text_mask(df, patterns) - def create_damage_pattern(number: Union[int, str]) -> str: """Create a pattern for matching X damage effects. diff --git a/code/tagging/tagger.py b/code/tagging/tagger.py index 595bc9f..b88b5ad 100644 --- a/code/tagging/tagger.py +++ b/code/tagging/tagger.py @@ -3044,15 +3044,18 @@ def tag_for_special_counters(df: pd.DataFrame, color: str) -> None: start_time = pd.Timestamp.now() try: - # Process each counter type (supports singular/plural) + # Process each counter type counter_counts = {} - for spec in tag_constants.ALL_COUNTER_SPECS: - pattern = spec.search_pattern() + for counter_type in tag_constants.COUNTER_TYPES: + # Create pattern for this counter type + pattern = f'{counter_type} counter' mask = tag_utils.create_text_mask(df, pattern) + if mask.any(): - tags = [f'{spec.label} Counters', 'Counters Matter'] - tag_utils.apply_rules(df, [ {'mask': mask, 'tags': tags} ]) - counter_counts[spec.label] = int(mask.sum()) + # Apply tags via rules engine + tags = [f'{counter_type} Counters', 'Counters Matter'] + tag_utils.apply_rules(df, [ { 'mask': mask, 'tags': tags } ]) + counter_counts[counter_type] = mask.sum() # Log results duration = (pd.Timestamp.now() - start_time).total_seconds() @@ -6491,4 +6494,3 @@ def run_tagging(parallel: bool = False, max_workers: int | None = None): duration = (pd.Timestamp.now() - start_time).total_seconds() logger.info(f'Tagged cards in {duration:.2f}s') - diff --git a/requirements.txt b/requirements.txt index ca651e1..90b7f5b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,10 +3,6 @@ inquirer>=3.1.3 typing_extensions>=4.5.0 fuzzywuzzy>=0.18.0 python-Levenshtein>=0.12.0 -tqdm>=4.66.0 -scrython>=1.10.0 -numpy>=1.24.0 -requests>=2.31.0 # Development dependencies mypy>=1.3.0