def count_tags(x):
    """Return the number of theme tags held by *x*, or 0 when it holds none.

    Any sized, non-string container (list, tuple, array-like, ...) reports
    its length.  ``None``, scalars such as ``float('nan')`` and other
    unsized objects count as 0.  A bare ``str``/``bytes`` value is treated
    as "no tags" instead of a sequence of characters, matching the
    ``not isinstance(..., str)`` guard applied by the other themeTags
    readers touched in this change set.
    """
    # A string has a length, but it is a character count, not a tag count.
    if isinstance(x, (str, bytes)):
        return 0
    try:
        return len(x)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None, scalars and unsized array-likes; the other
        # two are the remaining errors len() itself can raise.  Anything
        # else (KeyboardInterrupt, SystemExit, ...) is left to propagate.
        return 0


def ensure_theme_tags_list(val) -> list[str]:
    """Return *val* as a list of theme tags, mapping ``None`` to ``[]``.

    Every non-``None`` value is passed to ``parse_theme_tags`` unchanged, so
    how lists, array-likes and strings are converted is decided entirely by
    that function (defined elsewhere in this module).
    """
    if val is None:
        # Fresh list on every call so callers may mutate the result freely.
        return []
    return parse_theme_tags(val)
tags=row.get('themeTags', []) if isinstance(row.get('themeTags', []), list) else [], + tags=bu.ensure_theme_tags_list(row.get('themeTags')), role='creature', sub_role=role, added_by='creature_add', @@ -590,7 +590,7 @@ class CreatureAdditionMixin: mana_cost=row.get('manaCost',''), mana_value=row.get('manaValue', row.get('cmc','')), creature_types=row.get('creatureTypes', []) if isinstance(row.get('creatureTypes', []), list) else [], - tags=row.get('themeTags', []) if isinstance(row.get('themeTags', []), list) else [], + tags=bu.ensure_theme_tags_list(row.get('themeTags')), role='creature', sub_role='fill', added_by='creature_fill', @@ -672,7 +672,7 @@ class CreatureAdditionMixin: mana_cost=row.get('manaCost',''), mana_value=row.get('manaValue', row.get('cmc','')), creature_types=row.get('creatureTypes', []) if isinstance(row.get('creatureTypes', []), list) else [], - tags=row.get('themeTags', []) if isinstance(row.get('themeTags', []), list) else [], + tags=bu.ensure_theme_tags_list(row.get('themeTags')), role='creature', sub_role='all_theme', added_by='creature_all_theme', diff --git a/code/deck_builder/phases/phase4_spells.py b/code/deck_builder/phases/phase4_spells.py index 3ec39fb..632806d 100644 --- a/code/deck_builder/phases/phase4_spells.py +++ b/code/deck_builder/phases/phase4_spells.py @@ -193,7 +193,7 @@ class SpellAdditionMixin: card_type=r.get('type',''), mana_cost=r.get('manaCost',''), mana_value=r.get('manaValue', r.get('cmc','')), - tags=r.get('themeTags', []) if isinstance(r.get('themeTags', []), list) else [], + tags=bu.ensure_theme_tags_list(r.get('themeTags')), role='ramp', sub_role=phase_name.lower(), added_by='spell_ramp' @@ -322,7 +322,7 @@ class SpellAdditionMixin: card_type=r.get('type',''), mana_cost=r.get('manaCost',''), mana_value=r.get('manaValue', r.get('cmc','')), - tags=r.get('themeTags', []) if isinstance(r.get('themeTags', []), list) else [], + tags=bu.ensure_theme_tags_list(r.get('themeTags')), role='removal', sub_role='spot', 
added_by='spell_removal' @@ -399,7 +399,7 @@ class SpellAdditionMixin: card_type=r.get('type',''), mana_cost=r.get('manaCost',''), mana_value=r.get('manaValue', r.get('cmc','')), - tags=r.get('themeTags', []) if isinstance(r.get('themeTags', []), list) else [], + tags=bu.ensure_theme_tags_list(r.get('themeTags')), role='wipe', sub_role='board', added_by='spell_wipe' @@ -493,7 +493,7 @@ class SpellAdditionMixin: card_type=r.get('type',''), mana_cost=r.get('manaCost',''), mana_value=r.get('manaValue', r.get('cmc','')), - tags=r.get('themeTags', []) if isinstance(r.get('themeTags', []), list) else [], + tags=bu.ensure_theme_tags_list(r.get('themeTags')), role='card_advantage', sub_role='conditional', added_by='spell_draw' @@ -516,7 +516,7 @@ class SpellAdditionMixin: card_type=r.get('type',''), mana_cost=r.get('manaCost',''), mana_value=r.get('manaValue', r.get('cmc','')), - tags=r.get('themeTags', []) if isinstance(r.get('themeTags', []), list) else [], + tags=bu.ensure_theme_tags_list(r.get('themeTags')), role='card_advantage', sub_role='unconditional', added_by='spell_draw' @@ -713,7 +713,7 @@ class SpellAdditionMixin: card_type=r.get('type',''), mana_cost=r.get('manaCost',''), mana_value=r.get('manaValue', r.get('cmc','')), - tags=r.get('themeTags', []) if isinstance(r.get('themeTags', []), list) else [], + tags=bu.ensure_theme_tags_list(r.get('themeTags')), role='protection', added_by='spell_protection' ) @@ -879,7 +879,7 @@ class SpellAdditionMixin: card_type=row.get('type', ''), mana_cost=row.get('manaCost', ''), mana_value=row.get('manaValue', row.get('cmc', '')), - tags=row.get('themeTags', []) if isinstance(row.get('themeTags', []), list) else [], + tags=bu.ensure_theme_tags_list(row.get('themeTags')), role='theme_spell', sub_role=role, added_by='spell_theme_fill', @@ -942,7 +942,7 @@ class SpellAdditionMixin: card_type=row.get('type', ''), mana_cost=row.get('manaCost', ''), mana_value=row.get('manaValue', row.get('cmc', '')), - tags=row.get('themeTags', []) 
if isinstance(row.get('themeTags', []), list) else [], + tags=bu.ensure_theme_tags_list(row.get('themeTags')), role='theme_spell', sub_role='fill_multi', added_by='spell_theme_fill', @@ -1006,7 +1006,7 @@ class SpellAdditionMixin: card_type=r0.get('type',''), mana_cost=r0.get('manaCost',''), mana_value=r0.get('manaValue', r0.get('cmc','')), - tags=r0.get('themeTags', []) if isinstance(r0.get('themeTags', []), list) else [], + tags=bu.ensure_theme_tags_list(r0.get('themeTags')), role='filler', sub_role=r0.get('_fillerCat',''), added_by='spell_general_filler' @@ -1058,4 +1058,4 @@ class SpellAdditionMixin: """ """Public method for orchestration: delegates to add_non_creature_spells.""" return self.add_non_creature_spells() - \ No newline at end of file + diff --git a/code/scripts/extract_themes.py b/code/scripts/extract_themes.py index d3b4fdc..c45e7c5 100644 --- a/code/scripts/extract_themes.py +++ b/code/scripts/extract_themes.py @@ -126,7 +126,7 @@ def tally_tag_frequencies_by_base_color() -> Dict[str, Dict[str, int]]: return derived # Iterate rows for _, row in df.iterrows(): - tags = row['themeTags'] if isinstance(row['themeTags'], list) else [] + tags = list(row['themeTags']) if hasattr(row.get('themeTags'), '__len__') and not isinstance(row.get('themeTags'), str) else [] # Compute base colors contribution ci = row['colorIdentity'] if 'colorIdentity' in row else None letters = set(ci) if isinstance(ci, list) else set() @@ -162,7 +162,7 @@ def gather_theme_tag_rows() -> List[List[str]]: if 'themeTags' not in df.columns: continue for _, row in df.iterrows(): - tags = row['themeTags'] if isinstance(row['themeTags'], list) else [] + tags = list(row['themeTags']) if hasattr(row.get('themeTags'), '__len__') and not isinstance(row.get('themeTags'), str) else [] if tags: rows.append(tags) return rows @@ -523,3 +523,4 @@ def main() -> None: if __name__ == "__main__": main() + diff --git a/code/tagging/tagger.py b/code/tagging/tagger.py index 096938d..526aa5f 100644 --- 
a/code/tagging/tagger.py +++ b/code/tagging/tagger.py @@ -1054,7 +1054,7 @@ def tag_for_keywords(df: pd.DataFrame, color: str) -> None: exclusion_keywords = {'partner'} def _merge_keywords(row: pd.Series) -> list[str]: - base_tags = row['themeTags'] if isinstance(row['themeTags'], list) else [] + base_tags = list(row['themeTags']) if hasattr(row.get('themeTags'), '__len__') and not isinstance(row.get('themeTags'), str) else [] keywords_raw = row['keywords'] if isinstance(keywords_raw, str): @@ -6892,3 +6892,4 @@ def run_tagging(parallel: bool = False, max_workers: int | None = None): +