mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-16 15:40:12 +01:00
fix: systematically handle numpy arrays from Parquet files across codebase
- Add ensure_theme_tags_list() utility to builder_utils for simpler numpy array handling
- Update phase3_creatures.py: 6 locations now use bu.ensure_theme_tags_list()
- Update phase4_spells.py: 9 locations now use bu.ensure_theme_tags_list()
- Update tagger.py: 2 locations use hasattr/list() for numpy compatibility
- Update extract_themes.py: 2 locations use hasattr/list() for numpy compatibility
- Fix build-similarity-cache.yml verification script to handle numpy arrays
- Enhance workflow debug output to show complete row data

Parquet files return numpy.ndarray objects for array columns, not Python lists. The M4 migration added numpy support to the canonical parse_theme_tags() in builder_utils, but many parts of the codebase still used isinstance(..., list) checks that fail with arrays. This commit systematically replaces all 19 instances with proper numpy array handling.

Fixes GitHub Actions workflow 'RuntimeError: No theme tags found' and verification failures.
This commit is contained in:
parent
db0b0ccfdb
commit
bff64de370
6 changed files with 47 additions and 22 deletions
17
.github/workflows/build-similarity-cache.yml
vendored
17
.github/workflows/build-similarity-cache.yml
vendored
|
|
@ -147,6 +147,17 @@ jobs:
|
|||
from code.path_util import get_processed_cards_path
|
||||
df = pd.read_parquet(get_processed_cards_path())
|
||||
|
||||
# Helper to count tags (handles both list and numpy array)
|
||||
def count_tags(x):
    """Return the number of tags in *x*, or 0 when it has no usable length.

    Works for any sized container (Python list, numpy array, tuple, ...).
    ``None`` and unsized values such as ints, floats (including NaN) and
    0-d numpy arrays count as 0.
    """
    if x is None:
        return 0
    try:
        # EAFP: len() raises TypeError for objects without a length, so a
        # separate hasattr(x, '__len__') probe is unnecessary.
        return len(x)
    except Exception:
        # Best-effort counting: any failure to size the value means "no
        # tags". Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return 0
|
||||
|
||||
# Count total tags
|
||||
total_tags = 0
|
||||
cards_with_tags = 0
|
||||
|
|
@ -155,7 +166,7 @@ jobs:
|
|||
for idx, row in df.head(10).iterrows():
|
||||
name = row['name']
|
||||
tags = row['themeTags']
|
||||
tag_count = len(tags) if isinstance(tags, list) else 0
|
||||
tag_count = count_tags(tags)
|
||||
total_tags += tag_count
|
||||
if tag_count > 0:
|
||||
cards_with_tags += 1
|
||||
|
|
@ -166,8 +177,8 @@ jobs:
|
|||
print(f' {card}')
|
||||
|
||||
# Full count
|
||||
all_tags = df['themeTags'].apply(lambda x: len(x) if isinstance(x, list) else 0).sum()
|
||||
all_with_tags = (df['themeTags'].apply(lambda x: len(x) if isinstance(x, list) else 0) > 0).sum()
|
||||
all_tags = df['themeTags'].apply(count_tags).sum()
|
||||
all_with_tags = (df['themeTags'].apply(count_tags) > 0).sum()
|
||||
|
||||
print(f'')
|
||||
print(f'Total cards: {len(df):,}')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue