From 29b5da47782a2af1a14d4ac2bf70c1896a96e986 Mon Sep 17 00:00:00 2001 From: matt Date: Sat, 18 Oct 2025 22:32:54 -0700 Subject: [PATCH] fix: correct DataFrame column filtering and enhance debug output - Fix KeyError in generate_theme_catalog.py: use isCommander column correctly - DataFrame.get('is_commander', False) fell back to the scalar False default when that column was absent, and indexing the DataFrame with False raises KeyError - check for the isCommander column and index by name instead - Enhanced debug step to print full row data for better diagnostics --- .github/workflows/build-similarity-cache.yml | 36 +++++++++----------- code/scripts/generate_theme_catalog.py | 8 +++-- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/.github/workflows/build-similarity-cache.yml b/.github/workflows/build-similarity-cache.yml index d135cea..a4a4bbc 100644 --- a/.github/workflows/build-similarity-cache.yml +++ b/.github/workflows/build-similarity-cache.yml @@ -109,25 +109,23 @@ jobs: print(f'Columns: {list(df.columns)}') print('') - # Show first 10 rows with their themeTags - print('First 10 cards with themeTags:') - print('=' * 80) - for idx, row in df.head(10).iterrows(): - name = row.get('name', 'UNKNOWN') - tags = row.get('themeTags', []) - tag_count = len(tags) if isinstance(tags, list) else 0 - print(f'{idx}: {name}') - print(f' Type: {type(tags).__name__}') - print(f' Count: {tag_count}') - if tag_count > 0: - # Show first 5 tags - sample = tags[:5] if tag_count > 5 else tags - print(f' Tags: {sample}') - if tag_count > 5: - print(f' ... 
and {tag_count - 5} more') - else: - print(f' Tags: (empty)') - print('') + # Show first 5 rows completely + print('First 5 complete rows:') + print('=' * 100) + for idx, row in df.head(5).iterrows(): + print(f'Row {idx}:') + for col in df.columns: + value = row[col] + if isinstance(value, (list, tuple)) or hasattr(value, '__array__'): + # For array-like, show type and length + try: + length = len(value) + print(f' {col}: {type(value).__name__}[{length}] = {value}') + except: + print(f' {col}: {type(value).__name__} = {value}') + else: + print(f' {col}: {value}') + print('-' * 100) " - name: Generate theme catalog diff --git a/code/scripts/generate_theme_catalog.py b/code/scripts/generate_theme_catalog.py index d76cb22..70cb8ad 100644 --- a/code/scripts/generate_theme_catalog.py +++ b/code/scripts/generate_theme_catalog.py @@ -247,9 +247,13 @@ def build_theme_catalog( all_cards_parquet, theme_variants=theme_variants ) - # For commander counts, filter all_cards by is_commander column + # For commander counts, filter all_cards by isCommander column df_commanders = pd.read_parquet(all_cards_parquet) - df_commanders = df_commanders[df_commanders.get('is_commander', False)] + if 'isCommander' in df_commanders.columns: + df_commanders = df_commanders[df_commanders['isCommander']] + else: + # Fallback: assume all cards could be commanders if column missing + pass commander_counts = Counter() for tags in df_commanders['themeTags'].tolist(): if tags is None or (isinstance(tags, float) and pd.isna(tags)):