fix: correct DataFrame column filtering and enhance debug output

- Fix KeyError in generate_theme_catalog.py: use isCommander column correctly
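- DataFrame.get('is_commander', False) returns the scalar False when the column is missing, and False is not a valid row mask - check that the isCommander column exists and index by its name directly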
- Enhance the debug step to print the first 5 rows in full (every column) for better diagnostics
This commit is contained in:
matt 2025-10-18 22:32:54 -07:00
parent a689400c47
commit 29b5da4778
2 changed files with 23 additions and 21 deletions

View file

@@ -109,25 +109,23 @@ jobs:
print(f'Columns: {list(df.columns)}') print(f'Columns: {list(df.columns)}')
print('') print('')
# Show first 10 rows with their themeTags # Show first 5 rows completely
print('First 10 cards with themeTags:') print('First 5 complete rows:')
print('=' * 80) print('=' * 100)
for idx, row in df.head(10).iterrows(): for idx, row in df.head(5).iterrows():
name = row.get('name', 'UNKNOWN') print(f'Row {idx}:')
tags = row.get('themeTags', []) for col in df.columns:
tag_count = len(tags) if isinstance(tags, list) else 0 value = row[col]
print(f'{idx}: {name}') if isinstance(value, (list, tuple)) or hasattr(value, '__array__'):
print(f' Type: {type(tags).__name__}') # For array-like, show type and length
print(f' Count: {tag_count}') try:
if tag_count > 0: length = len(value)
# Show first 5 tags print(f' {col}: {type(value).__name__}[{length}] = {value}')
sample = tags[:5] if tag_count > 5 else tags except:
print(f' Tags: {sample}') print(f' {col}: {type(value).__name__} = {value}')
if tag_count > 5: else:
print(f' ... and {tag_count - 5} more') print(f' {col}: {value}')
else: print('-' * 100)
print(f' Tags: (empty)')
print('')
" "
- name: Generate theme catalog - name: Generate theme catalog

View file

@@ -247,9 +247,13 @@ def build_theme_catalog(
all_cards_parquet, theme_variants=theme_variants all_cards_parquet, theme_variants=theme_variants
) )
# For commander counts, filter all_cards by is_commander column # For commander counts, filter all_cards by isCommander column
df_commanders = pd.read_parquet(all_cards_parquet) df_commanders = pd.read_parquet(all_cards_parquet)
df_commanders = df_commanders[df_commanders.get('is_commander', False)] if 'isCommander' in df_commanders.columns:
df_commanders = df_commanders[df_commanders['isCommander']]
else:
# Fallback: assume all cards could be commanders if column missing
pass
commander_counts = Counter() commander_counts = Counter()
for tags in df_commanders['themeTags'].tolist(): for tags in df_commanders['themeTags'].tolist():
if tags is None or (isinstance(tags, float) and pd.isna(tags)): if tags is None or (isinstance(tags, float) and pd.isna(tags)):