fix: correct DataFrame column filtering and enhance debug output

- Fix KeyError in generate_theme_catalog.py: use isCommander column correctly
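- DataFrame.get('is_commander', False) returned the default False because that column does not exist, and indexing the DataFrame with False raised the KeyError - check that 'isCommander' is present and index by column name directly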
- Enhanced debug step to print full row data for better diagnostics
This commit is contained in:
matt 2025-10-18 22:32:54 -07:00
parent a689400c47
commit 29b5da4778
2 changed files with 23 additions and 21 deletions

View file

@ -109,25 +109,23 @@ jobs:
print(f'Columns: {list(df.columns)}')
print('')
# Show first 10 rows with their themeTags
print('First 10 cards with themeTags:')
print('=' * 80)
for idx, row in df.head(10).iterrows():
name = row.get('name', 'UNKNOWN')
tags = row.get('themeTags', [])
tag_count = len(tags) if isinstance(tags, list) else 0
print(f'{idx}: {name}')
print(f' Type: {type(tags).__name__}')
print(f' Count: {tag_count}')
if tag_count > 0:
# Show first 5 tags
sample = tags[:5] if tag_count > 5 else tags
print(f' Tags: {sample}')
if tag_count > 5:
print(f' ... and {tag_count - 5} more')
else:
print(f' Tags: (empty)')
print('')
# Show first 5 rows completely
print('First 5 complete rows:')
print('=' * 100)
for idx, row in df.head(5).iterrows():
print(f'Row {idx}:')
for col in df.columns:
value = row[col]
if isinstance(value, (list, tuple)) or hasattr(value, '__array__'):
# For array-like, show type and length
try:
length = len(value)
print(f' {col}: {type(value).__name__}[{length}] = {value}')
except:
print(f' {col}: {type(value).__name__} = {value}')
else:
print(f' {col}: {value}')
print('-' * 100)
"
- name: Generate theme catalog

View file

@ -247,9 +247,13 @@ def build_theme_catalog(
all_cards_parquet, theme_variants=theme_variants
)
# For commander counts, filter all_cards by is_commander column
# For commander counts, filter all_cards by isCommander column
df_commanders = pd.read_parquet(all_cards_parquet)
df_commanders = df_commanders[df_commanders.get('is_commander', False)]
if 'isCommander' in df_commanders.columns:
df_commanders = df_commanders[df_commanders['isCommander']]
else:
# Fallback: assume all cards could be commanders if column missing
pass
commander_counts = Counter()
for tags in df_commanders['themeTags'].tolist():
if tags is None or (isinstance(tags, float) and pd.isna(tags)):