mirror of
https://github.com/mwisnowski/mtg_python_deckbuilder.git
synced 2025-12-16 15:40:12 +01:00
fix: remove CSV fallback from theme catalog generation, add Parquet debug step
- Remove CSV fallback logic (Parquet-only in M4 migration)
- Add better error messages when Parquet file missing or empty
- Add workflow debug step to inspect Parquet file after tagging
- Simplify build_theme_catalog function signature
This commit is contained in:
parent
9e6c3e66e9
commit
30dfca0b67
2 changed files with 134 additions and 110 deletions
53
.github/workflows/build-similarity-cache.yml
vendored
53
.github/workflows/build-similarity-cache.yml
vendored
|
|
@ -88,13 +88,60 @@ jobs:
|
|||
echo "ERROR: Tagging completion flag not found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Debug - Inspect Parquet file after tagging
|
||||
if: steps.check_cache.outputs.needs_build == 'true'
|
||||
run: |
|
||||
python -c "
|
||||
import pandas as pd
|
||||
from code.path_util import get_processed_cards_path
|
||||
|
||||
# Locate the processed-cards Parquet file written by the tagging step
|
||||
parquet_path = get_processed_cards_path()
|
||||
print(f'Reading Parquet file: {parquet_path}')
|
||||
print(f'File exists: {parquet_path.exists()}')
|
||||
|
||||
if not parquet_path.exists():
|
||||
raise FileNotFoundError(f'Parquet file not found: {parquet_path}')
|
||||
|
||||
df = pd.read_parquet(parquet_path)
|
||||
print(f'Loaded {len(df)} rows from Parquet file')
|
||||
print(f'Columns: {list(df.columns)}')
|
||||
print('')
|
||||
|
||||
# Show first 10 rows with their themeTags
|
||||
print('First 10 cards with themeTags:')
|
||||
print('=' * 80)
|
||||
for idx, row in df.head(10).iterrows():
|
||||
name = row.get('name', 'UNKNOWN')
|
||||
tags = row.get('themeTags', [])
|
||||
tag_count = len(tags) if isinstance(tags, list) else 0
|
||||
print(f'{idx}: {name}')
|
||||
print(f' Type: {type(tags).__name__}')
|
||||
print(f' Count: {tag_count}')
|
||||
if tag_count > 0:
|
||||
# Show first 5 tags
|
||||
sample = tags[:5] if tag_count > 5 else tags
|
||||
print(f' Tags: {sample}')
|
||||
if tag_count > 5:
|
||||
print(f' ... and {tag_count - 5} more')
|
||||
else:
|
||||
print(f' Tags: (empty)')
|
||||
print('')
|
||||
"
|
||||
|
||||
- name: Generate theme catalog
|
||||
if: steps.check_cache.outputs.needs_build == 'true'
|
||||
run: |
|
||||
if [ ! -f "config/themes/theme_catalog.csv" ]; then
|
||||
echo "WARNING: Theme catalog not found, generating..."
|
||||
echo "Theme catalog not found, generating..."
|
||||
python -m code.scripts.generate_theme_catalog
|
||||
else
|
||||
echo "Theme catalog already exists, skipping generation"
|
||||
fi
|
||||
|
||||
|
||||
- name: Verify theme catalog and tag statistics
|
||||
if: steps.check_cache.outputs.needs_build == 'true'
|
||||
run: |
|
||||
# Detailed check of what tags were actually written
|
||||
python -c "
|
||||
import pandas as pd
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue