From b2ccbbd664eb420a6b1ad792a1d9a6f10b034c4c Mon Sep 17 00:00:00 2001
From: matt
Date: Sun, 19 Oct 2025 08:37:07 -0700
Subject: [PATCH] chore: comment out debug step in similarity cache workflow

The debug step was helpful for diagnosing numpy array issues but is no
longer needed for normal operation. Commented out rather than removed
so it's available if needed for future troubleshooting.
---
NOTE(review): this patch was restored from a whitespace-collapsed copy.
Line structure follows the hunk header counts (6 context, 38 removed,
39 added); leading indentation is reconstructed and should be confirmed
against the target file, or applied with `git apply --ignore-whitespace`.

 .github/workflows/build-similarity-cache.yml | 77 ++++++++++----------
 1 file changed, 39 insertions(+), 38 deletions(-)

diff --git a/.github/workflows/build-similarity-cache.yml b/.github/workflows/build-similarity-cache.yml
index 1d83171..e17ee4f 100644
--- a/.github/workflows/build-similarity-cache.yml
+++ b/.github/workflows/build-similarity-cache.yml
@@ -89,44 +89,45 @@ jobs:
             exit 1
           fi
 
-      - name: Debug - Inspect Parquet file after tagging
-        if: steps.check_cache.outputs.needs_build == 'true'
-        run: |
-          python -c "
-          import pandas as pd
-          from pathlib import Path
-          from code.path_util import get_processed_cards_path
-
-          parquet_path = Path(get_processed_cards_path())
-          print(f'Reading Parquet file: {parquet_path}')
-          print(f'File exists: {parquet_path.exists()}')
-
-          if not parquet_path.exists():
-              raise FileNotFoundError(f'Parquet file not found: {parquet_path}')
-
-          df = pd.read_parquet(parquet_path)
-          print(f'Loaded {len(df)} rows from Parquet file')
-          print(f'Columns: {list(df.columns)}')
-          print('')
-
-          # Show first 5 rows completely
-          print('First 5 complete rows:')
-          print('=' * 100)
-          for idx, row in df.head(5).iterrows():
-              print(f'Row {idx}:')
-              for col in df.columns:
-                  value = row[col]
-                  if isinstance(value, (list, tuple)) or hasattr(value, '__array__'):
-                      # For array-like, show type and length
-                      try:
-                          length = len(value)
-                          print(f' {col}: {type(value).__name__}[{length}] = {value}')
-                      except:
-                          print(f' {col}: {type(value).__name__} = {value}')
-                  else:
-                      print(f' {col}: {value}')
-              print('-' * 100)
-          "
+      # Debug step - uncomment if needed to inspect Parquet file contents
+      # - name: Debug - Inspect Parquet file after tagging
+      #   if: steps.check_cache.outputs.needs_build == 'true'
+      #   run: |
+      #     python -c "
+      #     import pandas as pd
+      #     from pathlib import Path
+      #     from code.path_util import get_processed_cards_path
+      #
+      #     parquet_path = Path(get_processed_cards_path())
+      #     print(f'Reading Parquet file: {parquet_path}')
+      #     print(f'File exists: {parquet_path.exists()}')
+      #
+      #     if not parquet_path.exists():
+      #         raise FileNotFoundError(f'Parquet file not found: {parquet_path}')
+      #
+      #     df = pd.read_parquet(parquet_path)
+      #     print(f'Loaded {len(df)} rows from Parquet file')
+      #     print(f'Columns: {list(df.columns)}')
+      #     print('')
+      #
+      #     # Show first 5 rows completely
+      #     print('First 5 complete rows:')
+      #     print('=' * 100)
+      #     for idx, row in df.head(5).iterrows():
+      #         print(f'Row {idx}:')
+      #         for col in df.columns:
+      #             value = row[col]
+      #             if isinstance(value, (list, tuple)) or hasattr(value, '__array__'):
+      #                 # For array-like, show type and length
+      #                 try:
+      #                     length = len(value)
+      #                     print(f' {col}: {type(value).__name__}[{length}] = {value}')
+      #                 except:
+      #                     print(f' {col}: {type(value).__name__} = {value}')
+      #             else:
+      #                 print(f' {col}: {value}')
+      #         print('-' * 100)
+      #     "
 
       - name: Generate theme catalog
         if: steps.check_cache.outputs.needs_build == 'true'