From db0b0ccfdbb944994e3034e793523033bf99a057 Mon Sep 17 00:00:00 2001 From: matt Date: Sat, 18 Oct 2025 22:39:53 -0700 Subject: [PATCH] fix: handle numpy arrays in parse_theme_tags Parquet files return numpy arrays, not Python lists. Added conversion from ndarray to list before processing theme tags. --- code/scripts/generate_theme_catalog.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/code/scripts/generate_theme_catalog.py b/code/scripts/generate_theme_catalog.py index 82206b0..39f197b 100644 --- a/code/scripts/generate_theme_catalog.py +++ b/code/scripts/generate_theme_catalog.py @@ -73,6 +73,12 @@ def canonical_key(raw: str) -> str: def parse_theme_tags(value: object) -> List[str]: if value is None: return [] + # Handle numpy arrays (from Parquet files) + if hasattr(value, '__array__') or hasattr(value, 'tolist'): + try: + value = value.tolist() if hasattr(value, 'tolist') else list(value) + except Exception: + pass if isinstance(value, list): return [str(v) for v in value if isinstance(v, str) and v.strip()] if isinstance(value, str):