fix: handle numpy arrays in parse_theme_tags

Values loaded from Parquet files arrive as numpy arrays, not Python lists.
Convert ndarray values to lists before processing theme tags.
This commit is contained in:
matt 2025-10-18 22:39:53 -07:00
parent 7a94e195b7
commit db0b0ccfdb

View file

@ -73,6 +73,12 @@ def canonical_key(raw: str) -> str:
def parse_theme_tags(value: object) -> List[str]:
if value is None:
return []
# Handle numpy arrays (from Parquet files)
if hasattr(value, '__array__') or hasattr(value, 'tolist'):
try:
value = value.tolist() if hasattr(value, 'tolist') else list(value)
except Exception:
pass
if isinstance(value, list):
return [str(v) for v in value if isinstance(v, str) and v.strip()]
if isinstance(value, str):